stati.pl

#!/usr/bin/perl
#updated 100304 to deal with hyperlinks in statuses leading to an extra "<description>" section
# Load the epoch timestamp recorded by the previous run.  The state file holds
# that timestamp on its first line.  When the file cannot be opened (for
# example on the very first run) @lastcheck is left empty, so later numeric
# use of $lastcheck[0] starts from 0.
$in='/Users/surly/.laststati';
@lastcheck=();
if (open(my $state_fh, '<', $in)) {
    # Read through the handle that was actually opened: filehandle names are
    # case-sensitive, so a differently-cased name would refer to an unopened
    # handle and always yield an empty list.
    @lastcheck=<$state_fh>;
    close($state_fh);
}
else {
    warn "Cannot read $in ($!); no previous check time available\n";
}

# Clock-skew workaround.  After Daylight Saving Time ended, the Facebook feed
# switched its timestamps from -0400 to +0400, which left them nine hours off.
# The simplest remedy (for now) is to advance the stored last-check time by
# nine hours, i.e. 32400 seconds, before anything is compared against it.
$lastcheck[0] =~ s/\n//;    # drop the newline that came with the stored value
$lastcheck[0] = $lastcheck[0] + 32400;



#print "lastcheck[0] = $lastcheck[0]\n";
# Log file that receives the formatted statuses printed to OUTP further down.
$out='/Users/surly/code/bayes/statuses.txt';
# Three-argument open keeps the mode separate from the path.  A failure is
# reported rather than ignored; the script carries on so statuses are still
# shown on the terminal even when they cannot be logged.
open(OUTP, '>', $out) or warn "Cannot write $out: $!\n";


use Time::Local;
#my $time = timelocal(0,0,0,1,9,109);
#my $string = localtime $time;
#print "the big ball falls at $time => $string\n";
# Record the time of this run so that the next run can tell which statuses
# are new.
# NOTE(review): the timestamp is saved before the feed is fetched further
# down, so a failed fetch still advances it -- confirm that is acceptable.
$checktime=time();
# Write the state file directly instead of shelling out to `echo`: no extra
# process is spawned and a failure to save is reported instead of ignored.
# The content is the same as before: the epoch seconds followed by a newline.
{
    my $state_path='/Users/surly/.laststati';
    if (open(my $state_fh, '>', $state_path)) {
        print {$state_fh} "$checktime\n";
        close($state_fh) or warn "Cannot finish writing $state_path: $!\n";
    }
    else {
        warn "Cannot write $state_path: $!\n";
    }
}

# Directory holding the dbacl category files.  dbacl takes this location from
# the DBACL_PATH *environment* variable, and a plain Perl variable is not
# visible to child processes, so export it as well.  A value already present
# in the caller's environment is left alone.
$DBACL_PATH="/Users/surly/Dropbox/core/.dbacl";
$ENV{DBACL_PATH}=$DBACL_PATH unless defined $ENV{DBACL_PATH};

# Download the friends-status RSS feed.  Only curl runs as an external
# command; all filtering and clean-up happens in Perl below.
@old=`curl --fail -s "http://www.facebook.com/feeds/friends_status.php?id=[some long number]&key=[some alphanumeric sequence]&format=rss20" -A "Mozilla/4.0"`;
warn "curl failed (wait status $?); the feed may be empty or incomplete\n" if $? != 0;

# Keep only lines mentioning a title or a pubDate, and drop lines containing
# "s Friends".
@old=grep { $_ =~ /title|pubDate/ and $_ !~ /s Friends/ } @old;

foreach my $feed_line (@old) {
    $feed_line=~s/^\s+//;         # remove the feed's indentation
    # Decode the XML entities for & > <.  The previous substitutions replaced
    # each of these characters with itself, which did nothing.  The order
    # (ampersand first) follows the original sequence of substitutions.
    $feed_line=~s/&amp;/&/g;
    $feed_line=~s/&gt;/>/g;
    $feed_line=~s/&lt;/</g;
    $feed_line=~s/<title>//g;     # the closing tag is handled later, per item
}
#% dbacl -l twain -g '^([[:alpha:]]+)' -g '[^[:alpha:]]([[:alpha:]]+)' Mark_Twain.txt
#The category twain which is obtained depends only on single alphabetic words in the text file Mark_Twain.txt (and computed digram statistics for prediction). For a second example, the following command builds a smoothed Markovian (word bigram) model which depends on pairs of consecutive words within each line (but pairs cannot straddle a line break):
#all of this to deal with the twitter @whoever grammar AND to eliminate word pair matching since the corpus is so small.

# Glue the filtered feed lines into one string; the elements are separated by
# the list separator $" exactly as array interpolation would do.  Then cut the
# string into chunks at every closing pubDate tag, which is dropped by the
# split.
$smush = join($", @old);
@lines = split(/<\/pubDate>/, $smush);

#@lines=@old;

##@list = split(/<item>/, $tumblr);
##$latest=$list[1];
##$latest=~s/^.*<description>//;$latest=~s/<\/description>//;
##$latest=~s/<link>.*<\/link>//;
##$latest=~s/<guid>.*<\/guid><pubdate>/ - /;
##$latest=~s/<\/pubDate><\/item>//;
##$latest=~s/ -0400.*//;
###$br=`perl /Users/surly/bin/tumstati`;
###$br=~s/ \(tumblr\)//;
###push(@lines,$br);

#Is there anything new at all? Could just compare
#    <lastBuildDate>Sun, 30 Aug 2009 09:42:19 -0400</lastBuildDate>
#to lastcheck

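#09.08.30
#some weird error
#Day '' out of range 1..31 at /Users/surly/code/bayes/stati line 108
#has just cropped up because the program is trying to parse an n+1 th status
#when there are only n. The most likely cause: the joined feed text ends with
#the newline that follows the last </pubDate>, so split() returns one extra
#chunk holding only that newline, and that chunk has no date to parse.
#For now I am just going to limit it by stopping the loop when
#time<lastchecked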

# ---------------------------------------------------------------------------
# Main loop: walk the feed chunks in order, stop at the first one that is not
# newer than the last check, and for every newer status
#   * append it to the OUTP log (unless it mentions one of the muted names),
#   * ask dbacl to classify it as ok / bad / urgent, and
#   * print it to the terminal, coloured according to that category.
# ---------------------------------------------------------------------------
use IPC::Open2;    # core module: talk to dbacl through pipes, not a shell

# Convert a feed timestamp such as "Sun, 30 Aug 2009 09:42:19 -0400" into
# epoch seconds.
#   $stamp  - text containing day, month abbreviation, year and clock time
#   returns - epoch seconds, or undef when the text cannot be parsed
# The numeric UTC offset is deliberately ignored: the date and time fields are
# handed to timelocal() as they are, so the result is in the local time zone.
sub feed_date_to_epoch {
    my ($stamp) = @_;
    return unless defined $stamp;

    my %month_index = (
        Jan => 0, Feb => 1, Mar => 2,  Apr => 3,  May => 4,  Jun => 5,
        Jul => 6, Aug => 7, Sep => 8,  Oct => 9,  Nov => 10, Dec => 11,
    );

    return unless $stamp =~
        /(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{1,2}):(\d{2})(?::(\d{2}))?/;
    my ($day, $mon, $year, $hour, $min, $sec) = ($1, $2, $3, $4, $5, $6);
    $sec = 0 unless defined $sec;

    # An unknown month yields undef rather than reusing an earlier value.
    return unless exists $month_index{$mon};

    # timelocal() dies on impossible dates; turn that into undef.
    my $epoch = eval {
        timelocal($sec, $min, $hour, $day, $month_index{$mon}, $year);
    };
    return $epoch;
}

# Run "dbacl -v -c ok -c bad -c urgent" with $text on its standard input and
# return whatever it prints (an empty string when dbacl cannot be run).
# The status text comes from the feed and is therefore untrusted, so it is
# written to dbacl's stdin directly and never becomes part of a shell command.
sub classify_status {
    my ($text) = @_;
    my ($from_dbacl, $to_dbacl);

    my $pid = eval {
        open2($from_dbacl, $to_dbacl,
              'dbacl', '-v', '-c', 'ok', '-c', 'bad', '-c', 'urgent');
    };
    unless ($pid) {
        warn "Could not run dbacl: $@";
        return '';
    }

    # If dbacl exits before reading its input, the write below must not kill
    # this script with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';
    print {$to_dbacl} $text;
    close($to_dbacl);

    my $verdict = do { local $/; <$from_dbacl> };
    close($from_dbacl);
    waitpid($pid, 0);

    return defined $verdict ? $verdict : '';
}

$time=99999999999;    # sentinel larger than any parsed timestamp, so the loop is entered
for ($i=0; ($lines[$i])&&($time>$lastcheck[0]); $i++) {
    # Collapse the chunk onto a single line.
    $lines[$i]=~s/\<description\>.*/[hyperlinked]/g;
    $lines[$i]=~s/\s?\<\/title\>//g;
    $lines[$i]=~s/\n//g;

    # A chunk without a pubDate tag (such as the lone newline that follows the
    # last item) has no date to compare and is skipped.
    next unless $lines[$i]=~/.*<pubDate>(.*)/;
    my $stamp=$1;

    my $posted_at=feed_date_to_epoch($stamp);
    unless (defined $posted_at) {
        warn "Skipping feed item with unparsable date: $lines[$i]\n";
        next;
    }
    $time=$posted_at;

    print "$time: $lines[$i] (Internal testing)";
    print "$time $lastcheck[0] (Internal testing)"; print"\n";

    # Not newer than the last check: the loop condition ends the loop.
    next unless $time>$lastcheck[0];

    # Turn "text <pubDate>date +hhmm" into "     text (date)\n".  Any numeric
    # UTC offset is removed, not only -0400 and +0400.
    $lines[$i]=~s/[+-]\d{4}$//;
    $lines[$i]=~s/\s+\[hyperlinked\]/ [hyperlinked]/g;
    $lines[$i]=~s/\s*$/)\n/;
    $lines[$i]=~s/\s*<pubDate>/ (/;
    # Close up a stray space before a possessive ("Name 's" -> "Name's").
    # Quote characters need no further treatment because the text is no longer
    # passed through a shell.
    $lines[$i]=~s/ 's/'s/g;
    $lines[$i]=~s/^\s*/     /;    # uniform five-space indent

    my $category=classify_status($lines[$i]);

    if ($lines[$i]!~/boring person1/
        && $lines[$i]!~/boring person2/
        && $lines[$i]!~/boring person3/) {
        print OUTP $lines[$i];
    }

    # Terminal output: default colour for "ok", ANSI colours for the others.
    # Nothing is printed when dbacl names none of the three categories.
    if ($category=~/ok/) {
        print "$lines[$i]";
    }
    elsif ($category=~/bad/) {
        print "\033[0;37;48m$lines[$i]";
        print "\033[0m";
    }
    elsif ($category=~/urgent/) {
        print "\033[0;34;48m$lines[$i]";
        print "\033[0m";
    }
}
# Buffered write errors only surface when the handle is closed.
close(OUTP) or warn "Problem closing the status log: $!\n";