#!/usr/bin/perl
#updated 100304 to deal with hyperlinks in statuses leading to an extra "<description>" section
# --- Load the time of the previous run and open the status log -------------
# $lastcheck[0] ends up holding the epoch time of the previous check (plus the
# compensation described below); OUTP is the log that new statuses are
# appended to further down.
$in='/Users/surly/.laststati';
@lastcheck=();
# Filehandle names are case-sensitive: reading <info> after opening INFO
# silently returned nothing, so the saved timestamp was never used.
if (open(my $state_fh,'<',$in)) {
    @lastcheck=<$state_fh>;
    close($state_fh);
}
else {
    # A missing state file is normal on the very first run.
    warn "stati: cannot read $in ($!); treating every status as new\n";
}
$lastcheck[0]='' unless defined $lastcheck[0];
chomp($lastcheck[0]);
# Anything that is not a plain epoch number (empty file, garbage) counts as 0.
$lastcheck[0]=0 unless $lastcheck[0]=~/^\d+$/;
# The feed switched its zone offset from -0400 to +0400 after Daylight Saving
# Time ended, which put its time stamps nine hours off from local time.
# The simplest workaround (for now) is to advance the local last-check time
# by nine hours.
$lastcheck[0]+=32400;
$out='/Users/surly/code/bayes/statuses.txt';
# Three-argument open so the path can never be misread as a mode; a log that
# cannot be opened is reported but does not stop the terminal output.
open(OUTP,'>',$out) or warn "stati: cannot write $out ($!); statuses will not be logged\n";
use Time::Local;
#my $time = timelocal(0,0,0,1,9,109);
#my $string = localtime $time;
#print "the big ball falls at $time => $string\n";
# --- Fetch the friends-status feed ------------------------------------------
# Produces @old: the feed's title and pubDate lines, left-trimmed, with the
# basic XML entities decoded and the opening <title> tag removed.

# Taken before the download so that nothing posted while curl is running can
# fall between two checks.
$checktime=time();

# dbacl is a separate process, so a plain Perl variable never reaches it.
# Export the path through the environment as well (dbacl's manual documents a
# DBACL_PATH environment variable), unless the caller already set one.
$DBACL_PATH="/Users/surly/Dropbox/core/.dbacl";
$ENV{DBACL_PATH}=$DBACL_PATH unless defined $ENV{DBACL_PATH};

# Only curl runs in the shell now, so $? is curl's own exit status.
my @feed=`curl --fail -s "http://www.facebook.com/feeds/friends_status.php?id=[some long number]&key=[some alphanumeric sequence]&format=rss20" -A "Mozilla/4.0"`;
my $curl_status=$?;

# The filtering used to be a chain of egrep/grep/"perl -p -i -e" processes.
# Done in-process it is cheaper, and it fixes two problems of the old
# pipeline: "\s" lost its backslash inside the backticks (so leading "s"
# characters were stripped instead of whitespace), and the three entity
# substitutions replaced a character with itself.
@old=();
for my $raw (@feed) {
    next unless $raw=~/title|pubDate/;   # was: egrep 'title|pubDate'
    next if $raw=~/s Friends/;           # was: grep -v "s Friends" (the channel title)
    $raw=~s/^\s+//;
    # NOTE(review): decoding &amp; &gt; &lt; (in the original order) is the
    # presumed intent of the former no-op substitutions -- confirm against a
    # live feed.
    $raw=~s/&amp;/&/g;
    $raw=~s/&gt;/>/g;
    $raw=~s/&lt;/</g;
    $raw=~s/<title>//g;
    push(@old,$raw);
}

# Remember this check only when the download worked; otherwise the next run
# would skip everything posted since the last successful one.
if ($curl_status==0) {
    if (open(my $state_fh,'>','/Users/surly/.laststati')) {
        print {$state_fh} "$checktime\n";
        close($state_fh) or warn "stati: error writing /Users/surly/.laststati: $!\n";
    }
    else {
        warn "stati: cannot write /Users/surly/.laststati: $!\n";
    }
}
else {
    warn "stati: feed download failed (status $curl_status); keeping the previous last-check time\n";
}
#% dbacl -l twain -g '^([[:alpha:]]+)' -g '[^[:alpha:]]([[:alpha:]]+)' Mark_Twain.txt
#The category twain which is obtained depends only on single alphabetic words in the text file Mark_Twain.txt (and computed digram statistics for prediction). For a second example, the following command builds a smoothed Markovian (word bigram) model which depends on pairs of consecutive words within each line (but pairs cannot straddle a line break):
#all of this to deal with the twitter @whoever grammar AND to eliminate word pair matching since the corpus is so small.
# --- Cut the feed text into one chunk per status ----------------------------
# Every status ends with its </pubDate> tag, so splitting there yields
# "status text</title> <pubDate>date" chunks in @lines.
$smush=join(' ',@old);
# The text after the final </pubDate> is only a newline. Kept as a chunk it
# looked like an extra (n+1)th status with no date, so whitespace-only chunks
# are dropped here.
@lines=grep { /\S/ } split(/<\/pubDate>/,$smush);
#@lines=@old;
##@list = split(/<item>/, $tumblr);
##$latest=$list[1];
##$latest=~s/^.*<description>//;$latest=~s/<\/description>//;
##$latest=~s/<link>.*<\/link>//;
##$latest=~s/<guid>.*<\/guid><pubdate>/ - /;
##$latest=~s/<\/pubDate><\/item>//;
##$latest=~s/ -0400.*//;
###$br=`perl /Users/surly/bin/tumstati`;
###$br=~s/ \(tumblr\)//;
###push(@lines,$br);
#Is there anything new at all? One could simply compare the feed's
# <lastBuildDate>Sun, 30 Aug 2009 09:42:19 -0400</lastBuildDate>
#with lastcheck.
#2009-08-30:
#A weird error,
#Day '' out of range 1..31 at /Users/surly/code/bayes/stati line 108
#has just cropped up because the program is trying to parse an (n+1)th status
#when there are only n. I do not know why, so I am just going to limit it by
#stopping the loop when time<lastchecked.
# --- Show, classify and log every status newer than the last check ----------
# Reads @lines (one chunk per status) and $lastcheck[0] (epoch seconds).
# Each new status is printed to STDOUT, coloured by its dbacl category, and
# written to the OUTP log unless it mentions one of the "boring" people.
use IPC::Open2;

# Classify one status with dbacl and return dbacl's raw output ('' on failure).
# The text is written to dbacl's standard input; no shell is involved, so
# quotes, "$" or backticks inside a status cannot be run as commands.
sub _dbacl_category {
    my ($text)=@_;
    my $verdict;
    my $ran=eval {
        local $SIG{PIPE}='IGNORE';   # a dbacl that exits early must not kill this script
        my $pid=open2(my $from_dbacl,my $to_dbacl,
            'dbacl','-v','-c','ok','-c','bad','-c','urgent');
        print {$to_dbacl} $text;
        close($to_dbacl);            # end of input: dbacl can now answer
        local $/;                    # read the whole answer at once
        $verdict=<$from_dbacl>;
        close($from_dbacl);
        waitpid($pid,0);
        1;
    };
    warn "stati: could not run dbacl: $@" unless $ran;
    return defined $verdict ? $verdict : '';
}

# Month abbreviation -> the 0-based month number timelocal expects.
my %month_index=(
    Jan=>0, Feb=>1, Mar=>2, Apr=>3, May=>4,  Jun=>5,
    Jul=>6, Aug=>7, Sep=>8, Oct=>9, Nov=>10, Dec=>11,
);

# Start "infinitely new"; the loop stops at the first status that is not newer
# than the last check, or at the first empty chunk.
$time=99999999999;
$i=0;
while (($lines[$i])&&($time>$lastcheck[0])) {
    my $line=$lines[$i];
    $line=~s/\<description\>.*/[hyperlinked]/g;   # hyperlink payload -> marker
    $line=~s/\s?\<\/title\>//g;
    $line=~s/\n//g;

    # The feed tag is <pubDate>; the lower-case <pubdate> used before never
    # matched, so the date was taken from the status text itself.
    my ($day,$mon,$year,$hour,$min,$sec)=
        $line=~/<pubDate>\s*(?:\w+,\s*)?(\d{1,2})\s+(\w{3})\s+(\d{4})\s+(\d{1,2}):(\d{2}):(\d{2})/;
    unless (defined $day && exists $month_index{$mon}) {
        # No usable date: skip the chunk rather than reuse the previous
        # status's time (and month).
        warn "stati: skipping item without a parseable pubDate: $line\n" if $line=~/\S/;
        next;
    }
    # NOTE: the feed's zone offset is ignored and the clock time is read as
    # local time, exactly as before.
    my $stamp=eval { timelocal($sec,$min,$hour,$day,$month_index{$mon},$year) };
    unless (defined $stamp) {
        warn "stati: skipping item with an invalid date: $@";
        next;
    }
    $time=$stamp;

    # Debugging output, kept unchanged.
    print "$time: $line (Internal testing)";
    print "$time $lastcheck[0] (Internal testing)"; print "\n";

    next unless $time>$lastcheck[0];

    # Turn "text <pubDate>Sun, 30 Aug 2009 09:42:19 -0400" into
    # "text (Sun, 30 Aug 2009 09:42:19)\n".
    $line=~s/[-+]\d{4}$//;                          # any zone offset, not only 0400
    $line=~s/(\s)+\[hyperlinked\]/ [hyperlinked]/g;
    $line=~s/(\s)+$/)\n/;
    $line=~s/(\s)+<pubDate>/ (/;
    $line=~s/ 's/'s/g;                              # "Name 's" -> "Name's"
    # Quotes no longer need mangling: the text is not passed through a shell.
    $line=~s/^(\s)+/ /g;#kluge to get rid of extra spaces
                        #due to deleting people from @lines

    my $cat=_dbacl_category($line);

    unless ($line=~/boring person1/ || $line=~/boring person2/ || $line=~/boring person3/) {
        print OUTP $line;
    }

    if ($cat=~/ok/) {
        print "$line";
    }
    elsif ($cat=~/bad/) {
        print "\033[0;37;48m$line";
        print "\033[0m";
    }
    elsif ($cat=~/urgent/) {
        print "\033[0;34;48m$line";
        print "\033[0m";
    }
}
continue {
    $i++;
}
# Buffered write errors only show up at close time.
close(OUTP) or warn "stati: error closing the status log: $!\n";
# stati.pl
# Subscribe to:
# Comments (Atom)
