#!/bin/rc
# Parse XML RSS feeds into BARF blog posts for the specified site.
# If a post with a matching _link_ already exists, no new post will
# be created for that item.  Each new post is tagged with the tags
# listed for its feed in the feeds file.
#
# The file argument should point to a file containing one line per feed,
# with fields separated by the | character, in the following format:
#
#	http://feeds.feedburner.com/ImNotReallyStanleyLieber|stanleylieber
#
# where the first field is the feed URL and each additional field is a tag.
# Lines starting with # are skipped.  The file must be given as an
# absolute path (it has to match /*).
#
# Requires 20h's xmlpull and rssread.

rfork en

switch($1){
case /*
	feeds=$1
	site=read.stanleylieber.com
	tags=()
case *
	echo 'Usage: gz file' >[1=2]
	exit usage
}

# Scratch file holding the flattened posts of the feed being processed.
file=/tmp/gz.$pid
werc=/usr/sl/www/werc

# Pick the fetch command.  It is kept as a list and invoked as $cmd so
# that "curl -s" expands to a command plus an argument; a single quoted
# word expanded with $" would be looked up as one program name.
if(test -f /boot/factotum)
	cmd=hget
if not
	cmd=(curl -s)

# cram: filter stdin -> stdout.  Wraps every "link:" and "title:" line in
# HJDIVIDER markers, deletes all newlines, then starts a new line in front
# of each "HJDIVIDERtitle:", so every post ends up on one line.
fn cram{
	ssam '
		,s/^link:.*$/HJDIVIDER&HJDIVIDER/g
		,s/^title:.*$/HJDIVIDER&HJDIVIDER/g
		,s/\n//g
		,s/HJDIVIDERtitle:/\n&/g
	'
}

# scape: filter stdin -> stdout applying a fixed table of substitutions.
# NOTE(review): as found, most entries map a character to itself, the
# first one deletes every space and the "<" entry deletes that character.
# This looks like a table of HTML entities whose left-hand sides were
# decoded somewhere in transit -- compare with a pristine copy before
# relying on it.  The entries are reproduced as found, except that the
# quote in the \'' pattern is doubled so it no longer terminates the rc
# string early.
fn scape{
	ssam '
		,s/ //g
		,s/\"/\"/g
		,s/\"/\"/g
		,s/\&/\&/g
		,s/\&/\&/g
		,s/\''/''/g
		,s/\,/,/g
		,s/\-/-/g
		,s/\./\./g
		,s/\//\//g
		,s/\:/:/g
		,s/\;/;/g
		,s/\<//g
		,s/\>/>/g
		,s/\_/_/g
		,s/\|/\|/g
	'
}

# get_posts: fetch $feed, convert it with rssread, flatten and clean it,
# and leave the result in $file followed by one extra empty line.
fn get_posts{
	$cmd $"feed | rssread | cram | scape >$file
	echo >>$file
}

# parse_posts: create one post directory under $werc/sites/$site/src for
# every line of $file whose link has not been recorded yet.  Lines are
# walked from last to first.  Reads $file, $site, $werc and $tags.
fn parse_posts{
	# Split command output on newlines only, so each line of $file
	# (and each number printed by seq) becomes exactly one list element.
	ifs='
' {
		posts=`{cat $file}
		for(i in `{seq 1 $#posts | sort -nr}){
			post=$posts($i)
			if(! ~ $post ''){
				a_title=`{echo $post | sed 's/^.*HJDIVIDERtitle: //g; s/HJDIVIDER.*$//g'}
				a_date=`{date}
				a_link=`{echo $post | sed 's/^.*HJDIVIDERlink: //g; s/HJDIVIDER.*$//g'}
				# The body is whatever follows the last HJDIVIDER marker.
				a_body=`{echo $post | sed 's/^.*HJDIVIDER//g; s/^.*HJDIVIDER//g; s/^.*HJDIVIDER//g'}

				# Next id: numerically largest entry in src plus one,
				# or 1 when nothing usable was found; then step past
				# any directory that already exists.
				a_id=`{echo `{ls -p $werc/sites/$site/src | sort -n | tail -1}^+1 | bc}
				if(~ $#a_id 0)
					a_id=1
				while(test -d $werc/sites/$site/src/$a_id)
					a_id=`{echo $a_id^+1 | bc}
				post_dir=$werc/sites/$site/src/$a_id

				# Only post items that have a link whose md5 is not
				# already listed in the site's links file.
				if(! ~ $"a_link '' && ! grep -s `{echo $"a_link | md5sum} $werc/sites/$site/links){
					mkdir -p $post_dir/tags	# big fat race
					echo $"a_title >$post_dir/title
					echo $"a_date >$post_dir/date
					echo $a_link(1) >$post_dir/link
					echo $a_link(1) | md5sum >>$werc/sites/$site/links
					# NOTE(review): both quoted strings below are
					# empty as found; they may originally have held
					# markup -- confirm against a pristine copy.
					if(~ $a_link(1) *staticflickr.com*)
						echo '' >$post_dir/body
					echo $"a_body '' >>$post_dir/body
					ifs=' ' {
						for(j in $tags){
							# An empty file per tag, plus an entry
							# in the site-wide tags index.
							>$post_dir/tags/$j
							echo $a_id/tags/$j >>$werc/sites/$site/tags
						}
					}
				}
				# Set the t mode bit on the post and its contents when
				# /boot/factotum exists and the site is *.stanleylieber.com.
				if(test -f /boot/factotum && ~ $site *.stanleylieber.com && test -d $post_dir)
					chmod +t $post_dir $post_dir/* $post_dir/tags/*
			}
		}
	}
}

# Start webfs when both /boot/factotum and /rc/bin/hget exist
# (presumably a system whose hget depends on it -- confirm).
if(test -f /boot/factotum && test -f /rc/bin/hget)
	webfs

# One pass per non-comment line of the feeds file: the first |-separated
# field is the feed URL, the remaining fields are the tags for its posts.
for(i in `{grep -v -e '^#' $feeds}){
	feed=`{echo $"i | sed 's/\|.*$//g'}
	tags=`{echo $"i | sed 's/\|/ /g'}
	tags=$tags(2-)
	get_posts
	parse_posts
}

# Do not leave the scratch file behind.
rm -f $file