#!/bin/bash
# make_publish.bash
# Extracts the latest HTM files from the web site and creates RSS feed XML file.
# Creates Send_Publish.bash file to ftp the contents.
# (c) Dennis Adams Associates Ltd.
# This file may be freely used and copied without prior permission,
# providing that an acknowledgement of origin is attached.
# No warranty is expressed or implied regarding use !
#
# Usage: make_publish.bash [TAILSIZE]
#   TAILSIZE  number of most recently modified *.htm files to upload
#             (default 10).
#
# NOTE(review): the copy of this script that was reviewed had been flattened
# onto a few lines and everything that looked like an HTML/XML tag had been
# stripped (the "<<" here-document operators, the XML inside the
# here-documents, and the tag names inside the awk and sed patterns).
# Everything marked "reconstructed" below was rebuilt from the surviving text
# and the RSS 2.0 element names; confirm it against a pristine copy.

# name of file to create:
MANIFEST=$HOME/Send_Publish.bash

# name of the generated feed, relative to WEBDIR:
RSSFILE=rss/news.xml

# root directory of local copy of web site:
WEBDIR="/web"

# URL of website for including in RSS feeds:
WEBSITE=www.yourdomain.com

# domain name and login id for FTP site for uploading:
FTPSITE=www.yourdomain.com
WEBUSER=yourusername

# email addresses of webmaster and RSS editor for showing on XML feed:
WEBMASTER=webmaster@yourdomain.com
RSSEDITOR=info@yourdomain.com

# no of htm files to include for RSS and for uploading to ftp site.
RSSSIZE=5
TAILSIZE=${1:-10}

# The count is handed to "tail -n", so reject anything that is not a number.
case "$TAILSIZE" in
  '' | *[!0-9]*)
    echo "Usage: $0 [number-of-files-to-upload]" >&2
    exit 2
    ;;
esac

# names of temporary files (unique names, removed on every exit path):
RSSTMP1=$(mktemp "${TMPDIR:-/tmp}/rss.tmp1.XXXXXX") || exit 1
RSSEXTRACTS=$(mktemp "${TMPDIR:-/tmp}/rssextracts.XXXXXX") || exit 1
trap 'rm -f -- "$RSSTMP1" "$RSSEXTRACTS"' EXIT

echo "Starting..."
echo "No of files to include=$TAILSIZE"
echo "No of files in RSS feed=$RSSSIZE"

# Go home first so that a relative WEBDIR is resolved against $HOME.
cd || exit 1
cd "$WEBDIR" || { echo "Cannot change directory to $WEBDIR" >&2; exit 1; }
echo "Default Directory is: "
pwd

#
# tidy up old rss file...
# (Only reported; the rename to a date-stamped copy is disabled.)
DATESTAMP=$(date "+%Y%m%d%H%M")
if [ -f "$RSSFILE" ]; then
  echo "Found earlier file $RSSFILE"
  # echo "Renaming $RSSFILE to $RSSFILE.$DATESTAMP"
  # mv $RSSFILE $RSSFILE.$DATESTAMP
fi

#
# create a new rss file and write the header to it...
# NOTE(review): element names reconstructed; only the text content survived.
echo "Creating $RSSTMP1..."
cat > "$RSSTMP1" <<ENDOFTOP1
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Your Website Title</title>
<language>en-us</language>
<copyright>Copyright 2005. All rights reserved. This RSS file is offered to individuals and non-commercial organizations only. Commercial websites wishing to use RSS files from this site, please contact $RSSEDITOR.</copyright>
<managingEditor>$RSSEDITOR</managingEditor>
<webMaster>$WEBMASTER</webMaster>
<description>Your description of your site... .</description>
<link>http://$WEBSITE</link>
ENDOFTOP1

#
# work out the date properly and the rest of the header ...
echo "Publication date is..."
date
# -u and the C locale make the stamp really GMT with English day and month
# names; the earlier form printed local time with a literal "GMT" suffix.
# NOTE(review): the <pubDate> wrapper is reconstructed.
printf '<pubDate>%s</pubDate>\n' \
  "$(LC_ALL=C date -u "+%a, %d %b %Y %H:%M:%S GMT")" >> "$RSSTMP1"

#
# You can have as many <category> tags as you want...
# NOTE(review): element names reconstructed. The last text line may have been
# <description> rather than <title>; RSS 2.0 requires <title> inside <image>.
cat >> "$RSSTMP1" <<ENDOFTOP2
<category>Information Technology Consulting</category>
<image>
<url>http://$WEBSITE/graphics/logo.jpg</url>
<link>http://$WEBSITE</link>
<width>144</width>
<height>64</height>
<title>Your description of your site..</title>
</image>
ENDOFTOP2

#
# extract the names of the all the candidate htm files that are changed...
echo "Analysing Items..."
ls -tr -- *.htm | tail -n "$RSSSIZE" > "$RSSEXTRACTS"
# Add index.html in all cases...
echo index.html >> "$RSSEXTRACTS"

#
# cycle through all candidate htm files and extract item information...
# For each file one <item> is written: the last line containing <title>, the
# first line containing <p> (falling back to the first <h1>, then the first
# <h2>) as the description, and a link built from the site and file name.
# NOTE(review): the tag names in the patterns and in the printed item are
# reconstructed. Plain /<title>/ is used instead of /\<title\>/ because GNU awk
# reads \< and \> as word boundaries.
while IFS= read -r HTMFILE; do
  #echo found: $HTMFILE
  if [ ! -f "$HTMFILE" ]; then
    echo "Skipping missing file: $HTMFILE" >&2
    continue
  fi
  awk -v htmfile="$HTMFILE" -v website="$WEBSITE" '
    /<title>/         { ttitle = $0 }
    /<p>/  && !gotp1  { p1 = $0; gotp1 = 1 }
    /<h1>/ && !goth1  { h1 = $0; goth1 = 1 }
    /<h2>/ && !goth2  { h2 = $0; goth2 = 1 }
    END {
      para1 = p1
      if (para1 == "") { para1 = h1 }
      if (para1 == "") { para1 = h2 }
      print "<item>"
      printf "\t%s\n", ttitle
      # "%s" keeps a percent sign in the page text from being read as a
      # format directive.
      printf "\t<description>%s</description>\n", para1
      printf "\t<link>http://%s/%s</link>\n", website, htmfile
      print "</item>"
    }
  ' "$HTMFILE"
done < "$RSSEXTRACTS" >> "$RSSTMP1"

#
# Process the internal structure of the html files which have download links.
# Structure is assumed to be: [tag lost] is the break to a new file title.
# The following line is the description.
#
# TODO(review): this section could not be reconstructed. Its loop header (the
# list of files it read), the start of the awk program and every tag pattern
# were lost, so it is disabled here. Restore it from a pristine copy.
# The surviving fragments, verbatim:
#
#   \\/ { if (gottitle==0) { ttitle=$0; gottitle=1; next } }
#   { if (gottitle==1 && gotdesc==0 ) { desc=$0; gotdesc=1} }
#   /\"; printf "\t%s\n", ttitle; printf "\t" ; printf desc ;
#   printf "\n" ; printf "\t%s\n", llink ; print "" }
#   ' | sed 's//s/">[^<>]*//g; ' >> $RSSTMP1
#   done

#
# Add the channel footer...
#echo "Adding footer..."
# NOTE(review): closing tags reconstructed.
echo "</channel>" >> "$RSSTMP1"
echo "</rss>" >> "$RSSTMP1"

#
# strip out all the extra HTML (non-XML) tags...
# NOTE(review): the tag names are reconstructed from the closing-tag rules
# that survived; the /<description>/ address on the first rule is a guess.
# One more tag-removal rule followed the </strong> rule, but its tag name was
# lost, so it is omitted. The quote rule may originally have targeted &quot;.
# The two "b" lines are an addition: without them the quote rule would also
# strip the attribute quotes in the XML declaration and the <rss> tag.
sed '
/^<?xml/b
/^<rss /b
/<description>/s/<a[^>]*>//g
s/<p>//g
s/<\/p>/ /g
s/<h1>//g
s/<\/h1>/ /g
s/<h2>//g
s/<\/h2>/ /g
s/<\/a>/ /g
s/<tr>/ /g
s/<\/tr>/ /g
s/<td>/ /g
s/<\/td>/ /g
s/<br>/ /g
s/<strong>//g
s/<\/strong>//g
s/"//g
' "$RSSTMP1" > "$RSSFILE"
echo "Created $RSSFILE ..."

#
# create the ftp file...
# The generated script feeds an ftp session from a here-document: the login
# name, transfer settings, the newest TAILSIZE *.htm files plus index.html,
# and finally rss/news.xml.
# NOTE(review): the "<<ENDOFSTUFF" operator is reconstructed from the
# surviving terminator line.
echo "Creating $MANIFEST..."
{
  printf '%s\n' '#!/bin/bash'
  echo "echo \"Running ftp...\""
  echo "ftp -v $FTPSITE <<ENDOFSTUFF"
  echo "$WEBUSER"
  echo "passive"
  echo "prompt"
  echo "ascii"
  echo "cd htdocs"
  echo "pwd"
  echo "lcd \"$WEBDIR\""
  echo "lpwd"
  ls -tr -- *.htm | tail -n "$TAILSIZE" | sed 's/^/put /'
  echo "put index.html"
  echo "cd rss"
  echo "pwd"
  echo "lcd rss"
  echo "lpwd"
  echo "put news.xml"
  echo "bye"
  echo "ENDOFSTUFF"
} > "$MANIFEST"
chmod 775 "$MANIFEST"
echo "Created $MANIFEST ..."

#
# cleanup...
# The temporary files are removed by the EXIT trap set near the top.
exit