Main Page | Awards and Fellowships | Detailed Resume | Projects | Selected Publications


Page eavesdropping

This script monitors a page, or part of it, searching for a given set of words... I made it to monitor a news board in the region where I live, searching for a bike for my son ;). But it can be used to monitor virtually any page. It is written in bash, and you can easily change it to monitor other pages at different time intervals. Since you are usually interested in just one part of the page, you can configure which part of the page the script should focus on: for example, the part between the strings “Begin forum” and “End forum”. Probably there are hundreds of other, and better, ways to do this... but this one works reasonably well :).

Here is the script, without any formatting.

  



#!/bin/bash

# LINK to the page you want to eavesdrop
# I will leave this one as example but change it to your own page of interest
LINK="http://riviera.angloinfo.com/forum/forum.asp?FID=8"

# email to where the results should be sent
# (replace the placeholder; it is quoted so that < and > are not taken as
# shell redirections)
EMAIL="<Your e-mail>"

# email subject (avoid spaces: the subject may be handed to mail unquoted)
SUBJECT="<The_Subject_Of_Your_Email>"

# words that are interesting to you
#WORDS=( any series of words you are interested in separated by space )
# Each word is also used as the base name of the <word>.old / <word>.new
# bookkeeping files, so keep the words file-name friendly.
WORDS=( away free cheap bicycle bike give )

# polling time, i.e. the time between two script runs, in sleep(1) syntax
# (5m = 5 minutes, 10s would be 10 seconds)
SLEEP_TIME="5m"

# begin of the part of the page the script should monitor,
# configured for the previous URL, for another one you should change this
BEGIN_STRING="Current Topics"

# end of the part of the page the script should monitor,
# the same remark as for the previous one applies
END_STRING="New posts"

############
# Functions
############

####
# - initEnvironment
# Cleans the environment for the first run.
#    - For every entry of the global WORDS array, removes <word>.old and
#      <word>.new from the current directory and recreates an empty
#      <word>.old, so the first comparison starts from a clean slate.
# Globals:   WORDS (read)
# Outputs:   one progress line per word on stdout
####
function initEnvironment() {
  local word

  # Quoted expansions: a word containing spaces or glob characters must map
  # to exactly one pair of files.
  for word in "${WORDS[@]}"; do
    echo "rm -f $word.old"
    rm -f -- "$word.old" "$word.new"
    : > "$word.old"
  done
}
export -f initEnvironment

####
# - parseFile
# Gets the important part of the page and writes it to the file
# interestingPart.parsed in the current directory.
#
# The page is read line by line and every line is broken into pieces at
# each '<', '>' and '=' character; empty pieces are dropped.
#    - Lines before the first line containing BEGIN_STRING are ignored.
#    - On that line, only the pieces AFTER the piece containing
#      BEGIN_STRING are kept.
#    - Following lines are kept entirely, until a line containing
#      END_STRING is met; on that line the pieces up to and including the
#      one containing END_STRING are kept, and parsing stops.
# Both markers are matched as literal, case-sensitive text. END_STRING is
# only looked for on lines after the BEGIN_STRING line.
#
# Globals:   BEGIN_STRING, END_STRING (read)
# Arguments: $1 - path of the downloaded page
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 on success, 1 if a marker is empty or the page is unreadable
####
function parseFile {
  local page=$1

  rm -f interestingPart.parsed
  echo "Parsed file begin: $page"

  if [[ -z $BEGIN_STRING || -z $END_STRING ]]; then
    echo "parseFile: BEGIN_STRING and END_STRING must not be empty" >&2
    return 1
  fi
  if [[ ! -r $page ]]; then
    echo "parseFile: cannot read '$page'" >&2
    return 1
  fi

  # The page is fed through a redirection scoped to this single command, so
  # the stdin of the calling script is left untouched. The markers travel
  # through the environment so awk does not interpret backslashes in them.
  PARSE_BEGIN=$BEGIN_STRING PARSE_END=$END_STRING awk '
    BEGIN {
      start_marker = ENVIRON["PARSE_BEGIN"]
      stop_marker  = ENVIRON["PARSE_END"]
      in_region = 0
    }
    {
      count = split($0, piece, /[<>=]/)

      if (!in_region) {
        if (index($0, start_marker) == 0)
          next
        in_region = 1
        passed = 0
        for (i = 1; i <= count; i++) {
          if (!passed) {
            if (index(piece[i], start_marker) > 0)
              passed = 1
          } else if (piece[i] != "") {
            print piece[i]
          }
        }
        next
      }

      last_line = (index($0, stop_marker) > 0)
      for (i = 1; i <= count; i++) {
        if (piece[i] != "")
          print piece[i]
        if (last_line && index(piece[i], stop_marker) > 0)
          break
      }
      if (last_line)
        exit
    }
  ' < "$page" > interestingPart.parsed || return 1

  echo "Parsed file end"
}
export -f parseFile


# Pending notification text. Lines are separated by the marker "##".
smail=""

####
# - sendEmail
# Sends an e-mail with the text held in the global smail,
# if that text is not empty.
#    - Every "##" marker becomes a line break.
#    - Every " >" becomes a line break followed by ">", so diff lines
#      that were joined with spaces start on a line of their own again.
#    - LINK is appended as the last line of the message.
# Globals:   smail (read, rewritten with the formatted text),
#            LINK, SUBJECT, EMAIL (read)
# Outputs:   debug messages on stdout
# Returns:   0 when there is nothing to send, otherwise the status of mail
####
function sendEmail {
  echo "smail---$smail"

  # Formats the output
  smail=${smail//##/$'\n'}
  smail=${smail// >/$'\n>'}

  # If there are new occurrences, sends e-mail
  if [[ -n $smail ]]; then
    echo "sending mail -$smail-"
    # Quoted so the formatted line breaks reach the mail body intact.
    printf '%s\n%s\n' "$smail" "$LINK" | mail -s "$SUBJECT" "$EMAIL"
  fi
}
export -f sendEmail


####
# - eavesdrop
# Fetches the page, breaks it and verifies if there is
# any new occurrence of the observed words.
#    - Fetches the page with wget into page.html
#    - Calls parseFile to extract the monitored area into
#      interestingPart.parsed
#    - For each word of WORDS
#        - Searches (case-insensitively) for lines mentioning the word and
#          stores them, sorted, in <word>.new
#        - If something was found, compares it with <word>.old,
#          ignoring whitespace and blank-line differences
#        - If it differs, appends the word and the diff to smail, with
#          "##" standing for a line break
#        - Makes this occurrence set the old one
#    - Calls sendEmail, which formats smail and mails it when not empty
# Globals:   LINK, WORDS (read), smail (reset, then written)
# Outputs:   debug messages on stdout, errors on stderr
# Returns:   1 if the download or the parsing failed, otherwise the status
#            of sendEmail
####
function eavesdrop {
  local word changes
  smail=""

  # Fetches the page
  if ! wget -t 2 -O page.html "$LINK"; then
    echo "wget error" >&2
    return 1
  fi

  parseFile page.html || return 1

  # For each word from the list
  for word in "${WORDS[@]}"; do

    # Searches for occurrences; the result is already sorted here, so no
    # second in-place sort (which would truncate the file) is needed.
    grep -i -- "$word" interestingPart.parsed | sort > "$word.new"

    # An empty result is ignored and the old set is kept. This decreases
    # the messages regarding topics that left the first page and later
    # came back to it.
    if [[ -s "$word.new" ]]; then

      # A missing old set is treated as an empty one.
      [[ -e "$word.old" ]] || : > "$word.old"

      # Verifies if the found ones are different from the previous ones
      changes=$(diff -E -b -w -B -- "$word.old" "$word.new")

      # if so, files it to send, if not does nothing
      if [[ -n $changes ]]; then
        smail="${smail:+${smail} }-----## ${word} new instance## ${changes//$'\n'/##}## ##"
        echo "$smail"
      fi

      echo "move $word"
      # This occurrence set becomes the old one
      mv -- "$word.new" "$word.old"
    else
      rm -f -- "$word.new"
    fi
    echo "smail = $smail"

  done

  sendEmail
}
export -f eavesdrop

################
# Main Program
################

# inits the environment
initEnvironment

####
# loops forever, calling the eavesdrop function once per
# SLEEP_TIME interval (five minutes with the default setting)
####
while true; do
  date
  echo "calling function"
  echo ""
  # A failed round is not fatal: the next round simply tries again.
  eavesdrop
  sleep "$SLEEP_TIME"
done


© Daniel Câmara
Last revised: Mar/19/2008 (_camara@NoSpamPlease.eurecom.fr)