Retrieve links and email addresses from a web page

This script started as a Bash coding exercise but turned out to be a very useful way of getting data from websites, and it contains many useful little coding tricks.

#!/bin/sh

##############################################
# Script metadata; these values are only used to build the help text.
NAME_="webextract"
PURPOSE_="extract links and/or email-addresses from a webpage"
# Both -e and -l take the URL to fetch as their argument.
SYNOPSIS_="$NAME_ [-e url] [-l url] [-h]"
OPTIONS_="
-e extract all emailaddresses from a url
-l extract all links from url
-h show help (this)"
REQUIRES_="Curl"
VERSION_="0.9"
#Created by Lx
#licence : GPL3
#################################################

#######################################
# Print the help text and terminate the script.
# Globals:   NAME_, VERSION_, PURPOSE_, SYNOPSIS_, OPTIONS_ (read)
# Outputs:   help text on stderr (name and version in bold)
# Returns:   never returns; exits with status 1
#######################################
usage() {
  # printf interprets the \033 escapes in its format string in every shell;
  # echo only does so in some of them (dash yes, bash no).
  printf '\n\033[1m%s %s \033[0m - %s\nUsage: %s\nOptions: %s\n\n' \
    "$NAME_" "$VERSION_" "$PURPOSE_" "$SYNOPSIS_" "$OPTIONS_" >&2
  exit 1
}

# tmp file set up
# mktemp creates the file atomically under an unpredictable name; the old
# /tmp/tmp.${RANDOM}$$ scheme was guessable, and $RANDOM is empty in
# shells other than bash/ksh/zsh.
tmp_1=$(mktemp /tmp/webextract.XXXXXX) || {
  echo >&2 "cannot create temporary file"
  exit 1
}

# signal trapping and tmp file removal
# The EXIT trap (0) removes the file; HUP/INT/QUIT/TERM are turned into a
# normal exit so that the EXIT trap also runs for them.
trap 'rm -f -- "$tmp_1" >/dev/null 2>&1' 0
trap 'exit 1' 1 2 3 15

#######################################
# Parse the command-line options and print what each one asks for.
#   -e URL  fetch URL, split the page at , ; < > ( ) " ' and space, and
#           print every piece containing "@" with "mailto:" removed
#   -l URL  fetch URL, split the page at < > " ' and space, and print
#           every piece containing "http:" or "https:" with "href=" and
#           "src=" removed
#   -h      show the help text (usage exits the script)
# Globals:   tmp_1 (read; the file it names is overwritten with the page),
#            OPTIND, OPTARG, optname, extract_rc_ (written)
# Arguments: the script's command-line arguments
# Outputs:   results on stdout, diagnostics on stderr
# Returns:   0 if every requested page was fetched, 1 if any fetch failed.
#            OPTIND is still left at the index of the first non-option
#            argument for callers that need it.
#######################################
extract() {
  extract_rc_=0
  OPTIND=1 # start parsing from the first argument on every call
  # Leading ":" = silent mode: the script prints its own diagnostics.
  # "h" takes no argument.
  while getopts ":e:l:h" optname; do
    case "$optname" in
      e)
        # Download first so that a failed fetch can be reported.
        if curl -s -S "$OPTARG" >"$tmp_1"; then
          # Bracket expressions give case-insensitive matching without
          # the GNU-only "I" flag.
          tr ',;<>()"\47 ' '[\n*]' <"$tmp_1" |
            sed -n -e 's/[Mm][Aa][Ii][Ll][Tt][Oo]://g' -e '/@/p'
        else
          extract_rc_=1
        fi
        ;;
      l)
        if curl -s -S "$OPTARG" >"$tmp_1"; then
          # s\{0,1\} makes the filter accept https: links as well.
          tr '<>"\47 ' '[\n*]' <"$tmp_1" |
            sed -n -e 's/[Hh][Rr][Ee][Ff]=//g' \
              -e 's/[Ss][Rr][Cc]=//g' \
              -e '/[Hh][Tt][Tt][Pp][Ss]\{0,1\}:/p'
        else
          extract_rc_=1
        fi
        ;;
      h)
        usage
        ;;
      \?)
        printf ' Unknown option %s\n' "$OPTARG" >&2
        usage
        ;;
      :)
        # In silent mode OPTARG holds the option that lacks its argument.
        printf ' Option -%s requires an argument\n' "$OPTARG" >&2
        usage
        ;;
      *)
        # should not occur
        echo "unknown error" >&2
        ;;
    esac
  done
  return "$extract_rc_"
}

#######################################
# Debug helper: print each argument on its own line, wrapped in [ ].
# Globals:   p (written; holds the last argument printed)
# Arguments: any number of strings
# Outputs:   one "[arg]" line per argument on stdout
#######################################
showargs() {
  while [ "$#" -gt 0 ]; do
    p=$1
    echo "[$p]"
    shift
  done
}

###
# Entry point: with no arguments show the help text, otherwise hand the
# arguments to extract. "$@" is quoted so that each argument reaches
# extract exactly as typed (no re-splitting on spaces, no glob expansion
# of characters such as ? or * in a URL).
if [ "$#" -gt 0 ]; then
  extract "$@"
else
  usage
fi

Post your comment

Comments

No one has commented on this page yet.

RSS feed for comments on this page | RSS feed for all comments