#!/bin/sh
# Clean up URL address mess left by w3get
# should do this in the w3get perl script but it looks like line noise to me.

# The source directory is a required first argument.  When it is missing,
# print a short hint (including an example path) and stop with status 1.
case $# in
    0)
        echo "need dir to copy fm"
        echo "how about ~/tmp/html_get/www.cis.ohio-state.edu/hypertext/faq/usenet/tcl-faq/part1"
        exit 1
        ;;
esac

# Directory holding the fetched files to clean up (first argument).
fdir=$1

# Filter every regular file found directly in $fdir through sed and store
# the result under the same base name in the current working directory.
# Exits with status 2 when $fdir contains nothing at all.
for i in "$fdir"/*
do
    # Sub-directories are not descended into.
    if [ -d "$i" ] ; then
        continue
    fi

    if [ ! -f "$i" ] ; then
        # An unmatched glob leaves the literal pattern in $i, which names
        # nothing: the directory is empty or does not exist.
        if [ ! -e "$i" ] && [ ! -L "$i" ] ; then
            echo "No files in $fdir" >&2
            echo "exiting..." >&2
            exit 2
        fi
        # Something exists but is not a regular file (dangling symlink,
        # FIFO, ...): skip it instead of aborting the whole run.
        echo "skipping $i: not a regular file" >&2
        continue
    fi

    # Base name of the input; also the name of the output file.
    fto=${i##*/}
    printf '%s\n' "$fto"

    # Output goes to a temporary file first and is renamed afterwards, so
    # that running with $fdir as the current directory does not truncate
    # the input before sed has read it, and a failed sed leaves no
    # half-written result behind.  The ./ prefix protects names that
    # start with '-'.
    tmp=./$fto.$$.tmp

    # sed expressions, in order:
    #   1    drop the explicit ":80" port after "edu"
    #   2    turn HOPS_LOCAL references into the tcl-faq tree into
    #        relative "../" links
    #   3    turn HOPS_LOCAL references to www.sco.com into http:// URLs
    #   4,5  remove the spurious HTML-escaped '>' (&#62;) that follows an
    #        ftp: or http: URL right before a '"' or '<' in links and
    #        descriptions (added 14-Dec-94)
    #   6    rewrite any remaining HOPS_LOCAL prefix to "http:/", so that
    #        HOPS_LOCAL/host/... becomes http://host/... (added 14-Dec-94)
    if sed -e 's^edu:80^edu^g' \
           -e 's^HOPS_LOCAL/www.cis.ohio-state.edu/hypertext/faq/usenet/tcl-faq/^../^g' \
           -e 's^HOPS_LOCAL/www.sco.com^http://www.sco.com^g' \
           -e 's/\(ftp:[^&]*\)&#62;\(["<]\)/\1\2/g' \
           -e 's/\(http:[^&]*\)&#62;\(["<]\)/\1\2/g' \
           -e 's^HOPS_LOCAL^http:/^g' \
           < "$i" > "$tmp"
    then
        mv -f "$tmp" "./$fto"
    else
        # Keep going with the remaining files; just report this one.
        echo "failed to convert $i" >&2
        rm -f "$tmp"
    fi

done
