+# RobotHref - resolve a link found in page "url" into an absolute http URL.
+#
+# Arguments:
+#   url    key of the containing page; URL($url,host) and URL($url,path)
+#          provide the base used for links without a host or with a
+#          relative path
+#   hrefx  name of the caller's variable holding the raw link; on success
+#          it is overwritten with the absolute form method://host/path
+#   hostx  name of the caller's variable that receives the host
+#   pathx  name of the caller's variable that receives the normalized path
+#
+# Returns 1 when the link was resolved.  Returns 0 when the link uses a
+# method other than http, names a host that matches none of the glob
+# patterns in the global list "domains", or points into the containing
+# page itself (empty link, or a link starting with "#" or "?").
+# Note that the host variable may already have been written when 0 is
+# returned.
+proc RobotHref {url hrefx hostx pathx} {
+    global URL domains
+    upvar $hrefx href
+    upvar $hostx host
+    upvar $pathx path
+
+    # puts "Ref url = $url href=$href"
+    # get method (if any); links without one are taken to be http
+    if {![regexp {^([^/:]+):(.*)} $href x method hpath]} {
+        set hpath $href
+        set method http
+    } else {
+        if {[string compare $method http]} {
+            return 0
+        }
+    }
+    # get host (if any); links without one inherit the page's host
+    if {![regexp {^//([^/]+)(.*)} $hpath x host epath]} {
+        set epath $hpath
+        set host $URL($url,host)
+    } else {
+        if {![string length $epath]} {
+            set epath /
+        }
+        # an explicit host must match one of the allowed domain patterns
+        set ok 0
+        foreach domain $domains {
+            if {[string match $domain $host]} {
+                set ok 1
+                break
+            }
+        }
+        if {!$ok} {
+            return 0
+        }
+    }
+    if {[regexp {^(\#|\?)} $epath]} {
+        # within page
+        return 0
+    } elseif {![regexp {^([/][^\#?]*)} $epath x path]} {
+        # relative path
+        if {![regexp {^([^\#?]+)} $epath x rel]} {
+            # Empty reference (e.g. href="" or "http:"): it designates the
+            # containing page, and there is no path component to resolve.
+            return 0
+        }
+        # A base path with a file extension is taken to name a document,
+        # so the link is resolved against its directory; otherwise the
+        # base path itself is treated as the directory.
+        set ext [file extension $URL($url,path)]
+        if {[string compare $ext {}]} {
+            set dpart [file dirname $URL($url,path)]
+        } else {
+            set dpart $URL($url,path)
+        }
+        set path [string trimright $dpart /]/$rel
+    }
+    # Remove "." and ".." segments with a forward, stack-based pass.
+    # "out" always keeps the empty root segment as its first element, so
+    # ".." can never climb above "/" and the result always starts with
+    # a slash.  A trailing "." or ".." leaves a trailing slash behind.
+    set segs [split $path /]
+    set last [expr {[llength $segs] - 1}]
+    set out [list {}]
+    set i 0
+    foreach seg $segs {
+        switch -- $seg {
+            . {
+                if {$i == $last} {
+                    lappend out {}
+                }
+            }
+            .. {
+                if {[llength $out] > 1} {
+                    set out [lrange $out 0 [expr {[llength $out] - 2}]]
+                }
+                if {$i == $last} {
+                    lappend out {}
+                }
+            }
+            default {
+                if {$i > 0 || [string length $seg]} {
+                    lappend out $seg
+                }
+            }
+        }
+        incr i
+    }
+    set path [join $out /]
+    set href "$method://$host$path"
+    # puts "Ref href = $href"
+    return 1
+}
+
+# Robot401 - record "url" as a forbidden link.
+#
+# Reports the link on stdout, hands its host/path (taken from the global
+# URL array) to RobotFileUnlink for the "unvisited" area, and, unless
+# RobotFileExist already reports an entry in the "forbidden" area, creates
+# one by opening it with RobotFileOpen and closing it straight away.
+proc Robot401 {url} {
+    global URL
+
+    puts "Bad link $url"
+    set linkHost $URL($url,host)
+    set linkPath $URL($url,path)
+    RobotFileUnlink unvisited $linkHost $linkPath
+    if {[RobotFileExist forbidden $linkHost $linkPath]} {
+        return
+    }
+    # the entry only has to exist, so nothing is written to it
+    close [RobotFileOpen forbidden $linkHost $linkPath]
+}
+
+# Robot404 - record "url" as a bad link.
+#
+# Reports the link on stdout, hands its host/path (taken from the global
+# URL array) to RobotFileUnlink for the "unvisited" area, and, unless
+# RobotFileExist already reports an entry in the "bad" area, creates one
+# by opening it with RobotFileOpen and closing it straight away.
+proc Robot404 {url} {
+    global URL
+
+    puts "Bad link $url"
+    set linkHost $URL($url,host)
+    set linkPath $URL($url,path)
+    RobotFileUnlink unvisited $linkHost $linkPath
+    if {[RobotFileExist bad $linkHost $linkPath]} {
+        return
+    }
+    # the entry only has to exist, so nothing is written to it
+    close [RobotFileOpen bad $linkHost $linkPath]
+}
+
+# Robot301 - follow a redirect from "url" to "tourl".
+#
+# Reports the redirect on stdout and hands the host/path of "url" to
+# RobotFileUnlink for the "unvisited" area.  The target is then resolved
+# with RobotHref relative to "url"; when that succeeds and RobotFileExist
+# reports no "unvisited" entry for the resolved host/path, one is created
+# by opening it with RobotFileOpen and closing it straight away.
+proc Robot301 {url tourl} {
+    global URL
+
+    puts "Redirecting from $url to $tourl"
+    RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
+    # RobotHref fills in host and path (and rewrites tourl) by name
+    if {![RobotHref $url tourl host path]} {
+        return
+    }
+    if {[RobotFileExist unvisited $host $path]} {
+        return
+    }
+    close [RobotFileOpen unvisited $host $path]
+}
+
+proc Robot200 {url} {