-
- set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
- set ti 0
- if {[info exists URL($url,line)]} {
- set htmlContent [join $URL($url,line)]
-
- htmlSwitch $htmlContent \
- title {
- if {!$ti} {
- headSave $url $out $body
- set ti 1
- }
- } a {
- if {![info exists parm(href)]} continue
- if {!$ti} {
- headSave $url $out "untitled"
- set ti 1
- }
-
- if {[regexp {^\#} $parm(href)]} {
- continue
- } elseif {[regexp {^([^:]+):([^#]+)} $parm(href) x method hpath]} {
- if {![string compare $method http]} {
- if {![regexp {^//([^/]+)(.*)} $hpath x host path]} {
- set host $URL($url,host)
- set path $hpath
- }
- if {![regexp {\.dk$} $host]} continue
- } else {
- continue
- }
- } elseif {[regexp {^([/~][^#]*)} $parm(href) x path]} {
- set host $URL($url,host)
- set method http
- } else {
- puts " href=$parm(href)"
- set ext [file extension $URL($url,path)]
- if {[string compare $ext {}]} {
- set dpart [file dirname $URL($url,path)]
- } else {
- set dpart $URL($url,path)
- }
- regexp {^([^#]+)} $parm(href) x path
- set host $URL($url,host)
- set path [string trimright $dpart /]/$path
- set method http
+
+ puts "Bad URL $url, $code"
+ set fromurl {}
+ set distance -1
+ if {[RobotFileExist unvisited $URL($url,hostport) $URL($url,path)]} {
+ set inf [RobotFileOpen unvisited $URL($url,hostport) $URL($url,path) r]
+ RobotReadRecord $inf fromurl distance
+ RobotFileClose $inf
+ }
+ RobotFileUnlink unvisited $URL($url,hostport) $URL($url,path)
+ if {![RobotFileExist bad $URL($url,hostport) $URL($url,path)]} {
+ set outf [RobotFileOpen bad $URL($url,hostport) $URL($url,path)]
+ RobotWriteRecord $outf $fromurl $distance
+ RobotFileClose $outf
+ }
+}
+
+# RobotRedirect --
+#
+#   Handle a redirect from $url to $tourl.  The redirecting URL is
+#   recorded in the "bad" area, the redirect target is queued in the
+#   "unvisited" area when appropriate, and finally the "unvisited"
+#   record of $url itself is removed.
+#
+# Arguments:
+#   url     URL that answered with a redirect; URL($url,hostport) and
+#           URL($url,path) must be set in the global URL array.
+#   tourl   Redirect target.  Passed by name to RobotHref, which
+#           presumably normalises it and fills in host/path -- confirm
+#           against RobotHref.
+#   code    Not used by this procedure; kept so callers need not change.
+#
+# Side effects:
+#   Writes to stdout, creates/removes robot record files and calls
+#   "exit 1" if the final unlink raises an error.
+proc RobotRedirect {url tourl code} {
+    global URL
+
+    puts "Redirecting from $url to $tourl"
+
+    # Pick up the referrer and link distance stored for $url, if any.
+    set distance {}
+    set fromurl {}
+    if {[RobotFileExist unvisited $URL($url,hostport) $URL($url,path)]} {
+        set inf [RobotFileOpen unvisited $URL($url,hostport) $URL($url,path) r]
+        RobotReadRecord $inf fromurl distance
+        RobotFileClose $inf
+    }
+
+    # File the redirecting URL under "bad" unless a record is already there.
+    if {![RobotFileExist bad $URL($url,hostport) $URL($url,path)]} {
+        set outf [RobotFileOpen bad $URL($url,hostport) $URL($url,path)]
+        RobotWriteRecord $outf $fromurl $distance
+        RobotFileClose $outf
+    }
+
+    if {[RobotHref $url tourl host path]} {
+        if {![RobotFileExist visited $host $path]} {
+            # Target not fetched yet: queue it unless it is already queued.
+            if {![RobotFileExist unvisited $host $path]} {
+                set outf [RobotFileOpen unvisited $host $path]
+                RobotWriteRecord $outf $fromurl $distance
+                RobotFileClose $outf
+            }
+        } else {
+            # Target already fetched: requeue it only if this route is
+            # shorter than the distance stored in its "visited" record.
+            set olddistance {}
+            set inf [RobotFileOpen visited $host $path r]
+            RobotReadRecord $inf oldurl olddistance
+            RobotFileClose $inf
+            # An empty distance is replaced by 1000 before comparing.
+            if {[string length $olddistance] == 0} {
+                set olddistance 1000
+            }
+            if {[string length $distance] == 0} {
+                set distance 1000
+            }
+            puts "distance=$distance olddistance=$olddistance"
+            # Braced condition: the values come from record files and must
+            # not be substituted a second time as expression text.
+            if {$distance < $olddistance} {
+                # NOTE(review): this record stores $tourl as the referrer
+                # while the branch above stores $fromurl -- confirm intent.
+                set outf [RobotFileOpen unvisited $host $path]
+                RobotWriteRecord $outf $tourl $distance
+                RobotFileClose $outf
+            }
+        }
+    }
+
+    # The redirecting URL has been handled; drop it from the queue.
+    if {[catch {RobotFileUnlink unvisited $URL($url,hostport) $URL($url,path)}]} {
+        puts "unlink failed"
+        exit 1
+    }
+}
+
+proc RobotTextHtml {url out} {
+ global URL maxDistance
+
+ set distance 0
+ if {$maxDistance < 1000 && [info exists URL($url,dist)]} {
+ set distance [expr $URL($url,dist) + 1]
+ }
+ htmlSwitch $URL($url,buf) \
+ title {
+ puts $out "<title>$body</title>"
+ } -nonest meta {
+ puts -nonewline $out "<meta"
+ foreach a [array names parm] {
+ puts -nonewline $out " $a"
+ puts -nonewline $out {="}
+ puts -nonewline $out $parm($a)
+ puts -nonewline $out {"}