+# wellform --
+#   Reduce an HTML fragment to plain text that can be embedded safely as
+#   XML character data.
+#
+# Arguments:
+#   body   HTML fragment (for example the text of an anchor element)
+#
+# Results:
+#   The text with comments, <script> elements and all other tags removed,
+#   non-breaking spaces turned into ordinary spaces, and the XML special
+#   characters &, < and > replaced by their predefined entities.
+proc wellform {body} {
+    # Comments become a single space so the surrounding words stay apart.
+    regsub -all {<!--[^-]*-->} $body { } text
+    # Drop script elements together with their content.
+    regsub -all -nocase {<script[^<]*</script>} $text {} text
+    # Drop every remaining tag.
+    regsub -all {<[^\>]+>} $text {} text
+    # Non-breaking spaces (entity or U+00A0) are treated as plain spaces.
+    # This must happen before '&' is escaped below.
+    regsub -all {&nbsp;|\u00a0} $text { } text
+    # In a regsub substitution a bare '&' stands for the matched text, so
+    # the literal ampersand of each entity has to be written as '\&'.
+    regsub -all {&} $text {\&amp;} text
+    # Angle brackets that were not part of a tag would otherwise break the
+    # enclosing XML element.
+    regsub -all {<} $text {\&lt;} text
+    regsub -all {>} $text {\&gt;} text
+    return $text
+}
+
+# link --
+#   Handle one hyperlink found on a page: write a <cr> record describing
+#   it to $out and, if the target is not yet known at a distance this
+#   short, write an "unvisited" record for it.
+#
+# Arguments:
+#   task      task name; used as key into the global control array and
+#             passed to the RobotFile* helpers
+#   url       URL of the page containing the link; stored in the record
+#   out       channel that receives the <cr> record
+#   href      link target; handed to RobotHref by name, which presumably
+#             normalises it in place -- TODO confirm
+#   body      HTML text of the link, emitted as the description
+#   distance  distance of the target; links farther away than
+#             control($task,distance) are ignored
+#
+# Results:
+#   None.
+proc link {task url out href body distance} {
+    global URL control
+
+    # Too far away: nothing to record.
+    if {$distance > $control($task,distance)} return
+
+    # RobotHref gets href, host and path by name; host and path are only
+    # defined through this call. A false result ends processing here.
+    if {![RobotHref $task $url href host path]} return
+
+    # NOTE(review): href is written without XML escaping, so a target
+    # containing '&' yields an ill-formed record -- confirm what the
+    # consumer of this output expects before changing it.
+    puts $out "<cr>"
+    puts $out "<identifier>$href</identifier>"
+    set abody [wellform $body]
+    puts $out "<description>$abody</description>"
+    puts $out "</cr>"
+
+    if {![RobotFileExist $task visited $host $path]} {
+        # Not visited yet. 1000 serves as "no distance known".
+        set olddistance 1000
+        if {![RobotFileExist $task bad $host $path]} {
+            if {[RobotFileExist $task unvisited $host $path]} {
+                set inf [RobotFileOpen $task unvisited $host $path r]
+                RobotReadRecord $inf oldurl olddistance
+                RobotFileClose $inf
+            }
+        } else {
+            # Marked bad: distance 0 can never be improved on, so the
+            # target is not queued again.
+            set olddistance 0
+        }
+        if {[string length $olddistance] == 0} {
+            set olddistance 1000
+        }
+        # Braced condition: olddistance comes from a file and must not be
+        # substituted a second time by expr.
+        if {$distance < $olddistance} {
+            set outf [RobotFileOpen $task unvisited $host $path]
+            RobotWriteRecord $outf $url $distance
+            RobotFileClose $outf
+        }
+    } elseif {[string compare $href $url]} {
+        # Already visited and not a self-reference: queue it again only
+        # when this route is shorter than the recorded one.
+        set inf [RobotFileOpen $task visited $host $path r]
+        RobotReadRecord $inf xurl olddistance
+        # Released the same way as every other RobotFileOpen handle here.
+        RobotFileClose $inf
+        if {[string length $olddistance] == 0} {
+            set olddistance 1000
+        }
+        if {$distance < $olddistance} {
+            puts "OK remarking url=$url href=$href"
+            puts "olddistance = $olddistance"
+            puts "newdistance = $distance"
+            set outf [RobotFileOpen $task unvisited $host $path]
+            RobotWriteRecord $outf $url $distance
+            RobotFileClose $outf
+        }
+    }
+}
+
+proc RobotTextHtml {task url out} {
+ global URL control
+
+ # set title so we can emit it for the body
+ set title {}
+ # if true, nothing will be indexed
+ set noindex 0
+ # if true, nothing will be followed
+ set nofollow 0