#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.44 2003/06/11 10:11:39 adam Exp $
+# $Id: robot.tcl,v 1.47 2003/12/10 09:58:22 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
}
}
+# wellform --
+#   Reduce an HTML fragment to plain text that can be written as XML
+#   character data.
+#
+# Arguments:
+#   body - raw HTML text
+#
+# Results:
+#   Returns the text with comments, <script> elements and tags removed,
+#   "&nbsp;" turned into a space and every remaining "&" escaped.
+#
+# NOTE(review): a "<" that is not part of a "<...>" tag survives the
+# stripping below and is not escaped here.
+proc wellform {body} {
+ # Drop HTML comments; the pattern only matches comments that contain
+ # no "-" between the delimiters.
+ regsub -all {<!--[^-]*-->} $body { } abody
+ # Drop <script>...</script> elements whose content contains no "<".
+ regsub -all -nocase {<script[^<]*</script>} $abody {} body
+ # Drop every remaining tag.
+ regsub -all {<[^\>]+>} $body {} abody
+ # "&nbsp;" is not a predefined XML entity, so turn it into a plain
+ # space before the ampersand escaping below would mangle it.
+ regsub -all {&nbsp;} $abody { } body
+ # Escape ampersands. "\&" in the substitution spec is a literal "&";
+ # a bare "&" would stand for the matched text.
+ regsub -all {&} $body {\&amp;} abody
+ return $abody
+}
+
proc link {task url out href body distance} {
global URL control
if {[expr $distance > $control($task,distance)]} return
puts $out "<cr>"
puts $out "<identifier>$href</identifier>"
- puts $out "<description>$body</description>"
+ set abody [wellform $body]
+ puts $out "<description>$abody</description>"
puts $out "</cr>"
if {![RobotFileExist $task visited $host $path]} {
set metacontent $parm($a)
}
}
- unset parm($al)
+ unset parm($a)
}
puts $out "></meta>"
# go through robots directives (if any)
# don't print title of document content if noindex is used
if {!$noindex} {
puts $out "<title>$title</title>"
- regsub -all {<!--[^-]*-->} $body { } abody
- regsub -all -nocase {<script[^<]*</script>} $abody {} bbody
- regsub -all {<[^\>]+>} $bbody {} nbody
+ set bbody [wellform $body]
puts $out "<documentcontent>"
- puts $out $nbody
+ puts $out $bbody
puts $out "</documentcontent>"
}
} -nonest base {
set href [string trim $parm(href)]
if {![RobotHref $task $url href host path]} continue
set URL($task,$url,bpath) $path
- } a {
+ } -nonest a {
# <a href="...."> .. </a>
# we're not using nonest - otherwise body isn't set
if {$nofollow} continue