X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=robot.tcl;h=73d558aad3893ad0b0b823f929247a5cb5d02dac;hb=4355628830cd0f9e27c059d20254d8e1c30896eb;hp=3ab1d816a01651c5850c8d311d394da721bc3c98;hpb=64d3a6a403795bb44f89ffad04463fd7a8863184;p=tclrobot.git diff --git a/robot.tcl b/robot.tcl index 3ab1d81..73d558a 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.44 2003/06/11 10:11:39 adam Exp $ +# $Id: robot.tcl,v 1.45 2003/06/11 10:29:41 adam Exp $ # proc RobotFileNext1 {area lead} { # puts "RobotFileNext1 area=$area lead=$lead" @@ -610,6 +610,15 @@ proc RobotRedirect {task url tourl code} { } } +proc wellform {body} { + regsub -all {} $body { } abody + regsub -all -nocase {} $abody {} body + regsub -all {<[^\>]+>} $body {} abody + regsub -all { } $abody { } body + regsub -all {&} $body {&} abody + return $abody +} + proc link {task url out href body distance} { global URL control if {[expr $distance > $control($task,distance)]} return @@ -618,7 +627,8 @@ proc link {task url out href body distance} { puts $out "" puts $out "$href" - puts $out "$body" + set abody [wellform $body] + puts $out "$abody" puts $out "" if {![RobotFileExist $task visited $host $path]} { @@ -714,11 +724,9 @@ proc RobotTextHtml {task url out} { # don't print title of document content if noindex is used if {!$noindex} { puts $out "$title" - regsub -all {} $body { } abody - regsub -all -nocase {} $abody {} bbody - regsub -all {<[^\>]+>} $bbody {} nbody + set bbody [wellform $body] puts $out "" - puts $out $nbody + puts $out $bbody puts $out "" } } -nonest base {