#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.30 2002/02/17 09:29:18 adam Exp $
+# $Id: robot.tcl,v 1.32 2002/03/25 16:11:08 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
exec mkdir $d
cd ./$d
if {![string compare $area unvisited] && $i == 1 && $mode == "w"} {
- set out [open frobots.txt w]
- puts "creating robots.txt in $d"
- close $out
- incr status(unvisited)
+ if {[string compare $path /robots.txt]} {
+ set out [open frobots.txt w]
+ puts "creating robots.txt in $d"
+ close $out
+ incr status(unvisited)
+ }
}
}
}
set d [lindex $comp $len]
if {[string length $d]} {
set out [open f$d $mode]
- if {0} {
- if {[file isfile $d/f]} {
- set out [open $d/f $mode]
- } else {
- set out [open f$d $mode]
- }
- }
} else {
set out [open f $mode]
}
if {[string length $href] > 256} {
return 0
}
- if {[string first {?} $href] >= 0} {
- return 0
- }
- if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
- return 0
- }
+# if {[string first {?} $href] >= 0} {
+# return 0
+# }
+# if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
+# return 0
+# }
# get method (if any)
if {![regexp {^([^/:]+):(.*)} $href x method hpath]} {
set hpath $href
# don't print title of document content if noindex is used
if {!$noindex} {
puts $out "<title>$title</title>"
- regsub -all {<!--[^-]*->} $body { } abody
+ regsub -all {<!--[^-]*-->} $body { } abody
regsub -all -nocase {<script[^<]*</script>} $abody {} bbody
regsub -all {<[^\>]+>} $bbody {} nbody
puts $out "<documentcontent>"