#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.15 2001/06/05 08:44:50 adam Exp $
+# $Id: robot.tcl,v 1.16 2001/06/06 07:10:31 adam Exp $
#
proc RobotFileNext1 {area lead} {
puts "RobotFileNext1 area=$area lead=$lead"
}
}
}
+ } -nonest area {
+ # Process the href attribute of the current tag.  A tag without an
+ # href is reported and skipped.  Links are only handled while the
+ # current distance is within maxDistance: the link is written to $out
+ # as a <cr> element, and its target is (re)written to the unvisited
+ # area when this route reaches it with a smaller distance than the
+ # one already on record.
+ if {![info exists parm(href)]} {
+     puts "no href"
+     continue
+ }
+ # Conditions are braced so that expr substitutes the values only once.
+ if {$distance <= $maxDistance} {
+     set href [string trim $parm(href)]
+     # href is passed by name; host and path are filled in by RobotHref.
+     if {![RobotHref $url href host path]} continue
+
+     # NOTE(review): $href is emitted without escaping '&' or '<';
+     # confirm whether consumers of this output expect well-formed XML.
+     puts $out "<cr>"
+     puts $out "<identifier>$href</identifier>"
+     puts $out "<description></description>"
+     puts $out "</cr>"
+
+     if {![RobotFileExist visited $host $path]} {
+         # Not visited yet.  1000 is the "no distance on record" default.
+         set olddistance 1000
+         if {![RobotFileExist bad $host $path]} {
+             if {[RobotFileExist unvisited $host $path]} {
+                 set inf [RobotFileOpen unvisited $host $path r]
+                 RobotReadRecord $inf oldurl olddistance
+                 RobotFileClose $inf
+             }
+         } else {
+             # Marked bad: distance 0 guarantees it is never queued below.
+             set olddistance 0
+         }
+         # A record without a distance counts as "no distance on record".
+         if {[string length $olddistance] == 0} {
+             set olddistance 1000
+         }
+         if {$distance < $olddistance} {
+             set outf [RobotFileOpen unvisited $host $path]
+             RobotWriteRecord $outf $url $distance
+             RobotFileClose $outf
+         }
+     } elseif {[string compare $href $url]} {
+         # Already visited, and the link does not point back at the
+         # current URL: re-queue it if it is now reachable more cheaply.
+         set inf [RobotFileOpen visited $host $path r]
+         RobotReadRecord $inf xurl olddistance
+         # Close through RobotFileClose, matching the unvisited branch.
+         RobotFileClose $inf
+         if {[string length $olddistance] == 0} {
+             set olddistance 1000
+         }
+         if {$distance < $olddistance} {
+             puts "OK remarking url=$url href=$href"
+             puts "olddistance = $olddistance"
+             puts "newdistance = $distance"
+             set outf [RobotFileOpen unvisited $host $path]
+             RobotWriteRecord $outf $url $distance
+             RobotFileClose $outf
+         }
+     }
+ }
}
}
global URL agent
set section 0
foreach l [split $buf \n] {
- if {[regexp {([-A-Za-z]+):[ \t]*([^\#]+)} $l match cmd arg]} {
+ if {[regexp {([-A-Za-z]+):[ \t]*([^\#\t ]+)} $l match cmd arg]} {
puts "cmd=$cmd arg=$arg"
- switch $cmd {
- User-Agent {
+ switch -- [string tolower $cmd] {
+ user-agent {
if {$section} break
set pat [string tolower $arg]*
set section [string match $pat $agent]
}
- Disallow {
+ disallow {
if {$section} {
puts "rule [list 0 $arg]"
lappend $v [list 0 $arg]
}
}
- Allow {
+ allow {
if {$section} {
puts "rule [list 1 $arg]"
lappend $v [list 1 $arg]
set buf [read $inf 32768]
close $inf
} else {
- set buf "User-Agent: *\nAllow: /\n"
+ set buf "User-agent: *\nAllow: /\n"
}
RobotsTxt0 URL($hostport,robots) $buf
}
}
}
if {!$ok} {
+ puts "skipped due to robots.txt"
return -1
}
}