X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=robot.tcl;h=c7d85c47a68dfe22d74f029269b06a90de19b5de;hb=8278051059f0aa2849729755d70967d58dddd8a6;hp=bad9a2505a6d3d639798eee4f23c9878165cf068;hpb=14639758cbc66407c40bde3d09894cfaff6da0e8;p=tclrobot.git diff --git a/robot.tcl b/robot.tcl index bad9a25..c7d85c4 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.11 2001/01/23 11:26:43 adam Exp $ +# $Id: robot.tcl,v 1.13 2001/02/26 22:51:51 adam Exp $ # proc RobotFileNext1 {area lead} { puts "RobotFileNext1 area=$area lead=$lead" @@ -264,6 +264,9 @@ proc RobotHref {url hrefx hostx pathx} { if {[string first { } $href] >= 0} { return 0 } + if {[string length $href] > 256} { + return 0 + } if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} { return 0 } @@ -281,16 +284,18 @@ proc RobotHref {url hrefx hostx pathx} { if {![string length $surl]} { set surl / } - set ok 0 - foreach domain $domains { - if {[string match $domain $host]} { - set ok 1 - break + if {[info exist domains]} { + set ok 0 + foreach domain $domains { + if {[string match $domain $host]} { + set ok 1 + break + } } - } - if {!$ok} { - return 0 - } + if {!$ok} { + return 0 + } + } } else { regexp {^([^\#]*)} $hpath x surl set host $URL($url,hostport) @@ -429,7 +434,7 @@ proc RobotTextHtml {url out} { puts $out "" puts $out $nbody puts $out "" - } a { + } -nonest a { if {![info exists parm(href)]} { puts "no href" continue @@ -755,7 +760,7 @@ set i 0 set l [llength $argv] if {$l < 2} { - puts {tclrobot: usage [-j jobs] [-c count] [-d domain] [url ..]} + puts {tclrobot: usage [-j jobs] [-i idle] [-c count] [-d domain] [url ..]} puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/" exit 1 } @@ -782,6 +787,12 @@ while {$i < $l} { } lappend domains $dom } + -i* { + set idleTime [string range $arg 2 end] + if {![string length $idleTime]} { + set idleTime [lindex $argv [incr i]] + } + } default { set href $arg if {[RobotHref http://www.indexdata.dk/ href host path]} {