projects
/
tclrobot.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Minor changes.
[tclrobot.git]
/
robot.tcl
diff --git
a/robot.tcl
b/robot.tcl
index
bad9a25
..
c7d85c4
100755
(executable)
--- a/
robot.tcl
+++ b/
robot.tcl
@@
-1,5
+1,5
@@
#!/usr/bin/tclsh
#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.11 2001/01/23 11:26:43 adam Exp $
+# $Id: robot.tcl,v 1.13 2001/02/26 22:51:51 adam Exp $
#
proc RobotFileNext1 {area lead} {
puts "RobotFileNext1 area=$area lead=$lead"
#
proc RobotFileNext1 {area lead} {
puts "RobotFileNext1 area=$area lead=$lead"
@@
-264,6
+264,9
@@
proc RobotHref {url hrefx hostx pathx} {
if {[string first { } $href] >= 0} {
return 0
}
if {[string first { } $href] >= 0} {
return 0
}
+ if {[string length $href] > 256} {
+ return 0
+ }
if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
return 0
}
if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
return 0
}
@@
-281,16
+284,18
@@
proc RobotHref {url hrefx hostx pathx} {
if {![string length $surl]} {
set surl /
}
if {![string length $surl]} {
set surl /
}
- set ok 0
- foreach domain $domains {
- if {[string match $domain $host]} {
- set ok 1
- break
+ if {[info exist domains]} {
+ set ok 0
+ foreach domain $domains {
+ if {[string match $domain $host]} {
+ set ok 1
+ break
+ }
}
}
- }
- if {!$ok} {
- return 0
- }
+ if {!$ok} {
+ return 0
+ }
+ }
} else {
regexp {^([^\#]*)} $hpath x surl
set host $URL($url,hostport)
} else {
regexp {^([^\#]*)} $hpath x surl
set host $URL($url,hostport)
@@
-429,7
+434,7
@@
proc RobotTextHtml {url out} {
puts $out "<documentcontent>"
puts $out $nbody
puts $out "</documentcontent>"
puts $out "<documentcontent>"
puts $out $nbody
puts $out "</documentcontent>"
- } a {
+ } -nonest a {
if {![info exists parm(href)]} {
puts "no href"
continue
if {![info exists parm(href)]} {
puts "no href"
continue
@@
-755,7
+760,7
@@
set i 0
set l [llength $argv]
if {$l < 2} {
set l [llength $argv]
if {$l < 2} {
- puts {tclrobot: usage [-j jobs] [-c count] [-d domain] [url ..]}
+ puts {tclrobot: usage [-j jobs] [-i idle] [-c count] [-d domain] [url ..]}
puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/"
exit 1
}
puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/"
exit 1
}
@@
-782,6
+787,12
@@
while {$i < $l} {
}
lappend domains $dom
}
}
lappend domains $dom
}
+ -i* {
+ set idleTime [string range $arg 2 end]
+ if {![string length $idleTime]} {
+ set idleTime [lindex $argv [incr i]]
+ }
+ }
default {
set href $arg
if {[RobotHref http://www.indexdata.dk/ href host path]} {
default {
set href $arg
if {[RobotHref http://www.indexdata.dk/ href host path]} {