From: Adam Dickmeiss Date: Thu, 15 Oct 1998 13:27:19 +0000 (+0000) Subject: Minor changes. X-Git-Tag: ZMBOT.0.1~40 X-Git-Url: http://git.indexdata.com/?p=tclrobot.git;a=commitdiff_plain;h=4e9e33394cbc2f242fd26fef99d127f95df4f7f3 Minor changes. --- diff --git a/robot.tcl b/robot.tcl index c942418..8c46a63 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.2 1998/10/15 12:31:03 adam Exp $ +# $Id: robot.tcl,v 1.3 1998/10/15 13:27:19 adam Exp $ # proc RobotFileNext {area} { if {[catch {set ns [glob ${area}/*]}]} { @@ -118,6 +118,7 @@ proc headSave {url out title} { proc RobotSave {url} { global URL + global domains set out [RobotFileOpen visited $URL($url,host) $URL($url,path)] set ti 0 @@ -149,15 +150,20 @@ proc RobotSave {url} { if {[regexp {^\#} $parm(href)]} { continue } elseif {[regexp {^([^:]+):([^#]+)} $parm(href) x method hpath]} { + set ok 0 if {![string compare $method http]} { if {![regexp {^//([^/]+)(.*)} $hpath x host path]} { set host $URL($url,host) set path $hpath } - if {![regexp {\.indexdata\.dk$} $host]} continue - } else { - continue + foreach domain $domains { + if {[string match $domain $host]} { + set ok 1 + break + } + } } + if {!$ok} continue } elseif {[regexp {^([/~][^#]*)} $parm(href) x path]} { set host $URL($url,host) set method http @@ -278,7 +284,7 @@ proc RobotGetUrl {url phost} { set port 80 puts "---------" puts $url - if {[regexp {([^:]+)://([^/]+)([^ ?]*)} $url x method host path]} { + if {[regexp {([^:]+)://([^/]+)([^ ]*)} $url x method host path]} { puts "method=$method host=$host path=$path" } else { return -1 @@ -304,14 +310,17 @@ if {![llength [info commands htmlSwitch]]} { } } -if {![llength $argv]} { - puts "Tclrobot: specify one or more sites." +if {[llength $argv] < 2} { + puts "Tclrobot: usage " exit 1 } -foreach site $argv { +set domains [lindex $argv 0] +set site [lindex $argv 1] +if {[string length $site]} { set x [RobotFileOpen unvisited $site /] close $x } + RobotRestart vwait forever