From: Per M. Hansen Date: Thu, 4 Feb 1999 20:37:25 +0000 (+0000) Subject: Changed tags for the output. X-Git-Tag: ZMBOT.0.1~39 X-Git-Url: http://git.indexdata.com/?p=tclrobot.git;a=commitdiff_plain;h=10c922040186d738784fba9aab7295ab64d6e4d7 Changed tags for the output. --- diff --git a/robot.tcl b/robot.tcl index 8c46a63..c539a04 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.3 1998/10/15 13:27:19 adam Exp $ +# $Id: robot.tcl,v 1.4 1999/02/04 20:37:25 perhans Exp $ # proc RobotFileNext {area} { if {[catch {set ns [glob ${area}/*]}]} { @@ -92,33 +92,32 @@ proc RobotRestart {} { proc headSave {url out title} { global URL - puts $out {} - puts $out " $title" - if {[info exists URL($url,head,Last-modified)]} { - puts $out " $URL($url,head,Last-modified)" + puts $out {} + puts $out "$title" + if {[info exists URL($url,head,last-modified)]} { + puts $out "$URL($url,head,last-modified)" } puts $out {} - if {[info exists URL($url,head,Date)]} { - puts $out " $URL($url,head,Date)" + if {[info exists URL($url,head,date)]} { + puts $out " $URL($url,head,date)" } - if {[info exists URL($url,head,Content-length)]} { - puts $out " $URL($url,head,Content-length)" + if {[info exists URL($url,head,content-length)]} { + puts $out " $URL($url,head,content-length)" } - if {[info exists URL($url,head,Server)]} { - puts $out " $URL($url,head,Server)" + if {[info exists URL($url,head,server)]} { + puts $out " $URL($url,head,server)" } puts $out {} - puts $out {} - puts $out " $url" - if {[info exists URL($url,head,Content-type)]} { - puts $out " $URL($url,head,Content-type)" + puts $out {} + puts $out " $url" + if {[info exists URL($url,head,content-type)]} { + puts $out " $URL($url,head,content-type)" } - puts $out {} + puts $out {} } proc RobotSave {url} { - global URL - global domains + global URL domains set out [RobotFileOpen visited $URL($url,host) $URL($url,path)] set ti 0 @@ -134,9 +133,9 @@ proc RobotSave {url} { } body { regsub -all -nocase {} $body {} abody regsub -all {<[^\>]+>} $abody {} nbody - puts $out "" + puts $out "" puts $out $nbody - puts $out "" + puts $out "" } a { if {![info exists parm(href)]} { puts "no href" @@ -191,24 +190,24 @@ proc RobotSave {url} { set path [lindex $c $i] incr i -1 while {$i >= 0} { - switch -- [lindex $c $i] { - .. { - incr i -2 - } - . { - incr i -1 - } - default { - set path [lindex $c $i]/$path - incr i -1 - } - } - } - set href "$method://$host$path" + switch -- [lindex $c $i] { + .. { + incr i -2 + } + . { + incr i -1 + } + default { + set path [lindex $c $i]/$path + incr i -1 + } + } + } + set href "$method://$host$path" - puts $out "" - puts $out "
  • $href" - puts $out " $body" + puts $out "" + puts $out "$href" + puts $out "$body" puts $out "" if {![regexp {/.*bin/} $href)]} { @@ -223,7 +222,7 @@ proc RobotSave {url} { headSave $url $out "untitled" set ti 1 } - puts $out "" + puts $out "" close $out RobotFileUnlink unvisited $URL($url,host) $URL($url,path) } @@ -243,12 +242,11 @@ proc RobotRead {url sock} { head { puts "head: $line" if {[regexp {([^:]+):[ ]+(.*)} $line x name value]} { - set URL($url,head,$name) $value + set URL($url,head,[string tolower $name]) $value } } html { lappend URL($url,line) $line -# puts "body: $line" } skip { close $sock @@ -258,8 +256,8 @@ proc RobotRead {url sock} { } } else { set URL($url,state) html - if {[info exists URL($url,head,Content-type)]} { - if {![string compare $URL($url,head,Content-type) text/html]} { + if {[info exists URL($url,head,content-type)]} { + if {![string compare $URL($url,head,content-type) text/html]} { set URL($url,state) html } } @@ -306,12 +304,13 @@ proc RobotGetUrl {url phost} { if {![llength [info commands htmlSwitch]]} { set e [info sharedlibextension] if {[catch {load ./tclrobot$e}]} { - load tclrobot$e + load tclrobot$e } } if {[llength $argv] < 2} { puts "Tclrobot: usage " + puts " Example: '*.dk' www.indexdata.dk" exit 1 } set domains [lindex $argv 0] @@ -323,4 +322,3 @@ if {[string length $site]} { RobotRestart vwait forever -