X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dcdot.tcl;h=8e15fea593a473224bac1ef1eebcf84f6d74af26;hb=5a6e4380b9b39674fb63839a5aeaa23cb68537cb;hp=a29e6f5414c136b99ce6073d8aed5b641179d9db;hpb=04f32b20fae8795aab0c1f8b703394056f3efea3;p=tclrobot.git diff --git a/dcdot.tcl b/dcdot.tcl index a29e6f5..8e15fea 100755 --- a/dcdot.tcl +++ b/dcdot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: dcdot.tcl,v 1.3 2000/12/08 22:46:53 adam Exp $ +# $Id: dcdot.tcl,v 1.5 2003/01/13 13:59:07 adam Exp $ # proc RobotRestart {} { @@ -10,15 +10,23 @@ proc RobotRestart {} { proc RobotTextHtml {url} { global URL - + set b $URL($url,buf) - set e {<[mM][eE][tT][aA][^>]*>} + set e {]*>} catch {unset $URL($url,meta)} - while {[regexp -indices $e $b i]} { + while {[regexp -nocase -indices $e $b i]} { set meta [string range $b [lindex $i 0] [lindex $i 1]] lappend URL($url,meta) $meta set b [string range $b [lindex $i 1] end] } + set b $URL($url,buf) + set e {[^>]*>} + catch {unset $URL($url,meta)} + while {[regexp -nocase -indices $e $b i]} { + set title [string range $b [lindex $i 0] [lindex $i 1]] + lappend URL($url,title) $title + set b [string range $b [lindex $i 1] end] + } } proc Robot200 {url} { @@ -62,23 +70,24 @@ proc RobotReadHeader {url sock} { # puts "Got $readCount bytes" set URL($url,buf) $URL($url,buf)$buffer - set n [string first \n\n $URL($url,buf)] + set n [string first \r\n\r\n $URL($url,buf)] if {$n > 1} { + puts "string first match n = $n" set code 0 set version {} set headbuf [string range $URL($url,buf) 0 $n] - incr n - incr n + incr n 4 set URL($url,buf) [string range $URL($url,buf) $n end] regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code set lines [split $headbuf \n] foreach line $lines { - if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} { - set URL($url,head,[string tolower $name]) $value + if {[regexp {^([^:]+):[ ]+([^;]*)} $line x name value]} { + set URL($url,head,[string tolower $name]) [string trim $value] } } set URL($url,state) skip + puts "code=$code" switch $code { 200 { if {![info exists URL($url,head,content-type)]} { @@ -91,6 +100,10 @@ proc RobotReadHeader {url sock} { text/plain { fileevent $sock readable [list RobotReadContent $url $sock] } + application/pdf { + puts "ok preceeed with this thingy" + fileevent $sock readable [list RobotReadContent $url $sock] + } default { close $sock Robot200 $url @@ -110,7 +123,7 @@ proc RobotReadHeader {url sock} { proc RobotConnect {url sock} { global URL agent - fconfigure $sock -translation {auto crlf} -blocking 0 + fconfigure $sock -translation {lf crlf} -blocking 0 fileevent $sock readable [list RobotReadHeader $url $sock] puts $sock "GET $URL($url,path) HTTP/1.0" puts $sock "Host: $URL($url,host)" @@ -169,7 +182,16 @@ if {$argc == 1} { puts $m } } + if {[info exist URL($url,title)]} { + foreach m $URL($url,title) { + puts $m + } + } foreach v [array names URL $url,head,*] { puts "$v = $URL($v)" } + puts "Buffer length is [string length $URL($url,buf)]" + set f [open out.pdf w] + puts -nonewline $f $URL($url,buf) + close $f }