X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dcdot.tcl;h=3b2edcb5649734fc69dbfd6fc5055c7fa1cba7e6;hb=8278051059f0aa2849729755d70967d58dddd8a6;hp=aeb4f91484a9e635a060ee5d3ff9f6e66bc8c51a;hpb=d8234df96ab8fb03ed71f6358f7211ebe725b495;p=tclrobot.git diff --git a/dcdot.tcl b/dcdot.tcl index aeb4f91..3b2edcb 100755 --- a/dcdot.tcl +++ b/dcdot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: dcdot.tcl,v 1.1 2000/12/07 20:16:11 adam Exp $ +# $Id: dcdot.tcl,v 1.4 2000/12/11 17:11:03 adam Exp $ # proc RobotRestart {} { @@ -11,28 +11,14 @@ proc RobotRestart {} { proc RobotTextHtml {url} { global URL - set head 0 - htmlSwitch $URL($url,buf) \ - title { - set URL($url,title) $body - } -nonest meta { - set scheme {} - if {[info exist parm(scheme)]} { - set scheme $parm(scheme) - unset parm(scheme) - } - if {[info exist parm(name)]} { - if {[info exist parm(content)]} { - set URL($url,meta,$parm(name),$scheme) $parm(content) - unset parm(content) - } - unset parm(name) - } - } a { - if {[info exists parm(href)]} { - lappend URL($url,links) $parm(href) - } - } + set b $URL($url,buf) + set e {<[mM][eE][tT][aA][^>]*>} + catch {unset $URL($url,meta)} + while {[regexp -indices $e $b i]} { + set meta [string range $b [lindex $i 0] [lindex $i 1]] + lappend URL($url,meta) $meta + set b [string range $b [lindex $i 1] end] + } } proc Robot200 {url} { @@ -76,20 +62,20 @@ proc RobotReadHeader {url sock} { # puts "Got $readCount bytes" set URL($url,buf) $URL($url,buf)$buffer - set n [string first \n\n $URL($url,buf)] + set n [string first \r\n\r\n $URL($url,buf)] if {$n > 1} { + puts "string first match n = $n" set code 0 set version {} set headbuf [string range $URL($url,buf) 0 $n] - incr n - incr n + incr n 4 set URL($url,buf) [string range $URL($url,buf) $n end] regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code set lines [split $headbuf \n] foreach line $lines { if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} { - set URL($url,head,[string tolower $name]) $value + set URL($url,head,[string tolower $name]) [string trim $value] } } set URL($url,state) skip @@ -105,6 +91,10 @@ proc RobotReadHeader {url sock} { text/plain { fileevent $sock readable [list RobotReadContent $url $sock] } + application/pdf { + puts "ok preceeed with this thingy" + fileevent $sock readable [list RobotReadContent $url $sock] + } default { close $sock Robot200 $url @@ -113,7 +103,6 @@ proc RobotReadHeader {url sock} { } } default { - Robot404 $url close $sock RobotRestart } @@ -125,7 +114,7 @@ proc RobotReadHeader {url sock} { proc RobotConnect {url sock} { global URL agent - fconfigure $sock -translation {auto crlf} -blocking 0 + fconfigure $sock -translation {lf crlf} -blocking 0 fileevent $sock readable [list RobotReadHeader $url $sock] puts $sock "GET $URL($url,path) HTTP/1.0" puts $sock "Host: $URL($url,host)" @@ -157,14 +146,7 @@ proc RobotGetUrl {url phost} { return 0 } -if {![llength [info commands htmlSwitch]]} { - set e [info sharedlibextension] - if {[catch {load ./tclrobot$e}]} { - load tclrobot$e - } -} - -set agent "zmbot/0.0" +set agent "dcdot.tcl/0.0" if {![catch {set os [exec uname -s -r]}]} { set agent "$agent ($os)" } @@ -185,8 +167,17 @@ proc RobotGetDCDOT {url} { if {$argc == 1} { set url [lindex $argv 0] RobotGetDCDOT $url - set mask {,meta,[Dd][Cc]\.*} - foreach a [array names URL $url$mask] { - puts "URL($a) = $URL($a)" + set mask {,meta} + if {[info exist URL($url,meta)]} { + foreach m $URL($url,meta) { + puts $m + } + } + foreach v [array names URL $url,head,*] { + puts "$v = $URL($v)" } -} \ No newline at end of file + puts "Buffer length is [string length $URL($url,buf)]" + set f [open out.pdf w] + puts -nonewline $f $URL($url,buf) + close $f +}