X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;ds=sidebyside;f=dcdot.tcl;h=8e15fea593a473224bac1ef1eebcf84f6d74af26;hb=3201adca0560cf447024e23b0e572c9b5137111c;hp=af0bef76a3ce1ed9862fe72279d3f8ffc82999ba;hpb=975b229918744816b12263fae523eaad4f8e2d87;p=tclrobot.git
diff --git a/dcdot.tcl b/dcdot.tcl
index af0bef7..8e15fea 100755
--- a/dcdot.tcl
+++ b/dcdot.tcl
@@ -1,5 +1,5 @@
#!/usr/bin/tclsh
-# $Id: dcdot.tcl,v 1.2 2000/12/08 08:55:35 adam Exp $
+# $Id: dcdot.tcl,v 1.5 2003/01/13 13:59:07 adam Exp $
#
proc RobotRestart {} {
@@ -10,15 +10,23 @@ proc RobotRestart {} {
proc RobotTextHtml {url} {
global URL
-
+
set b $URL($url,buf)
- set e {<[mM][eE][tT][aA][^>]*>}
+ set e {<meta[^>]*>}
catch {unset $URL($url,meta)}
- while {[regexp -indices $e $b i]} {
+ while {[regexp -nocase -indices $e $b i]} {
set meta [string range $b [lindex $i 0] [lindex $i 1]]
lappend URL($url,meta) $meta
set b [string range $b [lindex $i 1] end]
}
+ set b $URL($url,buf)
+ set e {<title>[^>]*>}
+ catch {unset $URL($url,meta)}
+ while {[regexp -nocase -indices $e $b i]} {
+ set title [string range $b [lindex $i 0] [lindex $i 1]]
+ lappend URL($url,title) $title
+ set b [string range $b [lindex $i 1] end]
+ }
}
proc Robot200 {url} {
@@ -62,23 +70,24 @@ proc RobotReadHeader {url sock} {
# puts "Got $readCount bytes"
set URL($url,buf) $URL($url,buf)$buffer
- set n [string first \n\n $URL($url,buf)]
+ set n [string first \r\n\r\n $URL($url,buf)]
if {$n > 1} {
+ puts "string first match n = $n"
set code 0
set version {}
set headbuf [string range $URL($url,buf) 0 $n]
- incr n
- incr n
+ incr n 4
set URL($url,buf) [string range $URL($url,buf) $n end]
regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
set lines [split $headbuf \n]
foreach line $lines {
- if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
- set URL($url,head,[string tolower $name]) $value
+ if {[regexp {^([^:]+):[ ]+([^;]*)} $line x name value]} {
+ set URL($url,head,[string tolower $name]) [string trim $value]
}
}
set URL($url,state) skip
+ puts "code=$code"
switch $code {
200 {
if {![info exists URL($url,head,content-type)]} {
@@ -91,6 +100,10 @@ proc RobotReadHeader {url sock} {
text/plain {
fileevent $sock readable [list RobotReadContent $url $sock]
}
+ application/pdf {
+ puts "ok preceeed with this thingy"
+ fileevent $sock readable [list RobotReadContent $url $sock]
+ }
default {
close $sock
Robot200 $url
@@ -99,7 +112,6 @@ proc RobotReadHeader {url sock} {
}
}
default {
- Robot404 $url
close $sock
RobotRestart
}
@@ -111,7 +123,7 @@ proc RobotReadHeader {url sock} {
proc RobotConnect {url sock} {
global URL agent
- fconfigure $sock -translation {auto crlf} -blocking 0
+ fconfigure $sock -translation {lf crlf} -blocking 0
fileevent $sock readable [list RobotReadHeader $url $sock]
puts $sock "GET $URL($url,path) HTTP/1.0"
puts $sock "Host: $URL($url,host)"
@@ -170,4 +182,16 @@ if {$argc == 1} {
puts $m
}
}
+ if {[info exist URL($url,title)]} {
+ foreach m $URL($url,title) {
+ puts $m
+ }
+ }
+ foreach v [array names URL $url,head,*] {
+ puts "$v = $URL($v)"
+ }
+ puts "Buffer length is [string length $URL($url,buf)]"
+ set f [open out.pdf w]
+ puts -nonewline $f $URL($url,buf)
+ close $f
}