2 # $Id: dcdot.tcl,v 1.1 2000/12/07 20:16:11 adam Exp $
# RobotTextHtml url
#   Feeds the buffered document in URL($url,buf) to htmlSwitch. The handler
#   fragments visible here record:
#     URL($url,title)                  - the body text handed to the handler
#     URL($url,meta,<name>,<scheme>)   - content of a tag carrying both
#                                        "name" and "content" parameters
#     URL($url,links)                  - list of every "href" parameter seen
#   NOTE(review): this listing omits lines, so the tag pattern each fragment
#   belongs to, and the value of "scheme" when parm(scheme) is absent, are
#   not visible here - confirm against the full file.
11 proc RobotTextHtml {url} {
15 htmlSwitch $URL($url,buf) \
17 set URL($url,title) $body
20 if {[info exist parm(scheme)]} {
# An explicit scheme parameter on the tag is used as the scheme.
21 set scheme $parm(scheme)
24 if {[info exist parm(name)]} {
25 if {[info exist parm(content)]} {
# Only tags that carry both a name and a content parameter are stored.
26 set URL($url,meta,$parm(name),$scheme) $parm(content)
32 if {[info exists parm(href)]} {
# Collect link targets; they are accumulated as a Tcl list.
33 lappend URL($url,links) $parm(href)
# Dispatch on the content-type header stored in URL($url,head,content-type).
# NOTE(review): the header of the enclosing definition and the switch arms
# are not visible in this listing.
42 switch $URL($url,head,content-type) {
# RobotReadContent url sock
#   Readable-event handler for the document body. Each call reads up to
#   16384 bytes from sock and appends them to URL($url,buf).
#   A read that yields no data takes a separate branch.
#   NOTE(review): the body of that branch is not visible in this listing.
50 proc RobotReadContent {url sock} {
53 set buffer [read $sock 16384]
# The length of what was actually read decides between "more data" and
# "nothing read".
54 set readCount [string length $buffer]
56 if {$readCount <= 0} {
61 # puts "Got $readCount bytes"
62 set URL($url,buf) $URL($url,buf)$buffer
# RobotReadHeader url sock
#   Readable-event handler for the HTTP response head. Each call reads up to
#   2148 bytes from sock and appends them to URL($url,buf), then looks for
#   the first blank line ("\n\n") in the accumulated buffer. The visible
#   code then:
#     - copies the buffer from index 0 through the separator position into
#       headbuf and keeps the remainder in URL($url,buf);
#     - extracts the HTTP version and status code from the status line;
#     - stores every "Name: value" line as URL($url,head,<lowercased name>);
#     - defaults URL($url,head,content-type) to the empty string;
#     - switches on the content type and installs RobotReadContent as the
#       readable handler for sock.
#   NOTE(review): lines are missing from this listing. The guard on n, any
#   adjustment of n between the two "string range" calls, the switch arms,
#   and the condition that sets the state to "skip" are not visible here -
#   confirm against the full file.
66 proc RobotReadHeader {url sock} {
69 set buffer [read $sock 2148]
# The length of what was actually read decides between "more data" and
# "nothing read".
70 set readCount [string length $buffer]
72 if {$readCount <= 0} {
76 # puts "Got $readCount bytes"
77 set URL($url,buf) $URL($url,buf)$buffer
79 set n [string first \n\n $URL($url,buf)]
83 set headbuf [string range $URL($url,buf) 0 $n]
86 set URL($url,buf) [string range $URL($url,buf) $n end]
88 regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
# The regexp result is not checked here; version and code are only assigned
# when the status line matches.
89 set lines [split $headbuf \n]
91 if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
# Header names are lowercased so later lookups do not depend on case.
92 set URL($url,head,[string tolower $name]) $value
95 set URL($url,state) skip
98 if {![info exists URL($url,head,content-type)]} {
# Guarantee the element exists so the switch below can read it.
99 set URL($url,head,content-type) {}
101 switch $URL($url,head,content-type) {
103 fileevent $sock readable [list RobotReadContent $url $sock]
106 fileevent $sock readable [list RobotReadContent $url $sock]
# RobotConnect url sock
#   Prepares sock for the HTTP exchange and sends the request:
#     - switches the channel to non-blocking mode, with "auto" end-of-line
#       translation on input and "crlf" on output;
#     - registers RobotReadHeader as the readable handler;
#     - writes a GET request line for URL($url,path) using HTTP/1.0,
#       followed by Host and User-Agent headers.
#   NOTE(review): the declarations that bring URL and agent into scope and
#   the end of the request (blank line, flush) are not visible in this
#   listing.
125 proc RobotConnect {url sock} {
128 fconfigure $sock -translation {auto crlf} -blocking 0
# The response head is handled first; RobotReadHeader is the entry point.
129 fileevent $sock readable [list RobotReadHeader $url $sock]
130 puts $sock "GET $URL($url,path) HTTP/1.0"
131 puts $sock "Host: $URL($url,host)"
132 puts $sock "User-Agent: $agent"
# RobotGetUrl url phost
#   Starts fetching url. The URL is split by regexp into method (the part
#   before "://"), hostport and path; hostport is further split into host
#   and numeric port when it has the form "host:port". The parts are stored
#   in URL($url,method), URL($url,host), URL($url,port) and URL($url,path),
#   the state is set to "head", an asynchronous socket to host:port is
#   opened, and RobotConnect is called on it.
#   NOTE(review): the failure branches (unparsable URL, no explicit port,
#   socket error), the return values, and any use of phost are not visible
#   in this listing. The caller at RobotGetDCDOT uses the result as a
#   boolean condition.
137 proc RobotGetUrl {url phost} {
139 if {![regexp {([^:]+)://([^/]+)([^ ]*)} $url x method hostport path]} {
142 if {![regexp {([^:]+):([0-9]+)} $hostport x host port]} {
146 set URL($url,method) $method
147 set URL($url,host) $host
148 set URL($url,port) $port
149 set URL($url,path) $path
# Every fetch starts in the "head" state.
150 set URL($url,state) head
152 if [catch {set sock [socket -async $host $port]}] {
155 RobotConnect $url $sock
# If no htmlSwitch command is defined yet, try to load it from the
# "tclrobot" shared library in the current directory, using the platform's
# shared-library extension.
# NOTE(review): what happens when the load fails is not visible in this
# listing.
160 if {![llength [info commands htmlSwitch]]} {
161 set e [info sharedlibextension]
162 if {[catch {load ./tclrobot$e}]} {
# Build the User-Agent string: the base name and version, plus the output of
# "uname -s -r" in parentheses when that command can be executed.
167 set agent "zmbot/0.0"
168 if {![catch {set os [exec uname -s -r]}]} {
169 set agent "$agent ($os)"
# RobotGetDCDOT url
#   Starts a fetch of url through RobotGetUrl (with an empty phost) and then
#   loops for as long as the global robotMoreWork is true.
#   NOTE(review): "global robotMoreWork 1" also declares a global variable
#   literally named "1"; the trailing 1 looks unintended - confirm.
#   NOTE(review): the initialisation of robotMoreWork, the branch taken when
#   RobotGetUrl returns true, and the loop body are not visible in this
#   listing.
172 proc RobotGetDCDOT {url} {
173 global robotMoreWork 1
176 if [RobotGetUrl $url {}] {
180 while {$robotMoreWork} {
# Script driver: take the URL from the first command-line argument and print
# every URL() element whose name matches "<url>,meta,<dD><cC>.<anything>".
# That covers the meta entries whose name starts with "DC." in any letter
# case.
# NOTE(review): $url is interpolated into a glob pattern, so glob
# metacharacters in the URL (*, ?, [, \) are treated as pattern syntax -
# confirm this is acceptable.
# NOTE(review): lines between these statements are not visible in this
# listing.
186 set url [lindex $argv 0]
188 set mask {,meta,[Dd][Cc]\.*}
189 foreach a [array names URL $url$mask] {
190 puts "URL($a) = $URL($a)"