+ set buffer [read $sock 16384]
+ set readCount [string length $buffer]
+
+ if {$readCount <= 0} {
+ Robot200 $task $url
+ RobotRestart $task $url $sock
+ } elseif {!$binary && [string first \0 $buffer] >= 0} {
+ Robot200 $task $url
+ RobotRestart $task $url $sock
+ } else {
+ # puts "Got $readCount bytes"
+ set URL($task,$url,buf) $URL($task,$url,buf)$buffer
+ }
+}
+
+ # RobotReadHeader --
+ #
+ #   Readable-event handler used while the HTTP response header for
+ #   url is still being received on sock.  Each call reads up to 2148
+ #   bytes and appends them to URL(task,url,buf).  Once the blank line
+ #   that ends the header (CR LF CR LF) is present, the status line and
+ #   header fields are parsed, the header is stripped from the buffer
+ #   and the response is dispatched on its status code.
+ #
+ # Arguments:
+ #   task   task identifier, used as the first part of every URL() key
+ #   url    URL being fetched, used as the second part of every URL() key
+ #   sock   channel the response is read from
+ #
+ # Side effects:
+ #   Sets URL(task,url,charset), URL(task,url,state) and one
+ #   URL(task,url,head,NAME) element per header field, where NAME is the
+ #   lower-cased field name.  Calls RobotError, RobotRedirect and
+ #   RobotRestart, or installs RobotReadContent as the readable handler.
+ proc RobotReadHeader {task url sock} {
+     global URL debuglevel
+
+     if {$debuglevel > 1} {
+         puts "HTTP head $url"
+     }
+     # A failing read is reported with the same code as an empty read.
+     if {[catch {set buffer [read $sock 2148]}]} {
+         RobotError $task $url 404
+         RobotRestart $task $url $sock
+         return
+     }
+     set readCount [string length $buffer]
+
+     if {$readCount <= 0} {
+         # Nothing could be read before a complete header was seen.
+         RobotError $task $url 404
+         RobotRestart $task $url $sock
+     } else {
+         # puts "Got $readCount bytes"
+         append URL($task,$url,buf) $buffer
+
+         set n [string first \r\n\r\n $URL($task,$url,buf)]
+         if {$n > 1} {
+             set code 0
+             set version {}
+             set headbuf [string range $URL($task,$url,buf) 0 $n]
+             # Skip the four terminator bytes; what remains is body data.
+             incr n 4
+             set URL($task,$url,charset) ISO-8859-1
+             set URL($task,$url,buf) [string range $URL($task,$url,buf) $n end]
+
+             regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
+             foreach line [split $headbuf \n] {
+                 # The stored value stops at the first semicolon, so
+                 # parameters such as the charset are not part of it.
+                 if {[regexp {^([^:]+):[ ]+([^;]*)} $line x name value]} {
+                     set URL($task,$url,head,[string tolower $name]) [string trim $value]
+                 }
+                 # Header field names are case-insensitive, so the charset
+                 # parameter must be found however the name is spelled.
+                 regexp -nocase {^Content-Type:.*charset=([A-Za-z0-9_-]*)} $line x URL($task,$url,charset)
+             }
+             puts "HTTP CODE $code"
+             set URL($task,$url,state) skip
+             switch $code {
+                 301 -
+                 302 {
+                     # A redirect without a Location header cannot be
+                     # followed; report it instead of raising a Tcl error
+                     # that would leave the socket without a restart.
+                     if {[info exists URL($task,$url,head,location)]} {
+                         RobotRedirect $task $url $URL($task,$url,head,location) $code
+                     } else {
+                         RobotError $task $url $code
+                     }
+                     RobotRestart $task $url $sock
+                 }
+                 200 {
+                     if {![info exists URL($task,$url,head,content-type)]} {
+                         set URL($task,$url,head,content-type) {}
+                     }
+                     # Only text/* bodies are treated as text.  Media type
+                     # names are case-insensitive, so compare in lower case.
+                     set binary 1
+                     switch -glob -- [string tolower $URL($task,$url,head,content-type)] {
+                         text/* {
+                             set binary 0
+                         }
+                     }
+                     # robots.txt is fetched regardless of the mime rules.
+                     if {![regexp {/robots.txt$} $url]} {
+                         if {![checkrule $task mime $URL($task,$url,head,content-type)]} {
+                             RobotError $task $url mimedeny
+                             RobotRestart $task $url $sock
+                             return
+                         }
+                     }
+                     fileevent $sock readable [list RobotReadContent $task $url $sock $binary]
+                 }
+                 default {
+                     RobotError $task $url $code
+                     RobotRestart $task $url $sock
+                 }
+             }
+         }
+     }
+ }
+
+ # RobotSockCancel --
+ #
+ #   Gives up on the transfer of url over sock: prints a trace line,
+ #   reports code 401 for the URL through RobotError and then hands the
+ #   socket to RobotRestart.
+ #
+ # Arguments:
+ #   task   task identifier passed through to RobotError and RobotRestart
+ #   url    URL whose transfer is being cancelled
+ #   sock   channel associated with the transfer
+ proc RobotSockCancel {task url sock} {
+     set notice "RobotSockCancel sock=$sock url=$url"
+     puts stdout $notice
+
+     # Report first, then restart, so the error is recorded before the
+     # socket is handed over.
+     RobotError $task $url 401
+     RobotRestart $task $url $sock
+ }
+
+proc RobotConnect {task url sock} {
+ global URL agent acceptLanguage
+
+ fconfigure $sock -translation {lf crlf} -blocking 0
+ fileevent $sock readable [list RobotReadHeader $task $url $sock]
+ puts $sock "GET $URL($task,$url,path) HTTP/1.0"
+ puts $sock "Host: $URL($task,$url,host)"
+ puts $sock "User-Agent: $agent"
+ if {[string length $acceptLanguage]} {
+ puts $sock "Accept-Language: $acceptLanguage"
+ }