From 64d3a6a403795bb44f89ffad04463fd7a8863184 Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Wed, 11 Jun 2003 10:11:39 +0000
Subject: [PATCH] XML headers with character encoding as specified by HTTP server

RobotWriteRecord emits one additional leading line per record and
RobotReadRecord skips one additional line so the two stay in step.

RobotReadHeader now stores a per-URL charset in URL($task,$url,charset).
It defaults to ISO-8859-1 and is overridden by the charset parameter of
the Content-Type response header.  RobotWriteMetadata reads that value
into a local variable named charset.

The Content-Type match is case-insensitive, tolerates an opening double
quote around the value (charset="utf-8"), and requires at least one
charset character, so a header that cannot be parsed leaves the
ISO-8859-1 default in place instead of replacing it with an empty string.
---
NOTE(review): several string literals in this copy of the patch are empty
(puts $outf {}, puts $out ""); presumably markup was stripped in transit.
Confirm against the repository before applying.
NOTE(review): leading whitespace and blank context lines were
reconstructed from the hunk line counts; apply with whitespace tolerance
(e.g. patch -l) or regenerate from the repository.

 robot.tcl |    8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/robot.tcl b/robot.tcl
index a90d6e8..3ab1d81 100755
--- a/robot.tcl
+++ b/robot.tcl
@@ -1,5 +1,5 @@
 #!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.43 2003/06/11 09:40:22 adam Exp $
+# $Id: robot.tcl,v 1.44 2003/06/11 10:11:39 adam Exp $
 #
 proc RobotFileNext1 {area lead} {
     # puts "RobotFileNext1 area=$area lead=$lead"
@@ -32,6 +32,7 @@ proc RobotFileNext1 {area lead} {
 }
 
 proc RobotWriteRecord {outf fromurl distance} {
+    puts $outf {}
     puts $outf ""
     puts $outf ""
     puts $outf $distance
@@ -47,6 +48,7 @@ proc RobotReadRecord {inf fromurlx distancex} {
     upvar $distancex distance
     gets $inf
     gets $inf
+    gets $inf
     set distance [string trim [gets $inf]]
     # puts "got distance = $distance"
     gets $inf
@@ -801,6 +803,8 @@ proc RobotTextPlain {task url out} {
 proc RobotWriteMetadata {task url out} {
     global URL
 
+    set charset $URL($task,$url,charset)
+    puts $out ""
     puts $out ""
 
     set distance 1000
@@ -886,6 +890,7 @@ proc RobotReadHeader {task url sock} {
         set version {}
         set headbuf [string range $URL($task,$url,buf) 0 $n]
         incr n 4
+        set URL($task,$url,charset) ISO-8859-1
         set URL($task,$url,buf) [string range $URL($task,$url,buf) $n end]
 
         regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
@@ -894,6 +899,7 @@ proc RobotReadHeader {task url sock} {
             if {[regexp {^([^:]+):[ ]+([^;]*)} $line x name value]} {
                 set URL($task,$url,head,[string tolower $name]) [string trim $value]
             }
+            regexp -nocase {^Content-Type:.*charset="?([A-Za-z0-9_-]+)} $line x URL($task,$url,charset)
         }
         puts "HTTP CODE $code"
         set URL($task,$url,state) skip
-- 
1.7.10.4