From 4d94083b545d3665a3ceca7962ebb6788bc62dd3 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 29 Jun 2001 21:47:31 +0000 Subject: [PATCH] Added option to specify Accept-Language. --- robot.tcl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/robot.tcl b/robot.tcl index b8db6c4..6323bc3 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.18 2001/06/07 08:17:00 adam Exp $ +# $Id: robot.tcl,v 1.19 2001/06/29 21:47:31 adam Exp $ # proc RobotFileNext1 {area lead} { # puts "RobotFileNext1 area=$area lead=$lead" @@ -267,6 +267,9 @@ proc RobotHref {url hrefx hostx pathx} { if {[string length $href] > 256} { return 0 } + if {[string first {?} $href] >= 0} { + return 0 + } if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} { return 0 } @@ -722,13 +725,16 @@ proc RobotSockCancel {url sock} { } proc RobotConnect {url sock} { - global URL agent + global URL agent acceptLanguage fconfigure $sock -translation {lf crlf} -blocking 0 fileevent $sock readable [list RobotReadHeader $url $sock] puts $sock "GET $URL($url,path) HTTP/1.0" puts $sock "Host: $URL($url,host)" puts $sock "User-Agent: $agent" + if {[string length $acceptLanguage]} { + puts $sock "Accept-Language: $acceptLanguage" + } puts $sock "" flush $sock set URL($sock,cancel) [after 30000 [list RobotSockCancel $url $sock]] @@ -814,6 +820,7 @@ set robotsRunning 0 set robotSeq 0 set workdir [pwd] set idleTime 60000 +set acceptLanguage {} set i 0 set l [llength $argv] @@ -863,6 +870,12 @@ while {$i < $l} { set idleTime [lindex $argv [incr i]] } } + -l* { + set acceptLanguage [string range $arg 2 end] + if {![string length $acceptLanguage]} { + set acceptLanguage [lindex $argv [incr i]] + } + } default { set href $arg if {[RobotHref http://www.indexdata.dk/ href host path]} { -- 1.7.10.4