X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=robot.tcl;h=a90d6e8d91c801e0ed392f4b48797c9542c2be46;hb=1eb62bcb2a33b6e5bf29ec82e2fed329953bbf9a;hp=2d9ea64253d2dcf8b24e82ff07888f570d48f1cf;hpb=e0e42a2b8319ccf7b548ba425d42ba4c82b97ce0;p=tclrobot.git diff --git a/robot.tcl b/robot.tcl index 2d9ea64..a90d6e8 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.41 2003/06/10 13:16:16 adam Exp $ +# $Id: robot.tcl,v 1.43 2003/06/11 09:40:22 adam Exp $ # proc RobotFileNext1 {area lead} { # puts "RobotFileNext1 area=$area lead=$lead" @@ -9,14 +9,19 @@ proc RobotFileNext1 {area lead} { foreach n $ns { if {[file isfile $n]} { set off [string last / $n] - incr off 2 - return $lead/[string range $n $off end] + # skip / + incr off + set end [string length $n] + # skip _.tkl + incr end -6 + return $lead/[string range $n $off $end] } } foreach n $ns { if {[file isdirectory $n]} { set off [string last / $n] - incr off 2 + # skip / + incr off set sb [RobotFileNext1 $n $lead/[string range $n $off end]] if {[string length $sb]} { return $sb @@ -50,15 +55,15 @@ proc RobotReadRecord {inf fromurlx distancex} { } proc RobotFileNext {task area} { - global robotSeq + global control global idletime ns global status - # puts "RobotFileNext robotSeq=$robotSeq($task)" - if {$robotSeq($task) < 0} { + # puts "RobotFileNext seq=$control($task,seq)" + if {$control($task,seq) < 0} { return {} } - if {$robotSeq($task) == 0} { + if {$control($task,seq) == 0} { if {[catch {set ns($task) [glob $task/$area/*]}]} { return done } @@ -66,19 +71,19 @@ proc RobotFileNext {task area} { # puts "ns=$ns($task)" set off [string length $task/$area] incr off - set n [lindex $ns($task) $robotSeq($task)] + set n [lindex $ns($task) $control($task,seq)] # puts "n=$n" if {![string length $n]} { - set robotSeq($task) -1 + set control($task,seq) -1 flush stdout set statusfile [open $task/status w] puts $statusfile "$status($task,unvisited) $status($task,bad) $status($task,visited)" close $statusfile return wait } - incr robotSeq($task) - if {[file isfile $n/frobots.txt]} { - puts "ok returning http://[string range $n $off end]/robots.txt" + incr control($task,seq) + if {[file isfile $n/robots.txt_.tkl]} { + # puts "ok returning http://[string range $n $off end]/robots.txt" return http://[string range $n $off end]/robots.txt } elseif {[file isdirectory $n]} { set sb [RobotFileNext1 $n http://[string range $n $off end]] @@ -98,35 +103,20 @@ proc RobotFileExist {task area host path} { if {$debuglevel > 3} { puts "RobotFileExist begin area=$area host=$host path=$path" } - set lpath [split $path /] - set l [llength $lpath] - incr l -1 - set t [lindex $lpath $l] - incr l -1 - set npath $task/$area/$host[join [lrange $lpath 0 $l] /d]/f$t - if {$debuglevel > 3} { - puts "RobotFileExist end npath=$npath" - } - return [file exists $npath] + return [file exists $task/$area/$host${path}_.tkl] } proc RobotFileUnlink {task area host path} { global status # puts "RobotFileUnlink begin" # puts "area=$area host=$host path=$path" - set lpath [split $path /] - set l [llength $lpath] - incr l -1 - set t [lindex $lpath $l] - incr l -1 - set npath $task/$area/$host[join [lrange $lpath 0 $l] /d]/f$t + set npath $task/$area/$host${path}_.tkl # puts "npath=$npath" set comp [split $npath /] - if {[catch {exec rm [join $comp /]}]} return + if {[catch {exec rm $npath}]} return set l [llength $comp] - incr l -1 - incr l -1 + incr l -2 incr status($task,$area) -1 for {set i $l} {$i > 0} {incr i -1} { set path [join [lrange $comp 0 $i] /] @@ -148,7 +138,7 @@ proc RobotFileOpen {task area host path {mode w}} { global status global debuglevel - puts "RobotFileOpen task=$task path=$path" + # puts "RobotFileOpen task=$task path=$path" if {![info exists workdir]} { return stdout @@ -174,12 +164,11 @@ proc RobotFileOpen {task area host path {mode w}} { if {[string length $d] == 0} { cd / } elseif {[catch {cd $d}]} { - puts "mkdir (1) d=$d" exec mkdir $d cd ./$d if {![string compare $area unvisited] && $i == $len && $mode == "w"} { if {[string compare $path /robots.txt]} { - set out [open frobots.txt w] + set out [open robots.txt_.tkl w] puts "creating robots.txt in $d" close $out incr status($task,unvisited) @@ -195,21 +184,16 @@ proc RobotFileOpen {task area host path {mode w}} { # puts "2 path=$path comp=$comp" for {set i 0} {$i < $len} {incr i} { - set d "d[lindex $comp $i]" - if {[string length $d] > 1} { + set d [lindex $comp $i] + if {[string length $d] > 0} { if {[catch {cd $d}]} { - puts "mkdir (2) d=$d" exec mkdir $d cd ./$d } } } set d [lindex $comp $len] - if {[string length $d]} { - set out [open f$d $mode] - } else { - set out [open f $mode] - } + set out [open ${d}_.tkl $mode] if {$mode == "w"} { incr status($task,$area) } @@ -217,11 +201,9 @@ proc RobotFileOpen {task area host path {mode w}} { return $out } - proc RobotStartJob {fname t} { global control - set f [open $fname r] set xml [read $f] puts "Reading $fname" @@ -308,7 +290,7 @@ proc RobotScanDir {} { } proc RobotRR {task} { - global robotSeq robotsRunning tasks robotsMax status + global control robotsRunning tasks robotsMax status puts "RobotRR -- running=$robotsRunning max=$robotsMax---------------" incr robotsRunning -1 @@ -330,7 +312,7 @@ proc RobotRR {task} { set statusfile [open $t/status w] puts $statusfile "$status($t,unvisited) $status($t,bad) $status($t,visited)" close $statusfile - set robotSeq($t) 0 + set control($t,seq) 0 RobotStart $t } } @@ -353,7 +335,7 @@ proc RobotDaemonLoop {} { if {[info exists tasks]} { puts "daemon loop tasks $tasks" foreach t $tasks { - set robotSeq($t) 0 + set control($t,seq) 0 RobotStart $t } while {$robotsRunning} { @@ -1150,7 +1132,7 @@ proc debug {level} { } proc task {t} { - global tasks task status robotSeq control + global tasks task status control set task $t @@ -1166,7 +1148,7 @@ proc task {t} { set status($t,bad) 0 set status($t,raw) 0 set status($t,active) 1 - set robotSeq($t) 0 + set control($t,seq) 0 set control($t,distance) 10 return 1 }