#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.40 2003/06/10 12:46:04 adam Exp $
+# $Id: robot.tcl,v 1.43 2003/06/11 09:40:22 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
foreach n $ns {
if {[file isfile $n]} {
set off [string last / $n]
- incr off 2
- return $lead/[string range $n $off end]
+ # skip /
+ incr off
+ set end [string length $n]
+ # skip _.tkl
+ incr end -6
+ return $lead/[string range $n $off $end]
}
}
foreach n $ns {
if {[file isdirectory $n]} {
set off [string last / $n]
- incr off 2
+ # skip /
+ incr off
set sb [RobotFileNext1 $n $lead/[string range $n $off end]]
if {[string length $sb]} {
return $sb
}
proc RobotFileNext {task area} {
- global robotSeq
+ global control
global idletime ns
global status
- # puts "RobotFileNext robotSeq=$robotSeq($task)"
- if {$robotSeq($task) < 0} {
+ # puts "RobotFileNext seq=$control($task,seq)"
+ if {$control($task,seq) < 0} {
return {}
}
- if {$robotSeq($task) == 0} {
+ if {$control($task,seq) == 0} {
if {[catch {set ns($task) [glob $task/$area/*]}]} {
return done
}
# puts "ns=$ns($task)"
set off [string length $task/$area]
incr off
- set n [lindex $ns($task) $robotSeq($task)]
+ set n [lindex $ns($task) $control($task,seq)]
# puts "n=$n"
if {![string length $n]} {
- set robotSeq($task) -1
+ set control($task,seq) -1
flush stdout
set statusfile [open $task/status w]
puts $statusfile "$status($task,unvisited) $status($task,bad) $status($task,visited)"
close $statusfile
return wait
}
- incr robotSeq($task)
- if {[file isfile $n/frobots.txt]} {
- puts "ok returning http://[string range $n $off end]/robots.txt"
+ incr control($task,seq)
+ if {[file isfile $n/robots.txt_.tkl]} {
+ # puts "ok returning http://[string range $n $off end]/robots.txt"
return http://[string range $n $off end]/robots.txt
} elseif {[file isdirectory $n]} {
set sb [RobotFileNext1 $n http://[string range $n $off end]]
if {$debuglevel > 3} {
puts "RobotFileExist begin area=$area host=$host path=$path"
}
- set lpath [split $path /]
- set l [llength $lpath]
- incr l -1
- set t [lindex $lpath $l]
- incr l -1
- set npath $task/$area/$host[join [lrange $lpath 0 $l] /d]/f$t
- if {$debuglevel > 3} {
- puts "RobotFileExist end npath=$npath"
- }
- return [file exists $npath]
+ return [file exists $task/$area/$host${path}_.tkl]
}
proc RobotFileUnlink {task area host path} {
global status
# puts "RobotFileUnlink begin"
# puts "area=$area host=$host path=$path"
- set lpath [split $path /]
- set l [llength $lpath]
- incr l -1
- set t [lindex $lpath $l]
- incr l -1
- set npath $task/$area/$host[join [lrange $lpath 0 $l] /d]/f$t
+ set npath $task/$area/$host${path}_.tkl
# puts "npath=$npath"
set comp [split $npath /]
- if {[catch {exec rm [join $comp /]}]} return
+ if {[catch {exec rm $npath}]} return
set l [llength $comp]
- incr l -1
- incr l -1
+ incr l -2
incr status($task,$area) -1
for {set i $l} {$i > 0} {incr i -1} {
set path [join [lrange $comp 0 $i] /]
global status
global debuglevel
- puts "RobotFileOpen task=$task path=$path"
+ # puts "RobotFileOpen task=$task path=$path"
if {![info exists workdir]} {
return stdout
if {[string length $d] == 0} {
cd /
} elseif {[catch {cd $d}]} {
- puts "mkdir (1) d=$d"
exec mkdir $d
cd ./$d
if {![string compare $area unvisited] && $i == $len && $mode == "w"} {
if {[string compare $path /robots.txt]} {
- set out [open frobots.txt w]
+ set out [open robots.txt_.tkl w]
puts "creating robots.txt in $d"
close $out
incr status($task,unvisited)
# puts "2 path=$path comp=$comp"
for {set i 0} {$i < $len} {incr i} {
- set d "d[lindex $comp $i]"
- if {[string length $d] > 1} {
+ set d [lindex $comp $i]
+ if {[string length $d] > 0} {
if {[catch {cd $d}]} {
- puts "mkdir (2) d=$d"
exec mkdir $d
cd ./$d
}
}
}
set d [lindex $comp $len]
- if {[string length $d]} {
- set out [open f$d $mode]
- } else {
- set out [open f $mode]
- }
+ set out [open ${d}_.tkl $mode]
if {$mode == "w"} {
incr status($task,$area)
}
return $out
}
-
proc RobotStartJob {fname t} {
global control
-
set f [open $fname r]
set xml [read $f]
puts "Reading $fname"
}
proc RobotRR {task} {
- global robotSeq robotsRunning tasks robotsMax status
+ global control robotsRunning tasks robotsMax status
puts "RobotRR -- running=$robotsRunning max=$robotsMax---------------"
incr robotsRunning -1
set statusfile [open $t/status w]
puts $statusfile "$status($t,unvisited) $status($t,bad) $status($t,visited)"
close $statusfile
- set robotSeq($t) 0
+ set control($t,seq) 0
RobotStart $t
}
}
if {[info exists tasks]} {
puts "daemon loop tasks $tasks"
foreach t $tasks {
- set robotSeq($t) 0
+ set control($t,seq) 0
RobotStart $t
}
while {$robotsRunning} {
global control
global debuglevel
+ set default_ret 1
+
if {$debuglevel > 3} {
puts "CHECKRULE $type $this"
}
# consider mask (! negates)
set masks [lindex $l 2]
set ok 0
+ set default_ret 0
foreach mask $masks {
if {$debuglevel > 4} {
puts "consider single mask $mask"
return 0
}
}
- if {$debuglevel > 3} {
- puts "CHECKFULE MATCH FAIL"
- }
- return 0
}
if {$debuglevel > 3} {
- puts "CHECKRULE MATCH OK"
+ puts "CHECKRULE MATCH DEFAULT $default_ret"
}
- return 1
+ return $default_ret
}
}
proc task {t} {
- global tasks task status robotSeq control
+ global tasks task status control
set task $t
set status($t,bad) 0
set status($t,raw) 0
set status($t,active) 1
- set robotSeq($t) 0
+ set control($t,seq) 0
set control($t,distance) 10
return 1
}