+set idletime 30000
+set acceptLanguage {}
+set debuglevel 0
+
+# Rules: allow, deny, url
+
+proc checkrule {task type this} {
+ global control
+ global debuglevel
+
+ if {$debuglevel > 3} {
+ puts "CHECKRULE $type $this"
+ }
+ if {[info exist control($task,alrules)]} {
+ foreach l $control($task,alrules) {
+ if {$debuglevel > 3} {
+ puts "consider $l"
+ }
+ # consider type
+ if {[lindex $l 1] != $type} continue
+ # consider mask (! negates)
+ set masks [lindex $l 2]
+ set ok 0
+ foreach mask $masks {
+ if {$debuglevel > 4} {
+ puts "consider single mask $mask"
+ }
+ if {[string index $mask 0] == "!"} {
+ set mask [string range $mask 1 end]
+ if {[string match $mask $this]} continue
+ } else {
+ if {![string match $mask $this]} continue
+ }
+ set ok 1
+ }
+ if {$debuglevel > 4} {
+ puts "ok = $ok"
+ }
+ if {!$ok} continue
+ # OK, we have a match
+ if {[lindex $l 0] == "allow"} {
+ if {$debuglevel > 3} {
+ puts "CHECKRULE MATCH OK"
+ }
+ return 1
+ } else {
+ if {$debuglevel > 3} {
+ puts "CHECKFULE MATCH FAIL"
+ }
+ return 0
+ }
+ }
+ }
+ if {$debuglevel > 3} {
+ puts "CHECKRULE MATCH OK"
+ }
+ return 1
+}
+
+
+proc url {href} {
+ global debuglevel task
+
+ if {[RobotHref $task http://www.indexdata.dk/ href host path]} {
+ if {![RobotFileExist $task visited $host $path]} {
+ set outf [RobotFileOpen $task unvisited $host $path]
+ RobotWriteRecord $outf href 0
+ RobotFileClose $outf
+ }
+ }
+}
+
+proc deny {type stuff} {
+ global control task
+
+ lappend control($task,alrules) [list deny $type $stuff]
+}
+
+proc allow {type stuff} {
+ global control task
+
+ lappend control($task,alrules) [list allow $type $stuff]
+}
+
+proc debug {level} {
+ global debuglevel
+
+ set debuglevel $level
+}
+
+proc task {t} {
+ global tasks task status robotSeq control
+
+ set task $t
+
+ if {[info exists tasks]} {
+ if {[lsearch -exact $tasks $t] >= 0} {
+ return 0
+ }
+ }
+
+ lappend tasks $t
+ set status($t,unvisited) 0
+ set status($t,visited) 0
+ set status($t,bad) 0
+ set status($t,raw) 0
+ set status($t,active) 1
+ set robotSeq($t) 0
+ set control($t,distance) 10
+ return 1
+}
+
+# Little utility that ensures that at least one task is present (main).
+proc chktask {} {
+ global tasks
+ if {![info exist tasks]} {
+ task main
+ }
+}
+
+
+# Parse options
+
+set i 0
+set l [llength $argv]
+
+if {$l < 2} {
+ puts {tclrobot: usage:}
+ puts {tclrobot [-j jobs] [-i idle] [-c count] [-d domain] [-r rules] [url ..]}
+ puts " Example: -c 3 -d '*.dk' http://www.indexdata.dk/"