#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.21 2001/10/26 13:26:11 adam Exp $
+# $Id: robot.tcl,v 1.25 2001/11/07 11:50:07 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
}
puts $out {></meta>}
} body {
- regsub -all -nocase {<script([^<]|(<!.*>))*</script>} $body {} abody
- regsub -all {<[^\>]+>} $abody {} nbody
+ # Strip HTML comments first so that comment text never reaches the
+ # indexed content. A comment ends with "-->"; its body may contain
+ # single hyphens but not "--". Each comment is replaced by one space.
+ regsub -all {<!--([^-]|-[^-])*-->} $body { } abody
+ # Drop script elements. The pattern cannot cross a "<", so it relies on
+ # the comment stripping above having removed any "<!-- ... -->" wrapper.
+ regsub -all -nocase {<script[^<]*</script>} $abody {} bbody
+ # Remove every remaining tag, leaving only the text content.
+ regsub -all {<[^\>]+>} $bbody {} nbody
puts $out "<documentcontent>"
puts $out $nbody
puts $out "</documentcontent>"
}
}
-proc Robot200 {url} {
+proc RobotWriteMetadata {url out} {
global URL domains
-
- set out [RobotFileOpen raw $URL($url,hostport) $URL($url,path)]
- puts -nonewline $out $URL($url,buf)
- RobotFileClose $out
-
- if {![checkrule mime $URL($url,head,content-type)]} {
- RobotError $url mimedeny
- return
- }
- set out [RobotFileOpen visited $URL($url,hostport) $URL($url,path)]
puts $out "<zmbot>"
set distance 1000
}
}
puts $out "</zmbot>"
+}
+
+proc Robot200 {url} {
+ global URL domains
+
+ set out [RobotFileOpen raw $URL($url,hostport) $URL($url,path)]
+ puts -nonewline $out $URL($url,buf)
RobotFileClose $out
- # puts "Parsing done"
+
+ if {![checkrule mime $URL($url,head,content-type)]} {
+ RobotError $url mimedeny
+ return
+ }
+ set out [RobotFileOpen visited $URL($url,hostport) $URL($url,path)]
+ RobotWriteMetadata $url $out
+ RobotFileClose $out
+
RobotFileUnlink unvisited $URL($url,hostport) $URL($url,path)
}
}
# consider type
if {[lindex $l 1] != $type} continue
- # consider mask
- if {![string match [lindex $l 2] $this]} continue
+ # consider mask (! negates)
+ set masks [lindex $l 2]
+ set ok 0
+ foreach mask $masks {
+ if {$debuglevel > 4} {
+ puts "consider single mask $mask"
+ }
+ if {[string index $mask 0] == "!"} {
+ set mask [string range $mask 1 end]
+ if {[string match $mask $this]} continue
+ } else {
+ if {![string match $mask $this]} continue
+ }
+ set ok 1
+ }
+ if {$debuglevel > 4} {
+ puts "ok = $ok"
+ }
+ if {!$ok} continue
# OK, we have a match
if {[lindex $l 0] == "allow"} {
if {$debuglevel > 3} {
- puts "CHECKRULE MATH OK"
+ puts "CHECKRULE MATCH OK"
}
return 1
} else {
}
}
if {$debuglevel > 3} {
- puts "CHECKRULE MATH OK"
+ puts "CHECKRULE MATCH OK"
}
return 1
}