added dmoz
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 19 Jun 2002 08:28:55 +0000 (08:28 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 19 Jun 2002 08:28:55 +0000 (08:28 +0000)
test/Makefile.am
test/dmoz/all.sh [new file with mode: 0755]
test/dmoz/con1.pl [new file with mode: 0755]
test/dmoz/con2.pl [new file with mode: 0755]
test/dmoz/fetch.sh [new file with mode: 0755]
test/dmoz/plot.dem [new file with mode: 0755]
test/dmoz/update.sh [new file with mode: 0755]
test/dmoz/zebra-b.cfg [new file with mode: 0644]
test/dmoz/zebra-c.cfg [new file with mode: 0644]

index 2a440d3..facee4d 100644 (file)
@@ -1,2 +1,2 @@
 
-SUBDIRS=gils usmarc api
+SUBDIRS=gils usmarc api dmoz
diff --git a/test/dmoz/all.sh b/test/dmoz/all.sh
new file mode 100755 (executable)
index 0000000..36da4e8
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+rm zebraidx.log
+./update.sh b
+./update.sh c
+gnuplot plot.dem
diff --git a/test/dmoz/con1.pl b/test/dmoz/con1.pl
new file mode 100755 (executable)
index 0000000..d2591c3
--- /dev/null
@@ -0,0 +1,43 @@
+#!/usr/bin/perl -w
+# Convert the dmoz RDF dump read from STDIN into <meta> records on STDOUT,
+# one record per closing </ExternalPage> tag.
+#
+# Input is matched line by line, so a tag that spans several lines is not
+# recognised.
+
+use strict;
+
+# $topic holds the most recent <Topic r:id="..."> seen and carries over
+# from page to page; the other fields describe the current page only.
+my $topic = '';
+my $url = '';
+my $title = '';
+my $description = '';
+
+while (my $line = <STDIN>) {
+    if ($line =~ /<Topic r:id=\"(.*?)\">/) {
+        $topic = $1;
+    }
+    elsif ($line =~ /<ExternalPage about=\"(.*?)\">/) {
+        $url = $1;
+        # New page: clear the per-page fields so a page without a title or
+        # description neither repeats the previous page's text nor prints
+        # an undefined value.
+        $title = '';
+        $description = '';
+    }
+    elsif ($line =~ /<d:Title>(.*?)<\/d:Title>/) {
+        $title = $1;
+    }
+    elsif ($line =~ /<d:Description>(.*?)<\/d:Description>/) {
+        $description = $1;
+    }
+    elsif ($line =~ /<\/ExternalPage>/) {
+        print "<meta>\n";
+        print " <title>$title</title>\n";
+        print " <description>$description</description>\n";
+        print " <url>$url</url>\n";
+        print " <topic>$topic</topic>\n";
+        print "</meta>\n";
+    }
+}
diff --git a/test/dmoz/con2.pl b/test/dmoz/con2.pl
new file mode 100755 (executable)
index 0000000..4cecd81
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/perl -w
+# Convert the dmoz RDF dump read from STDIN into <meta> records, written
+# to dmoz.0.xml, dmoz.1.xml, ... with at most $records_per_file records in
+# each file.
+#
+# Input is matched line by line, so a tag that spans several lines is not
+# recognised.
+
+use strict;
+
+my $records_per_file = 30000;
+
+# $topic holds the most recent <Topic r:id="..."> seen and carries over
+# from page to page; the other fields describe the current page only.
+my $topic = '';
+my $url = '';
+my $title = '';
+my $description = '';
+
+my $no = 0;       # number of records written so far
+my $out;          # handle of the output file currently being written
+
+while (my $line = <STDIN>) {
+    if ($line =~ /<Topic r:id=\"(.*?)\">/) {
+        $topic = $1;
+    }
+    elsif ($line =~ /<ExternalPage about=\"(.*?)\">/) {
+        $url = $1;
+        # New page: clear the per-page fields so a page without a title or
+        # description neither repeats the previous page's text nor prints
+        # an undefined value.
+        $title = '';
+        $description = '';
+    }
+    elsif ($line =~ /<d:Title>(.*?)<\/d:Title>/) {
+        $title = $1;
+    }
+    elsif ($line =~ /<d:Description>(.*?)<\/d:Description>/) {
+        $description = $1;
+    }
+    elsif ($line =~ /<\/ExternalPage>/) {
+        # Every $records_per_file records, switch to the next output file.
+        if ($no % $records_per_file == 0) {
+            if ($no) {
+                close($out) or die "close failed: $!\n";
+            }
+            my $name = "dmoz." . ($no / $records_per_file) . ".xml";
+            open($out, '>', $name) or die "cannot create $name: $!\n";
+        }
+        print $out "<meta>\n";
+        print $out " <title>$title</title>\n";
+        print $out " <description>$description</description>\n";
+        print $out " <url>$url</url>\n";
+        print $out " <topic>$topic</topic>\n";
+        print $out "</meta>\n";
+        $no++;
+    }
+}
+if ($no != 0) {
+    close($out) or die "close failed: $!\n";
+}
diff --git a/test/dmoz/fetch.sh b/test/dmoz/fetch.sh
new file mode 100755 (executable)
index 0000000..4b32ca4
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+if test ! -f content.rdf.u8; then
+   wget http://dmoz.org/rdf/content.rdf.u8.gz
+   gunzip content.rdf.u8.gz
+fi
diff --git a/test/dmoz/plot.dem b/test/dmoz/plot.dem
new file mode 100755 (executable)
index 0000000..7be3aba
--- /dev/null
@@ -0,0 +1,22 @@
+# Plot the per-run indexing times recorded in times-b.log and times-c.log.
+# Column 2 of each log is the elapsed time and column 3 the user CPU time,
+# as written by update.sh.
+set xlabel "runs"
+set ylabel "seconds"
+plot [0:] [0:] \
+     'times-c.log' using 3 title 'ISAMC(user)' with linespoints, \
+     'times-b.log' using 3 title 'ISAMB(user)' with linespoints, \
+     'times-c.log' using 2 title 'ISAMC(total)' with linespoints, \
+     'times-b.log' using 2 title 'ISAMB(total)' with linespoints
+# Write the same plot to times.ps. The terminal is selected before the
+# output file, which is the order the gnuplot documentation recommends.
+set terminal postscript
+set output "times.ps"
+replot
+# Close times.ps and switch back to the interactive terminal.
+set output
+set terminal x11
+pause -1 "Hit return to continue"
+
+
+
diff --git a/test/dmoz/update.sh b/test/dmoz/update.sh
new file mode 100755 (executable)
index 0000000..1835eeb
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/sh
+t=$1
+test -n "$t" || exit 1
+rm -f *.mf *.LCK *.tmp
+../../index/zebraidx -l zebraidx.log init 
+i=0
+rm -f times-$t.log
+while test -f dmoz.$i.xml; do
+       echo -n "$i " >>times-$1.log
+       /usr/bin/time -f '%e %U %P' -a -o times-$t.log ../../index/zebraidx -l zebraidx.log -c zebra-$t.cfg -f 10 update dmoz.$i.xml
+       ../../index/zebraidx -l zebraidx.log -c zebra-$t.cfg stat
+       i=`expr $i + 1`
+       if test $i = 29; then
+               break
+       fi
+done
diff --git a/test/dmoz/zebra-b.cfg b/test/dmoz/zebra-b.cfg
new file mode 100644 (file)
index 0000000..baa9c1e
--- /dev/null
@@ -0,0 +1,17 @@
+# Zebra configuration file for the dmoz test, using isam: b
+# $Id: zebra-b.cfg,v 1.1 2002-06-19 08:28:55 adam Exp $
+#
+# Where the schema files, attribute files, etc are located.
+profilePath: .:../../tab:../../../yaz/tab
+
+# Files that describe the attribute sets supported.
+attset: bib1.att
+attset: gils.att
+attset: explain.att
+
+recordtype: grs.sgml
+
+#storekeys: 1
+#storedata: 1
+#recordId: (bib1,identifier-standard)
+isam: b
diff --git a/test/dmoz/zebra-c.cfg b/test/dmoz/zebra-c.cfg
new file mode 100644 (file)
index 0000000..7229218
--- /dev/null
@@ -0,0 +1,17 @@
+# Zebra configuration file for the dmoz test, using isam: c
+# $Id: zebra-c.cfg,v 1.1 2002-06-19 08:28:55 adam Exp $
+#
+# Where the schema files, attribute files, etc are located.
+profilePath: .:../../tab:../../../yaz/tab
+
+# Files that describe the attribute sets supported.
+attset: bib1.att
+attset: gils.att
+attset: explain.att
+
+recordtype: grs.sgml
+
+#storekeys: 1
+#storedata: 1
+#recordId: (bib1,identifier-standard)
+isam: c