Added SOIF-filter.
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 4 Nov 1998 15:13:32 +0000 (15:13 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 4 Nov 1998 15:13:32 +0000 (15:13 +0000)
CHANGELOG
tab/soif.flt [new file with mode: 0644]

index c724816..90558c8 100644 (file)
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,5 @@
+Added SOIF-filter. Thanks to Peter Valkenburg.
+
 For the regx-filter "end element -record" may trigger a mark-of-record
 if outer level is reached.
 
diff --git a/tab/soif.flt b/tab/soif.flt
new file mode 100644 (file)
index 0000000..e1c3cba
--- /dev/null
@@ -0,0 +1,91 @@
+# Crude input-filter for SOIF records -- one record per file.
+# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
+# Version 0.2 (09/09/1998).
+# This sort of follows the Nordic Web Index convention of GILS attribute use.
+
+# At the start of the input, open a record of type gils (GILS structured record).
+BEGIN                                  { begin record gils }
+
+# Template header "@TYPE { URL": the text after "{ " up to end of line (the URL) will be GILS' availability/linkage
+/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/   {
+                                         begin element availability
+                                         data -element linkage $1
+                                         end element
+                                       }
+
+# Type (the rest of the line after "Type{N}:<TAB>") will be GILS' availability/linkageType
+/^[tT]ype{[0-9]+}:\t/  BODY /$/ {
+                                         begin element availability
+                                         data -element linkageType $1
+                                         end element
+                                       }
+
+# Last-Modification-Time (rest of the line) is stored as dateOfLastModification, i.e. Bib-1 Use Attribute 1012
+/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/  BODY /$/        {
+                                         data -element dateOfLastModification $1
+                                       }
+
+# The MD5 checksum (rest of the line) is stored as controlIdentifier, a unique identifier under Bib-1 Use Attribute 1007
+/^[mM][dD]5{[0-9]+}:\t/  BODY /$/      { data -element controlIdentifier $1 }
+
+# Description will be Bib-1 Use Attribute 62. The value runs up to the next "name{N}:<TAB>" line or a lone "}"; unread 2 hands that line back for re-matching.
+/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         data -element abstract $1
+                                         unread 2
+                                       }
+
+# Author (value may span lines) is emitted as element "author"; meant as Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
+/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         data -element author $1
+                                         unread 2
+                                       }
+
+# Keywords (value may span lines, up to the next attribute line or "}") will be GILS' localSubjectIndex/localSubjectTerm
+/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         begin element localSubjectIndex
+                                         data -element localSubjectTerm $1
+                                         unread 2
+                                         end element
+                                       }
+
+# File-size will be GILS' supplementalInformation/bytes. NOTE(review): unread 2 re-reads the /$/ match here, unlike the Type and MD5 rules -- confirm intended.
+/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
+                                         begin element supplementalInformation
+                                         data -element bytes $1
+                                         unread 2
+                                         end element
+                                       }
+
+# Update-Time will be GILS' supplementalInformation/lastChecked. NOTE(review): unread 2 re-reads the /$/ match here, unlike the Type and MD5 rules -- confirm intended.
+/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+                                         begin element supplementalInformation
+                                         data -element lastChecked $1
+                                         unread 2
+                                         end element
+                                       }
+
+# Url-references (value may span lines, up to the next attribute line or "}") will be GILS' crossReference/linkage
+/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         begin element crossReference
+                                         data -element linkage $1
+                                         unread 2
+                                         end element
+                                       }
+
+# Title will be Bib-1 Use Attribute 4. NOTE(review): the element is spelled "Title" while every other element here starts lowercase -- confirm tag lookup ignores case.
+/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         data -element Title $1
+                                         unread 2
+                                       }
+
+# Body and Partial-Text (values may span lines) are both emitted as sampleText, i.e. Bib-1 Use Attribute 1010
+/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         data -element sampleText $1
+                                         unread 2
+                                       }
+/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+                                         data -element sampleText $1
+                                         unread 2
+                                       }
+# At the end of the input, close the record opened by the BEGIN rule.
+END                                    { end record }