From: Adam Dickmeiss Date: Wed, 4 Nov 1998 15:13:32 +0000 (+0000) Subject: Added SOIF-filter. X-Git-Tag: ZEBRA.1.0~146 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=c6ab7eba05f6ee40175f89ccf80998cdf6435c38 Added SOIF-filter. --- diff --git a/CHANGELOG b/CHANGELOG index c724816..90558c8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,5 @@ +Added SOIF-filter. Thanks to Peter Valkenburg. + For the regx-filter "end element -record" may trigger a mark-of-record if outer level is reached. diff --git a/tab/soif.flt b/tab/soif.flt new file mode 100644 index 0000000..e1c3cba --- /dev/null +++ b/tab/soif.flt @@ -0,0 +1,91 @@
+# Crude input-filter for SOIF records -- one record per file.
+# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
+# Version 0.2 (09/09/1998).
+# This sort of follows the Nordic Web Index convention of GILS attribute use. Rules have the shape /start/ BODY /terminator/ { actions }; $1 presumably refers to the BODY text (confirm against the Zebra regx filter documentation).
+
+# We'll use GILS structured records: BEGIN opens the "gils" record that the rules below add elements to.
+BEGIN { begin record gils }
+
+# URL will be GILS' availability/linkage: whatever follows the "@<name> { " template header, up to end of line.
+/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ {
+    begin element availability
+    data -element linkage $1
+    end element
+    }
+
+# Type will be GILS' availability/linkageType (value up to end of line). Note this opens its own availability element, separate from the URL rule's.
+/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
+    begin element availability
+    data -element linkageType $1
+    end element
+    }
+
+# Last modification time will be Bib-1 Use Attribute 1012 (emitted as dateOfLastModification; value up to end of line).
+/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+    data -element dateOfLastModification $1
+    }
+
+# The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007 (emitted as controlIdentifier).
+/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
+
+# Description will be Bib-1 Use Attribute 62 (emitted as abstract). BODY runs until a line that is either "name{digits}:<TAB>..." or a lone "}"; "unread 2" presumably pushes that line back so another rule can match it.
+/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element abstract $1
+    unread 2
+    }
+
+# Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!). NOTE(review): the rule emits element "author", not "originator" -- confirm against gils.abs.
+/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element author $1
+    unread 2
+    }
+
+# Keywords will be GILS' localSubjectIndex/localSubjectTerm. BODY runs until a line that is either "name{digits}:<TAB>..." or a lone "}"; "unread 2" presumably pushes that line back so another rule can match it (see the Zebra regx filter docs).
+/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    begin element localSubjectIndex
+    data -element localSubjectTerm $1
+    unread 2
+    end element
+    }
+
+# File-size will be GILS' supplementalInformation/bytes. NOTE(review): the terminator is /$/ (end of line) yet "unread 2" is still issued, unlike the Type, Last-Modification-Time and MD5 rules -- confirm this is intended.
+/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
+    begin element supplementalInformation
+    data -element bytes $1
+    unread 2
+    end element
+    }
+
+# Update-Time will be GILS' supplementalInformation/lastChecked. It opens its own supplementalInformation element. NOTE(review): same /$/ plus "unread 2" combination as the File-size rule -- confirm.
+/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+    begin element supplementalInformation
+    data -element lastChecked $1
+    unread 2
+    end element
+    }
+
+# url-references will be GILS' crossReference/linkage (BODY may span lines; it ends at the next "name{digits}:<TAB>..." line or a lone "}").
+/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    begin element crossReference
+    data -element linkage $1
+    unread 2
+    end element
+    }
+
+# Title will be Bib-1 Use Attribute 4. NOTE(review): the element is spelled "Title" while every other element name in this filter starts lower-case -- confirm the abstract syntax accepts this spelling.
+/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element Title $1
+    unread 2
+    }
+
+# Body and Partial-Text will be Bib-1 Use Attribute 1010; both rules emit the same element, sampleText.
+/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element sampleText $1
+    unread 2
+    }
+/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element sampleText $1
+    unread 2
+    }
+
+END { end record }