X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=tab%2Fsoif.flt;fp=tab%2Fsoif.flt;h=e1c3cba8a60c0e9350c899bac44104d9faed0ceb;hb=c6ab7eba05f6ee40175f89ccf80998cdf6435c38;hp=0000000000000000000000000000000000000000;hpb=e6cd523a792fca5557b3d9647f7b33986a4c32de;p=idzebra-moved-to-github.git

diff --git a/tab/soif.flt b/tab/soif.flt
new file mode 100644
index 0000000..e1c3cba
--- /dev/null
+++ b/tab/soif.flt
@@ -0,0 +1,91 @@
+# Crude input filter for SOIF records -- one record per file.
+# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
+# Version 0.2 (09/09/1998).
+# Loosely follows the Nordic Web Index convention of GILS attribute use.
+
+# Open one record of type gils at the start of input; the END rule at the bottom closes it.
+BEGIN { begin record gils }
+
+# URL: the text after the opening @Template and brace, up to end of line, becomes GILS availability/linkage.
+/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ {
+        begin element availability
+        data -element linkage $1
+        end element
+    }
+
+# Type becomes GILS availability/linkageType.
+/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
+        begin element availability
+        data -element linkageType $1
+        end element
+    }
+
+# Last-Modification-Time becomes dateOfLastModification (Bib-1 Use Attribute 1012).
+/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+        data -element dateOfLastModification $1
+    }
+
+# The MD5 checksum becomes controlIdentifier, used as a unique identifier (Bib-1 Use Attribute 1007).
+/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
+
+# Description becomes abstract (Bib-1 Use Attribute 62).  Its value ends at the next attribute line or the closing brace; unread 2 presumably returns that terminator to the input (TODO confirm).
+/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        data -element abstract $1
+        unread 2
+    }
+
+# Author becomes element author: Bib-1 Use Attribute 1003 only if gils.abs maps it so (original note: if gils.abs maps originator to it!!).
+/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        data -element author $1
+        unread 2
+    }
+
+# Keywords become GILS localSubjectIndex/localSubjectTerm.
+/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        begin element localSubjectIndex
+        data -element localSubjectTerm $1
+        unread 2
+        end element
+    }
+
+# File-Size becomes GILS supplementalInformation/bytes.  NOTE(review): unread 2 after an end-of-line terminator differs from the Type rule above -- confirm it is intended.
+/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
+        begin element supplementalInformation
+        data -element bytes $1
+        unread 2
+        end element
+    }
+
+# Update-Time becomes GILS supplementalInformation/lastChecked (same unread 2 after an end-of-line terminator as File-Size).
+/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+        begin element supplementalInformation
+        data -element lastChecked $1
+        unread 2
+        end element
+    }
+
+# URL-References become GILS crossReference/linkage.
+/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        begin element crossReference
+        data -element linkage $1
+        unread 2
+        end element
+    }
+
+# Title becomes element Title (Bib-1 Use Attribute 4).  NOTE(review): capital T, unlike the other element names -- confirm the lookup ignores case.
+/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        data -element Title $1
+        unread 2
+    }
+
+# Body and Partial-Text both become sampleText (Bib-1 Use Attribute 1010).
+/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        data -element sampleText $1
+        unread 2
+    }
+/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+        data -element sampleText $1
+        unread 2
+    }
+
+END { end record }