From c6ab7eba05f6ee40175f89ccf80998cdf6435c38 Mon Sep 17 00:00:00 2001
From: Adam Dickmeiss
Date: Wed, 4 Nov 1998 15:13:32 +0000
Subject: [PATCH] Added SOIF-filter.

---
 CHANGELOG    |  2 ++
 tab/soif.flt | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+)
 create mode 100644 tab/soif.flt

diff --git a/CHANGELOG b/CHANGELOG
index c724816..90558c8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,5 @@
+Added SOIF-filter. Thanks to Peter Valkenburg.
+
 For the regx-filter "end element -record" may trigger a mark-of-record
 if outer level is reached.
 
diff --git a/tab/soif.flt b/tab/soif.flt
new file mode 100644
index 0000000..e1c3cba
--- /dev/null
+++ b/tab/soif.flt
@@ -0,0 +1,91 @@
+# Crude input-filter for SOIF records -- one record per file.
+# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
+# Version 0.2 (09/09/1998).
+# This sort of follows the Nordic Web Index convention of GILS attribute use.
+
+# We'll use GILS structured records.
+BEGIN { begin record gils }
+
+# URL will be GILS' availability/linkage
+/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ {
+    begin element availability
+        data -element linkage $1
+    end element
+}
+
+# Type will be GILS' availability/linkageType
+/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
+    begin element availability
+        data -element linkageType $1
+    end element
+}
+
+# Last modification time will be Bib-1 Use Attribute 1012
+/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+    data -element dateOfLastModification $1
+}
+
+# The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007
+/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
+
+# Description will be Bib-1 Use Attribute 62
+/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element abstract $1
+    unread 2
+}
+
+# Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
+/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element author $1
+    unread 2
+}
+
+# Keywords will be GILS' localSubjectIndex/localSubjectTerm
+/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    begin element localSubjectIndex
+        data -element localSubjectTerm $1
+        unread 2
+    end element
+}
+
+# File-size will be GILS' supplementalInformation/bytes
+/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
+    begin element supplementalInformation
+        data -element bytes $1
+        unread 2
+    end element
+}
+
+# Update-Time will be GILS' supplementalInformation/lastChecked
+/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+    begin element supplementalInformation
+        data -element lastChecked $1
+        unread 2
+    end element
+}
+
+# url-references will be GILS' crossReference/linkage
+/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    begin element crossReference
+        data -element linkage $1
+        unread 2
+    end element
+}
+
+# Title will be Bib-1 Use Attribute 4
+/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element Title $1
+    unread 2
+}
+
+# Body and Partial-Text will be Bib-1 Use Attribute 1010
+/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element sampleText $1
+    unread 2
+}
+/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+    data -element sampleText $1
+    unread 2
+}
+
+END { end record }
-- 
1.7.10.4