From: Adam Dickmeiss Date: Tue, 17 Jun 2003 22:23:52 +0000 (+0000) Subject: Fixed SOIF filter X-Git-Tag: ZEBRA.1.3.12~40 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=cafda7991cae5042961ec520e87617efc9829194 Fixed SOIF filter --- diff --git a/tab/soif.flt b/tab/soif.flt index da9c0eb..c79e46d 100644 --- a/tab/soif.flt +++ b/tab/soif.flt @@ -9,41 +9,41 @@ BEGIN { begin record gils } # URL will be GILS' availability/linkage -/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ { +/^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/ { begin element availability data -element linkage $1 end element } # Type will be GILS' availability/linkageType -/^[tT]ype{[0-9]+}:\t/ BODY /$/ { - begin element availability - data -element linkageType $1 - end element - } +/^[tT]ype{[0-9]+}:\t/ BODY /$/ { + begin element availability + data -element linkageType $1 + end element + } # Last modification time will be Bib-1 Use Attribute 1012 -/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ { - data -element dateOfLastModification $1 - } +/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ { + data -element dateOfLastModification $1 + } # The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007 /^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 } # Description will be Bib-1 Use Attribute 62 -/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element abstract $1 unread 2 } # Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!) -/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element author $1 unread 2 } # Keywords will be GILS' localSubjectIndex/localSubjectTerm -/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { begin element localSubjectIndex data -element localSubjectTerm $1 unread 2 @@ -67,7 +67,7 @@ BEGIN { begin record gils } } # url-references will be GILS' crossReference/linkage -/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { begin element crossReference data -element linkage $1 unread 2 @@ -75,24 +75,28 @@ BEGIN { begin record gils } } # Title will be Bib-1 Use Attribute 4 -/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element Title $1 unread 2 } # Body and Partial-Text will be Bib-1 Use Attribute 1010 # Is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest. -#/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +#/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { # data -element sampleText $1 # unread 2 # } -/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element sampleText $1 unread 2 } -/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element sampleText $1 unread 2 } +/^(-|[a-zA-Z0-9])+{[0-9]+}:\t/ BODY /^((-|[_A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { + unread 2 + } + END { end record }