X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=tab%2Fsoif.flt;h=c79e46d7d64b7fcb4e70cdda52f85a0931bc8654;hp=e1c3cba8a60c0e9350c899bac44104d9faed0ceb;hb=11d386bdb7b6b4a9bba10da749a5770b509c1fbd;hpb=c6ab7eba05f6ee40175f89ccf80998cdf6435c38 diff --git a/tab/soif.flt b/tab/soif.flt index e1c3cba..c79e46d 100644 --- a/tab/soif.flt +++ b/tab/soif.flt @@ -2,46 +2,48 @@ # Author: Peter Valkenburg / TERENA (valkenburg@terena.nl) # Version 0.2 (09/09/1998). # This sort of follows the Nordic Web Index convention of GILS attribute use. +# Modified by Kang-Jin Lee (lee@arco.de) +# 07/10/1999 # We'll use GILS structured records. BEGIN { begin record gils } # URL will be GILS' availability/linkage -/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ { +/^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/ { begin element availability data -element linkage $1 end element } # Type will be GILS' availability/linkageType -/^[tT]ype{[0-9]+}:\t/ BODY /$/ { - begin element availability - data -element linkageType $1 - end element - } +/^[tT]ype{[0-9]+}:\t/ BODY /$/ { + begin element availability + data -element linkageType $1 + end element + } # Last modification time will be Bib-1 Use Attribute 1012 -/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ { - data -element dateOfLastModification $1 - } +/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ { + data -element dateOfLastModification $1 + } # The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007 -/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 } +/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 } # Description will be Bib-1 Use Attribute 62 -/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element abstract $1 unread 2 } # Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!) -/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element author $1 unread 2 } # Keywords will be GILS' localSubjectIndex/localSubjectTerm -/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { begin element localSubjectIndex data -element localSubjectTerm $1 unread 2 @@ -49,7 +51,7 @@ BEGIN { begin record gils } } # File-size will be GILS' supplementalInformation/bytes -/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ { +/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ { begin element supplementalInformation data -element bytes $1 unread 2 @@ -57,7 +59,7 @@ BEGIN { begin record gils } } # Update-Time will be GILS' supplementalInformation/lastChecked -/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ { +/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ { begin element supplementalInformation data -element lastChecked $1 unread 2 @@ -65,7 +67,7 @@ BEGIN { begin record gils } } # url-references will be GILS' crossReference/linkage -/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { begin element crossReference data -element linkage $1 unread 2 @@ -73,19 +75,28 @@ BEGIN { begin record gils } } # Title will be Bib-1 Use Attribute 4 -/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element Title $1 unread 2 } # Body and Partial-Text will be Bib-1 Use Attribute 1010 -/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +# Is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest. +#/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { +# data -element sampleText $1 +# unread 2 +# } +/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element sampleText $1 unread 2 } -/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { data -element sampleText $1 unread 2 } +/^(-|[a-zA-Z0-9])+{[0-9]+}:\t/ BODY /^((-|[_A-Za-z0-9])+{[0-9]+}:\t.*|})$/ { + unread 2 + } + END { end record }