# Crude input-filter for SOIF records -- one record per file.
# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
# Version 0.2 (09/09/1998).
# This sort of follows the Nordic Web Index convention of GILS attribute use.
# Modified by Kang-Jin Lee (lee@arco.de)
# 07/10/1999

# Rule layout used throughout this file:
#   /start/ BODY /terminator/ { actions }
# In the actions, $1 is the text matched by BODY (the attribute value).

# Each input file is emitted as a single structured record of type "gils".
BEGIN { begin record gils }

# SOIF template header line ("@<template-type> { <url>"): everything after
# the "{ " up to the end of the line is stored as GILS availability/linkage.
/^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/ {
    begin element availability
    data -element linkage $1
    end element
}

# "Type" attribute -> GILS availability/linkageType.
/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
    begin element availability
    data -element linkageType $1
    end element
}

# "Last-Modification-Time" attribute -> dateOfLastModification
# (Bib-1 Use Attribute 1012).
/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
    data -element dateOfLastModification $1
}

# "MD5" attribute -> controlIdentifier; the checksum doubles as the unique
# identifier of the record (Bib-1 Use Attribute 1007).
/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ {
    data -element controlIdentifier $1
}

# "Description" attribute -> abstract (Bib-1 Use Attribute 62).
# The value may span several lines, so it runs until the next attribute
# header or the closing "}" of the template; "unread 2" pushes that
# terminator back so it can be matched again by the rule it belongs to.
/^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element abstract $1
    unread 2
}

# Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
/^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element author $1
    unread 2
}

# Multi-line rules below share one terminator: the next attribute header
# ("name{size}:<TAB>...") or the closing "}" of the SOIF template.  The
# terminator is pushed back with "unread 2" so that the rule owning that
# attribute still gets to match it.

# Keywords will be GILS' localSubjectIndex/localSubjectTerm
/^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    begin element localSubjectIndex
    data -element localSubjectTerm $1
    unread 2
    end element
}

# File-size will be GILS' supplementalInformation/bytes
/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
    begin element supplementalInformation
    data -element bytes $1
    unread 2
    end element
}

# Update-Time will be GILS' supplementalInformation/lastChecked
/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
    begin element supplementalInformation
    data -element lastChecked $1
    unread 2
    end element
}

# url-references will be GILS' crossReference/linkage
/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    begin element crossReference
    data -element linkage $1
    unread 2
    end element
}

# Title will be Bib-1 Use Attribute 4
/^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element Title $1
    unread 2
}

# Body and Partial-Text will be Bib-1 Use Attribute 1010
# Is Body really commonly used in SOIF?  Anyway, Full-Text is used by Harvest.
#/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
#    data -element sampleText $1
#    unread 2
#}

/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element sampleText $1
    unread 2
}

/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element sampleText $1
    unread 2
}

# Catch-all: consume any attribute that has no dedicated rule and emit
# nothing for it.
# Both attribute-name classes here are kept identical to the terminator used
# by the rules above ("-", ".", "_", letters, digits).  Previously the start
# pattern lacked "." and "_" and the terminator lacked ".", so an attribute
# header that another rule stopped at and pushed back could fail to match
# here, and a catch-all body could run past a header containing a dot.
# NOTE(review): assumes input not matched by any rule ends up in the record
# as data -- confirm against the regx filter documentation.
/^(-|[._A-Za-z0-9])+{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    unread 2
}

# Close the record opened by the BEGIN rule.
END { end record }