# Crude input-filter for SOIF records -- one record per file.
# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
# Version 0.2 (09/09/1998).
# This sort of follows the Nordic Web Index convention of GILS attribute use.
#
# Layout note: a '#' comment runs to the end of the physical line, so every
# rule must start on a line of its own; a rule that shares a line with a
# preceding comment is silently disabled.
#
# Rule shape:  /header/ BODY /terminator/ { actions }
#   $0 = text matched by /header/, $1 = text matched by BODY,
#   $2 = text matched by /terminator/.

# We'll use GILS structured records.
BEGIN {
    begin record gils
}

# URL will be GILS' availability/linkage
# (taken from the rest of the "@TEMPLATE { url" line that opens the record).
/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/ {
    begin element availability
        data -element linkage $1
    end element
}

# Type will be GILS' availability/linkageType
/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
    begin element availability
        data -element linkageType $1
    end element
}

# Last modification time will be Bib-1 Use Attribute 1012
/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
    data -element dateOfLastModification $1
}

# The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007
/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ {
    data -element controlIdentifier $1
}

# Description will be Bib-1 Use Attribute 62
# The value may span several lines, so the body is terminated by the next
# "name{size}:<TAB>" attribute header (or the closing "}" of the record).
# "unread 2" pushes that terminator back so the following rule can match it.
/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    data -element abstract $1
    unread 2
}

# Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    data -element author $1
    unread 2
}

# In the multi-line rules below the body ends at the next
# "name{size}:<TAB>" attribute header (or the closing "}" of the record);
# "unread 2" puts that terminator ($2) back on the input so the next rule
# can still match it.  Each rule must start on its own line, because a '#'
# comment extends to the end of the physical line.

# Keywords will be GILS' localSubjectIndex/localSubjectTerm
/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    begin element localSubjectIndex
        data -element localSubjectTerm $1
        unread 2
    end element
}

# File-size will be GILS' supplementalInformation/bytes
/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
    begin element supplementalInformation
        data -element bytes $1
        unread 2
    end element
}

# Update-Time will be GILS' supplementalInformation/lastChecked
/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
    begin element supplementalInformation
        data -element lastChecked $1
        unread 2
    end element
}

# url-references will be GILS' crossReference/linkage
/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    begin element crossReference
        data -element linkage $1
        unread 2
    end element
}

# Title will be Bib-1 Use Attribute 4
# (element name written in lower case, like every other element in this file)
/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    data -element title $1
    unread 2
}

# Body and Partial-Text will be Bib-1 Use Attribute 1010
/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    data -element sampleText $1
    unread 2
}

/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
    data -element sampleText $1
    unread 2
}

END {
    end record
}