1 # Crude input-filter for SOIF records -- one record per file.
2 # Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
3 # Version 0.2 (09/09/1998).
4 # This loosely follows the Nordic Web Index convention for GILS attribute use.
5 # Modified by Kang-Jin Lee (lee@arco.de)
8 # We use GILS structured records: the BEGIN rule opens a record of type gils.
9 BEGIN { begin record gils }
11 # SOIF template header line ("@" + template type + " { " + URL): the rest of the line, i.e. the URL, becomes GILS availability/linkage.
12 /^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/ {
13 begin element availability
14 data -element linkage $1
18 # "Type{n}:<TAB>value" (initial letter in either case): the value up to end of line becomes GILS availability/linkageType.
19 /^[tT]ype{[0-9]+}:\t/ BODY /$/ {
20 begin element availability
21 data -element linkageType $1
25 # "Last-Modification-Time{n}:<TAB>value": the value up to end of line becomes dateOfLastModification (Bib-1 Use Attribute 1012).
26 /^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
27 data -element dateOfLastModification $1
30 # "MD5{n}:<TAB>value" (letters in either case): the checksum up to end of line becomes controlIdentifier, used as the unique identifier (Bib-1 Use Attribute 1007).
31 /^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
33 # "Description{n}:<TAB>..." (value may span lines): the text up to the next "Name{n}:<TAB>" attribute line or a lone "}" line becomes abstract (Bib-1 Use Attribute 62).
34 /^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
35 data -element abstract $1
39 # "Author{n}:<TAB>..." (value may span lines): the text up to the next attribute line or a lone "}" becomes element author; intended as Bib-1 Use Attribute 1003, which only holds if gils.abs maps it accordingly (original note mentions originator) -- TODO confirm.
40 /^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
41 data -element author $1
45 # "Keywords{n}:<TAB>..." (value may span lines): the text up to the next attribute line or a lone "}" becomes GILS localSubjectIndex/localSubjectTerm.
46 /^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
47 begin element localSubjectIndex
48 data -element localSubjectTerm $1
53 # "File-Size{n}:<TAB>value": the value up to end of line becomes GILS supplementalInformation/bytes.
54 /^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
55 begin element supplementalInformation
56 data -element bytes $1
61 # "Update-Time{n}:<TAB>value": the value up to end of line becomes GILS supplementalInformation/lastChecked.
62 /^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
63 begin element supplementalInformation
64 data -element lastChecked $1
69 # "Url-References{n}:<TAB>..." (value may span lines): the text up to the next attribute line or a lone "}" becomes GILS crossReference/linkage.
70 /^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
71 begin element crossReference
72 data -element linkage $1
77 # "Title{n}:<TAB>..." (value may span lines): the text up to the next attribute line or a lone "}" becomes element Title (Bib-1 Use Attribute 4). NOTE(review): "Title" is capitalized unlike the other element names here -- confirm the schema lookup is case-insensitive.
78 /^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
79 data -element Title $1
83 # Full-Text and Partial-Text (and Body, if its rule is re-enabled) become sampleText, Bib-1 Use Attribute 1010; each value runs up to the next attribute line or a lone "}".
84 # Is Body really commonly used in SOIF? Anyway, Full-Text is what Harvest uses, so the Body rule below is left commented out.
85 #/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
86 # data -element sampleText $1
89 /^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
90 data -element sampleText $1
93 /^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
94 data -element sampleText $1
98 /^(-|[a-zA-Z0-9])+{[0-9]+}:\t/ BODY /^((-|[_A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {