Added SOIF-filter.
[idzebra-moved-to-github.git] / tab / soif.flt
1 # Crude input-filter for SOIF records -- one record per file.
2 # Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
3 # Version 0.2 (09/09/1998).
4 # This loosely follows the Nordic Web Index convention for GILS attribute use.
5
6 # At the start of input, open the output record as a "gils" (GILS structured) record; the END rule closes it.
7 BEGIN                                   { begin record gils }
8
9 # SOIF header line ("@TYPE { URL"): the text after "{ " up to end of line (BODY, $1) becomes GILS' availability/linkage.
10 /^@[A-Za-z][-.A-Za-z_]* { / BODY /$/    {
11                                           begin element availability
12                                           data -element linkage $1
13                                           end element
14                                         }
15
16 # Type attribute ("Type{n}:<TAB>value", value taken up to end of line) will be GILS' availability/linkageType.
17 /^[tT]ype{[0-9]+}:\t/  BODY /$/ {
18                                           begin element availability
19                                           data -element linkageType $1
20                                           end element
21                                         }
22
23 # Last-Modification-Time (value taken up to end of line) is emitted as dateOfLastModification; it will be Bib-1 Use Attribute 1012.
24 /^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/  BODY /$/ {
25                                           data -element dateOfLastModification $1
26                                         }
27
28 # The MD5 checksum (value taken up to end of line) is emitted as controlIdentifier; it is used as a unique identifier under Bib-1 Use Attribute 1007.
29 /^[mM][dD]5{[0-9]+}:\t/  BODY /$/       { data -element controlIdentifier $1 }
30
31 # Description (emitted as abstract) will be Bib-1 Use Attribute 62; the value may span lines, ending at the next "Name{n}:<TAB>" line or a lone "}", which "unread 2" hands back to the input.
32 /^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
33                                           data -element abstract $1
34                                           unread 2
35                                         }
36
37 # Author (possibly multi-line; terminator is unread) will be Bib-1 Use Attribute 1003 if gils.abs maps it so. NOTE(review): the original note says "originator" but the rule emits "author" -- confirm against gils.abs.
38 /^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
39                                           data -element author $1
40                                           unread 2
41                                         }
42
43 # Keywords (possibly multi-line) will be GILS' localSubjectIndex/localSubjectTerm; the terminating line is handed back with "unread 2" so another rule can match it.
44 /^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
45                                           begin element localSubjectIndex
46                                           data -element localSubjectTerm $1
47                                           unread 2
48                                           end element
49                                         }
50
51 # File-size (value up to end of line) will be GILS' supplementalInformation/bytes. NOTE(review): this rule also does "unread 2" on the /$/ match, unlike the Type and MD5 rules -- confirm that is intended.
52 /^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
53                                           begin element supplementalInformation
54                                           data -element bytes $1
55                                           unread 2
56                                           end element
57                                         }
58
59 # Update-Time (value up to end of line) will be GILS' supplementalInformation/lastChecked. NOTE(review): this rule also does "unread 2" on the /$/ match, unlike the Type and MD5 rules -- confirm that is intended.
60 /^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
61                                           begin element supplementalInformation
62                                           data -element lastChecked $1
63                                           unread 2
64                                           end element
65                                         }
66
67 # Url-References (possibly multi-line) will be GILS' crossReference/linkage; the terminating line is handed back with "unread 2" so another rule can match it.
68 /^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
69                                           begin element crossReference
70                                           data -element linkage $1
71                                           unread 2
72                                           end element
73                                         }
74
75 # Title (possibly multi-line; terminator is unread) will be Bib-1 Use Attribute 4. NOTE(review): the element is spelled "Title" with a capital T, unlike the other element names here -- confirm the tag lookup is case-insensitive.
76 /^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
77                                           data -element Title $1
78                                           unread 2
79                                         }
80
81 # Body and Partial-Text (both possibly multi-line; terminator is unread) are each emitted as sampleText, which will be Bib-1 Use Attribute 1010.
82 /^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
83                                           data -element sampleText $1
84                                           unread 2
85                                         }
86 /^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
87                                           data -element sampleText $1
88                                           unread 2
89                                         }
90 # At end of input, close the record that the BEGIN rule opened.
91 END                                     { end record }