added lot of examples
[idzebra-moved-to-github.git] / tab / soif.flt
1 # Crude input-filter for SOIF records -- one record per file.
2 # Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
3 # Version 0.2 (09/09/1998).
4 # This sort of follows the Nordic Web Index convention of GILS attribute use.
5 # Modified by Kang-Jin Lee (lee@arco.de)
6 # 07/10/1999
7
8 # When input begins, open a GILS structured record; all rules below add to it.
9 BEGIN                                   { begin record gils }
10
11 # Template header line ("@NAME { URL"): the text after "{ " up to end of line will be GILS' availability/linkage
12 /^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/ {
13                                           begin element availability
14                                           data -element linkage $1
15                                           end element
16                                         }
17
18 # Type (single-line value, read up to end of line) will be GILS' availability/linkageType
19 /^[tT]ype{[0-9]+}:\t/ BODY /$/          {
20                                           begin element availability
21                                           data -element linkageType $1
22                                           end element
23                                         }
24
25 # Last-Modification-Time (single-line value) goes to dateOfLastModification, i.e. Bib-1 Use Attribute 1012
26 /^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/  {
27                                           data -element dateOfLastModification $1
28                                         }
29
30 # The MD5 checksum (single-line value) goes to controlIdentifier, used as a unique identifier under Bib-1 Use Attribute 1007
31 /^[mM][dD]5{[0-9]+}:\t/ BODY /$/        { data -element controlIdentifier $1 }
32
33 # Description (may span lines: read until the next "name{n}:<TAB>" line or a lone "}", which is then unread) will be Bib-1 Use Attribute 62
34 /^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
35                                           data -element abstract $1
36                                           unread 2
37                                         }
38
39 # Author (multi-line value) goes to element author; Bib-1 Use Attribute 1003 only if gils.abs maps it (original note says "originator" -- TODO confirm mapping)
40 /^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/    {
41                                           data -element author $1
42                                           unread 2
43                                         }
44
45 # Keywords (multi-line value, terminating line is unread) will be GILS' localSubjectIndex/localSubjectTerm
46 /^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/  {
47                                           begin element localSubjectIndex
48                                           data -element localSubjectTerm $1
49                                           unread 2
50                                           end element
51                                         }
52
53 # File-size will be GILS' supplementalInformation/bytes. NOTE(review): unlike the other /$/ rules this one also does "unread 2" -- confirm intended
54 /^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/  {
55                                           begin element supplementalInformation
56                                           data -element bytes $1
57                                           unread 2
58                                           end element
59                                         }
60
61 # Update-Time will be GILS' supplementalInformation/lastChecked. NOTE(review): unlike the other /$/ rules this one also does "unread 2" -- confirm intended
62 /^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/        {
63                                           begin element supplementalInformation
64                                           data -element lastChecked $1
65                                           unread 2
66                                           end element
67                                         }
68
69 # Url-References (multi-line value, terminating line is unread) will be GILS' crossReference/linkage
70 /^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
71                                           begin element crossReference
72                                           data -element linkage $1
73                                           unread 2
74                                           end element
75                                         }
76
77 # Title (multi-line value) will be Bib-1 Use Attribute 4. NOTE(review): element is spelled "Title", other elements are lower-case -- confirm the tag lookup ignores case
78 /^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/     {
79                                           data -element Title $1
80                                           unread 2
81                                         }
82
83 # Full-Text and Partial-Text will be Bib-1 Use Attribute 1010 (both are stored in element sampleText).
84 # The Body rule is disabled: is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest.
85 #/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/     {
86 #                                         data -element sampleText $1
87 #                                         unread 2
88 #                                       }
89 /^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
90                                           data -element sampleText $1
91                                           unread 2
92                                         }
93 /^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
94                                           data -element sampleText $1
95                                           unread 2
96                                         }
97 # Catch-all: consume any other attribute without emitting data. Names may contain '.' and '_', the same class the other rules use as terminator.
98 /^(-|[._A-Za-z0-9])+{[0-9]+}:\t/  BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
99                                           unread 2
100                                          }
101 # When input ends, close the record opened by the BEGIN rule.
102 END                                     { end record }