Fixed SOIF filter
authorAdam Dickmeiss <adam@indexdata.dk>
Tue, 17 Jun 2003 22:23:52 +0000 (22:23 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Tue, 17 Jun 2003 22:23:52 +0000 (22:23 +0000)
tab/soif.flt

index da9c0eb..c79e46d 100644 (file)
@@ -9,41 +9,41 @@
 BEGIN                                  { begin record gils }
 
 # URL will be GILS' availability/linkage
-/^@[A-Za-z][-.A-Za-z_]* { / BODY /$/   {
+/^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/        {
                                          begin element availability
                                          data -element linkage $1
                                          end element
                                        }
 
 # Type will be GILS' availability/linkageType
-/^[tT]ype{[0-9]+}:\t/ BODY /$/         {
-                                         begin element availability
-                                         data -element linkageType $1
-                                         end element
-                                       }
+/^[tT]ype{[0-9]+}:\t/ BODY /$/          {
+                                          begin element availability
+                                          data -element linkageType $1
+                                          end element
+                                        }
 
 # Last modification time will be Bib-1 Use Attribute 1012
-/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
-                                         data -element dateOfLastModification $1
-                                       }
+/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/  {
+                                          data -element dateOfLastModification $1
+                                        }
 
 # The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007
 /^[mM][dD]5{[0-9]+}:\t/ BODY /$/       { data -element controlIdentifier $1 }
 
 # Description will be Bib-1 Use Attribute 62
-/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
                                          data -element abstract $1
                                          unread 2
                                        }
 
 # Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
-/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/      {
+/^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/   {
                                          data -element author $1
                                          unread 2
                                        }
 
 # Keywords will be GILS' localSubjectIndex/localSubjectTerm
-/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/    {
+/^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
                                          begin element localSubjectIndex
                                          data -element localSubjectTerm $1
                                          unread 2
@@ -67,7 +67,7 @@ BEGIN                                 { begin record gils }
                                        }
 
 # url-references will be GILS' crossReference/linkage
-/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
                                          begin element crossReference
                                          data -element linkage $1
                                          unread 2
@@ -75,24 +75,28 @@ BEGIN                                       { begin record gils }
                                        }
 
 # Title will be Bib-1 Use Attribute 4
-/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/       {
+/^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/    {
                                          data -element Title $1
                                          unread 2
                                        }
 
 # Body and Partial-Text will be Bib-1 Use Attribute 1010
 # Is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest.
-#/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/       {
+#/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/    {
 #                                        data -element sampleText $1
 #                                        unread 2
 #                                      }
-/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
                                          data -element sampleText $1
                                          unread 2
                                        }
-/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
                                          data -element sampleText $1
                                          unread 2
                                        }
 
+/^(-|[a-zA-Z0-9])+{[0-9]+}:\t/  BODY /^((-|[_A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
+                                         unread 2
+                                         }
+
 END                                    { end record }