projects
/
idzebra-moved-to-github.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
f9ce2a8
)
New SOIF filter
author
Adam Dickmeiss
<adam@indexdata.dk>
Sat, 5 Apr 2003 12:32:43 +0000
(12:32 +0000)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Sat, 5 Apr 2003 12:32:43 +0000
(12:32 +0000)
CHANGELOG
patch
|
blob
|
history
tab/soif.flt
patch
|
blob
|
history
diff --git
a/CHANGELOG
b/CHANGELOG
index
ae76518
..
5710f03
100644
(file)
--- a/
CHANGELOG
+++ b/
CHANGELOG
@@
-1,4
+1,9
@@
+New version of SOIF filter (soif.flt).
+ Kang-Jin Lee <lee@arco.de>
+
+Fixed a bug with >2GB files (overflow in integer expression).
+
--- 1.3.10 2003/04/01
Fix linker error for Perl module.
--- 1.3.10 2003/04/01
Fix linker error for Perl module.
diff --git
a/tab/soif.flt
b/tab/soif.flt
index
e1c3cba
..
da9c0eb
100644
(file)
--- a/
tab/soif.flt
+++ b/
tab/soif.flt
@@
-2,6
+2,8
@@
# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
# Version 0.2 (09/09/1998).
# This sort of follows the Nordic Web Index convention of GILS attribute use.
# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
# Version 0.2 (09/09/1998).
# This sort of follows the Nordic Web Index convention of GILS attribute use.
+# Modified by Kang-Jin Lee (lee@arco.de)
+# 07/10/1999
# We'll use GILS structured records.
BEGIN { begin record gils }
# We'll use GILS structured records.
BEGIN { begin record gils }
@@
-14,34
+16,34
@@
BEGIN { begin record gils }
}
# Type will be GILS' availability/linkageType
}
# Type will be GILS' availability/linkageType
-/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
+/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
begin element availability
data -element linkageType $1
end element
}
# Last modification time will be Bib-1 Use Attribute 1012
begin element availability
data -element linkageType $1
end element
}
# Last modification time will be Bib-1 Use Attribute 1012
-/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
data -element dateOfLastModification $1
}
# The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007
data -element dateOfLastModification $1
}
# The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007
-/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
+/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 }
# Description will be Bib-1 Use Attribute 62
# Description will be Bib-1 Use Attribute 62
-/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
data -element abstract $1
unread 2
}
# Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
data -element abstract $1
unread 2
}
# Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!)
-/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
data -element author $1
unread 2
}
# Keywords will be GILS' localSubjectIndex/localSubjectTerm
data -element author $1
unread 2
}
# Keywords will be GILS' localSubjectIndex/localSubjectTerm
-/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
begin element localSubjectIndex
data -element localSubjectTerm $1
unread 2
begin element localSubjectIndex
data -element localSubjectTerm $1
unread 2
@@
-49,7
+51,7
@@
BEGIN { begin record gils }
}
# File-size will be GILS' supplementalInformation/bytes
}
# File-size will be GILS' supplementalInformation/bytes
-/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
+/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
begin element supplementalInformation
data -element bytes $1
unread 2
begin element supplementalInformation
data -element bytes $1
unread 2
@@
-57,7
+59,7
@@
BEGIN { begin record gils }
}
# Update-Time will be GILS' supplementalInformation/lastChecked
}
# Update-Time will be GILS' supplementalInformation/lastChecked
-/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
+/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
begin element supplementalInformation
data -element lastChecked $1
unread 2
begin element supplementalInformation
data -element lastChecked $1
unread 2
@@
-73,13
+75,18
@@
BEGIN { begin record gils }
}
# Title will be Bib-1 Use Attribute 4
}
# Title will be Bib-1 Use Attribute 4
-/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
data -element Title $1
unread 2
}
# Body and Partial-Text will be Bib-1 Use Attribute 1010
data -element Title $1
unread 2
}
# Body and Partial-Text will be Bib-1 Use Attribute 1010
-/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+# Is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest.
+#/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
+# data -element sampleText $1
+# unread 2
+# }
+/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ {
data -element sampleText $1
unread 2
}
data -element sampleText $1
unread 2
}