From c3325d2586de28abaf2a4149204bc5d5833bba82 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Sat, 5 Apr 2003 12:32:43 +0000 Subject: [PATCH] New SOIF filter --- CHANGELOG | 5 +++++ tab/soif.flt | 27 +++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index ae76518..5710f03 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,9 @@ +New version of SOIF filter (soif.flt). + Kang-Jin Lee + +Fixed a bug with >2GB files (overflow in integer expression). + --- 1.3.10 2003/04/01 Fix linker error for Perl module. diff --git a/tab/soif.flt b/tab/soif.flt index e1c3cba..da9c0eb 100644 --- a/tab/soif.flt +++ b/tab/soif.flt @@ -2,6 +2,8 @@ # Author: Peter Valkenburg / TERENA (valkenburg@terena.nl) # Version 0.2 (09/09/1998). # This sort of follows the Nordic Web Index convention of GILS attribute use. +# Modified by Kang-Jin Lee (lee@arco.de) +# 07/10/1999 # We'll use GILS structured records. BEGIN { begin record gils } @@ -14,34 +16,34 @@ BEGIN { begin record gils } } # Type will be GILS' availability/linkageType -/^[tT]ype{[0-9]+}:\t/ BODY /$/ { +/^[tT]ype{[0-9]+}:\t/ BODY /$/ { begin element availability data -element linkageType $1 end element } # Last modification time will be Bib-1 Use Attribute 1012 -/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ { +/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ { data -element dateOfLastModification $1 } # The MD5 checksum is used as a unique identifier under Bib-1 Use Attribute 1007 -/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 } +/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ { data -element controlIdentifier $1 } # Description will be Bib-1 Use Attribute 62 -/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[dD]escription{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { data -element abstract $1 unread 2 } # Author will be Bib-1 Use Attribute 1003 (if gils.abs maps originator to it!!) -/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[aA]uthor{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { data -element author $1 unread 2 } # Keywords will be GILS' localSubjectIndex/localSubjectTerm -/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[kK]eywords{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { begin element localSubjectIndex data -element localSubjectTerm $1 unread 2 @@ -49,7 +51,7 @@ BEGIN { begin record gils } } # File-size will be GILS' supplementalInformation/bytes -/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ { +/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ { begin element supplementalInformation data -element bytes $1 unread 2 @@ -57,7 +59,7 @@ BEGIN { begin record gils } } # Update-Time will be GILS' supplementalInformation/lastChecked -/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ { +/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ { begin element supplementalInformation data -element lastChecked $1 unread 2 @@ -73,13 +75,18 @@ BEGIN { begin record gils } } # Title will be Bib-1 Use Attribute 4 -/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +/^[tT]itle{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { data -element Title $1 unread 2 } # Body and Partial-Text will be Bib-1 Use Attribute 1010 -/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +# Is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest. +#/^[bB]ody{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { +# data -element sampleText $1 +# unread 2 +# } +/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^([-._A-Za-z0-9]+{[0-9]+}:\t.*|})$/ { data -element sampleText $1 unread 2 } -- 1.7.10.4