#
# Input-filter for the Nordic Web Index record syntax. Output is 'gils-like'.
#
# Each rule has the form  /pattern/ [BODY /pattern/] { actions }.
# BODY captures the text between the two patterns; the rules below emit it
# as $1 into the named element.
#
# NOTE(review): the copy of this file under review had been flattened onto
# two lines and every opening <tag> had been stripped from the patterns.
# The tags nwi, lsi and ip are taken from the end-tag patterns that survived,
# li from a list-bullet artefact, and meta from the comment text. The
# remaining tag names (ti, dm, ci, si, dc, by, av, avli, ty, dh, cr, cp) are
# reconstructed and MUST be confirmed against real NWI records before use.
#

# Start of a record: open a record using the 'gils' schema.
/<nwi>/			{ begin record gils }

# Ignore meta tags (the rest of the line is consumed and discarded).
/<meta>.*$/		{}

# Simple one-line fields: everything up to end-of-line becomes the value.
/<ti> */ BODY /$/	{ data -element title $1 }
/<dm> */ BODY /$/	{ data -element dateOfLastModification $1 }
/<ci> */ BODY /$/	{ data -element controlIdentifier $1 }

# supplementalInformation group and its members.
/<si>/			{ begin element supplementalInformation }
/<dc> */ BODY /$/	{ data -element lastChecked $1 }
/<by> */ BODY /$/	{ data -element bytes $1 }

# availability group and its members.
/<av>/			{ begin element availability }
/<avli> */ BODY /$/	{ data -element linkage $1 }
/<ty> */ BODY /$/	{ data -element linkageType $1 }

# localSubjectIndex group and its members.
/<lsi>/			{ begin element localSubjectIndex }
/<dh> */ BODY /$/	{ data -element localSubjectTerm $1 }

# Don't want to have <ip> inside of LocalSubjectIndex, so the enclosing
# element is ended before sampleText is emitted.
# Alternative kept for reference: since we end localsubjectindex, we could
# consume the end-tag for that as well.
#/<ip>[ \n]*/ BODY /<\/ip>[ \n]*<\/lsi>/ {
#	end element;
#	data -element sampleText $1
#    }
/<ip>[ \n]*/ BODY /<\/ip>/ {
	end element;
	data -element sampleText $1
    }

# crossReference group and its members.
/<cr>/			{ begin element crossReference }
/<li> */ BODY /$/	{ data -element linkage $1 }
/<cp> */ BODY /$/	{ data -element title $1 }

# End of a record.
/<\/nwi>/		{ end record }

# Generic end-marker: any other closing tag ends the current element.
/<\/[^>]*>/		{ end element }

# Discard newlines and any other character not matched by a rule above.
/\n/			{ }
/./			{}