From 04c67d9769e6e18b1f454999252d5959041adbfc Mon Sep 17 00:00:00 2001 From: Dennis Schafroth Date: Thu, 16 Feb 2012 12:41:53 +0100 Subject: [PATCH] Implemented base xml writer. Support both MARCXML and TurboMARC --- src/org/marc4j/BaseMarcXmlWriter.java | 423 +++++++++++++++++++++++++++++++++ 1 file changed, 423 insertions(+) create mode 100644 src/org/marc4j/BaseMarcXmlWriter.java diff --git a/src/org/marc4j/BaseMarcXmlWriter.java b/src/org/marc4j/BaseMarcXmlWriter.java new file mode 100644 index 0000000..2d66629 --- /dev/null +++ b/src/org/marc4j/BaseMarcXmlWriter.java @@ -0,0 +1,423 @@ +package org.marc4j; + +import java.io.IOException; +import java.io.Writer; +import java.util.Iterator; +import java.util.List; + +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Result; +import javax.xml.transform.Source; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.sax.SAXTransformerFactory; +import javax.xml.transform.sax.TransformerHandler; + +import org.marc4j.converter.CharConverter; +import org.marc4j.marc.ControlField; +import org.marc4j.marc.DataField; +import org.marc4j.marc.Leader; +import org.marc4j.marc.Record; +import org.marc4j.marc.Subfield; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import com.ibm.icu.text.Normalizer; + +public class BaseMarcXmlWriter implements MarcWriter { + + /** + * Character encoding. Default is UTF-8. + */ + protected CharConverter converter = null; + protected Writer writer = null; + protected boolean indent = false; + protected TransformerHandler handler = null; + boolean normalize = false; + private String namespaceURI; + private String collectionName; + private String prefix; + private String qualifiedCollectionName; + private String recordName; + private String qualifiedRecordName; + private String leaderName; + private String qualifiedLeaderName; + private String subfieldTemplate; + private String qualifiedSubfieldTemplate; + private String datafieldTemplate; + private String qualifiedDatafieldTemplate; + private String controlfieldTemplate; + private String qualifiedControlfieldTemplate; + private boolean useTurboMarc = true; + + /** + * Returns the character converter. + * + * @return CharConverter the character converter + */ + public CharConverter getConverter() { + return converter; + } + + /** + * Sets the character converter. + * + * @param converter + * the character converter + */ + public void setConverter(CharConverter converter) { + this.converter = converter; + } + + /** + * Writes a Record object to the result. + * + * @param record - + * the Record object + * @throws SAXException + */ + public void write(Record record) { + try { + toXml(record); + } catch (SAXException e) { + throw new MarcException("SAX error occured while writing record", e); + } + } + + public void close() { + writeEndDocument(); + try { + if (writer != null) + writer.close(); + } catch (IOException e) { + throw new MarcException(e.getMessage(), e); + } + } + + /** + * If set to true this writer will perform Unicode normalization on data + * elements using normalization form C (NFC). The default is false. + * + * The implementation used is ICU4J 2.6. This version is based on Unicode + * 4.0. + * + * @param normalize + * true if this writer performs Unicode normalization, false + * otherwise + */ + public void setUnicodeNormalization(boolean normalize) { + this.normalize = normalize; + } + + /** + * Returns true if this writer will perform Unicode normalization, false + * otherwise. + * + * @return boolean - true if this writer performs Unicode normalization, + * false otherwise. + */ + public boolean getUnicodeNormalization() { + return normalize; + } + + protected void setHandler(Result result, Source stylesheet) throws MarcException { + try { + TransformerFactory factory = TransformerFactory.newInstance(); + if (!factory.getFeature(SAXTransformerFactory.FEATURE)) + throw new UnsupportedOperationException( + "SAXTransformerFactory is not supported"); + + SAXTransformerFactory saxFactory = (SAXTransformerFactory) factory; + if (stylesheet == null) + handler = saxFactory.newTransformerHandler(); + else + handler = saxFactory.newTransformerHandler(stylesheet); + handler.getTransformer() + .setOutputProperty(OutputKeys.METHOD, "xml"); + handler.setResult(result); + + } catch (Exception e) { + throw new MarcException(e.getMessage(), e); + } + } + + /** + * Writes the root start tag to the result. + * + * @throws SAXException + */ + protected void writeStartDocument() { + try { + AttributesImpl atts = new AttributesImpl(); + handler.startDocument(); + // Add a new line after + handler.ignorableWhitespace("\n".toCharArray(), 0, 1); + // The next line duplicates the namespace declaration for Marc XML + handler.startPrefixMapping(prefix, namespaceURI); + // add namespace declaration using attribute - need better solution + //atts.addAttribute(namespaceURI, "xmlns", "xmlns:" + prefix, "CDATA", namespaceURI); + handler.startElement(namespaceURI, collectionName, qualifiedCollectionName, atts); + } catch (SAXException e) { + throw new MarcException( + "SAX error occured while writing start document", e); + } + } + + /** + * Writes the root end tag to the result. + * + * @throws SAXException + */ + protected void writeEndDocument() { + try { + if (indent) + handler.ignorableWhitespace("\n".toCharArray(), 0, 1); + + handler.endElement(namespaceURI, collectionName, qualifiedCollectionName); + handler.endPrefixMapping(""); + handler.endDocument(); + } catch (SAXException e) { + throw new MarcException( + "SAX error occured while writing end document", e); + } + } + + /** + * Returns true if indentation is active, false otherwise. + * + * @return boolean + */ + public boolean hasIndent() { + return indent; + } + + /** + * Activates or deactivates indentation. Default value is false. + * + * @param indent + */ + public void setIndent(boolean indent) { + this.indent = indent; + } + + protected char[] getDataElement(String data) { + String dataElement = null; + if (converter == null) + return data.toCharArray(); + dataElement = converter.convert(data); + if (normalize) + dataElement = Normalizer.normalize(dataElement, Normalizer.NFC); + return dataElement.toCharArray(); + } + + public String getNamespaceURI() { + return namespaceURI; + } + + public void setNamespaceURI(String namespaceURI) { + this.namespaceURI = namespaceURI; + } + + public String getCollectionName() { + return collectionName; + } + + public void setCollectionName(String collectionName) { + this.collectionName = collectionName; + qualifiedCollectionName = setPrefixedName(collectionName); + } + + private String setPrefixedName(String name) { + if (prefix != null && name != null) + return prefix + name; + return name; + } + + protected void handleDataField(DataField field) throws SAXException { + AttributesImpl atts = new AttributesImpl(); + if (!useTurboMarc) + atts.addAttribute("", "tag", "tag", "CDATA", field.getTag()); + atts.addAttribute("", "ind1", "ind1", "CDATA", String.valueOf(field.getIndicator1())); + atts.addAttribute("", "ind2", "ind2", "CDATA", String.valueOf(field.getIndicator2())); + + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); + StringBuffer elementName = new StringBuffer(datafieldTemplate); + StringBuffer qElementName = new StringBuffer(qualifiedDatafieldTemplate); + if (useTurboMarc) { + elementName.append(field.getTag()); + qElementName.append(field.getTag()); + } + handler.startElement(namespaceURI, elementName.toString(), qElementName.toString(), atts); + + handleSubfields(field.getSubfields()); + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); + handler.endElement(namespaceURI, elementName.toString(), qElementName.toString()); + } + + protected void handleSubfields(List subfields) throws SAXException { + Iterator si = subfields.iterator(); + while (si.hasNext()) { + Subfield subfield = (Subfield) si.next(); + handleSubfield(subfield); + } + } + + protected void handleSubfield(Subfield subfield) throws SAXException { + AttributesImpl atts = new AttributesImpl(); + StringBuffer subfieldName = new StringBuffer(subfieldTemplate); + StringBuffer qSubfieldName = new StringBuffer(qualifiedSubfieldTemplate); + + char code = subfield.getCode(); + // if [a-zA-Z0-9] append to elementName, otherwise use a attribute + if (code >= '0' && code <= '9' || + code >= 'a' && code <= 'z' || + code >= 'A' && code <= 'Z') { + subfieldName.append(code); + qSubfieldName.append(code); + } + else { + atts = new AttributesImpl(); + atts.addAttribute("", "code", "code", "CDATA", String + .valueOf(subfield.getCode())); + } + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 7); + + handler.startElement(namespaceURI, subfieldName.toString(), qSubfieldName.toString(), atts); + char[] temp = getDataElement(subfield.getData()); + handler.characters(temp, 0, temp.length); + handler.endElement(namespaceURI, subfieldName.toString(), qSubfieldName.toString()); + } + + protected void toXml(Record record) throws SAXException { + AttributesImpl atts = new AttributesImpl(); + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 3); + + handler.startElement(namespaceURI, recordName, qualifiedRecordName, atts); + + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); + + handleLeader(record, atts); + + handleControlfields(record.getControlFields()); + Iterator di = record.getDataFields().iterator(); + while (di.hasNext()) { + DataField field = di.next(); + handleDataField(field); + } + + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 3); + + handler.endElement(namespaceURI, recordName, qualifiedRecordName); + } + + protected void handleControlfields(List controlFields) throws SAXException { + Iterator ci = controlFields.iterator(); + while (ci.hasNext()) { + ControlField field = (ControlField) ci.next(); + handleControlField(field); + } + } + + protected void handleLeader(Record record, AttributesImpl atts) throws SAXException { + char[] temp; + handler.startElement(namespaceURI, leaderName, qualifiedLeaderName, atts); + Leader leader = record.getLeader(); + temp = leader.toString().toCharArray(); + handler.characters(temp, 0, temp.length); + handler.endElement(namespaceURI, leaderName, qualifiedLeaderName); + } + + protected void handleControlField(ControlField field) throws SAXException { + AttributesImpl atts = new AttributesImpl(); + + if (!useTurboMarc) + atts.addAttribute("", "tag", "tag", "CDATA", field.getTag()); + + if (indent) + handler.ignorableWhitespace("\n ".toCharArray(), 0, 5); + StringBuffer elementName = new StringBuffer(controlfieldTemplate); + StringBuffer qElementName = new StringBuffer(qualifiedControlfieldTemplate); + if (useTurboMarc) { + elementName.append(field.getTag()); + qElementName.append(field.getTag()); + } + handler.startElement(namespaceURI, elementName.toString(), qElementName.toString(), atts); + char[] temp = getDataElement(field.getData()); + handler.characters(temp, 0, temp.length); + handler.endElement(namespaceURI, elementName.toString(), qElementName.toString()); + } + + public String getPrefix() { + return prefix; + } + + public void setPrefix(String prefix) { + this.prefix = prefix; + // Update the prefixed names + qualifiedCollectionName = setPrefixedName(collectionName); + qualifiedRecordName = setPrefixedName(recordName); + qualifiedControlfieldTemplate = setPrefixedName(controlfieldTemplate); + qualifiedLeaderName = setPrefixedName(leaderName); + qualifiedDatafieldTemplate = setPrefixedName(datafieldTemplate); + qualifiedDatafieldTemplate = setPrefixedName(datafieldTemplate); + qualifiedSubfieldTemplate = setPrefixedName(subfieldTemplate); + } + + public String getRecordName() { + return recordName; + } + + public void setRecordName(String recordName) { + this.recordName = recordName; + qualifiedRecordName = setPrefixedName(recordName); + } + + public String getLeaderName() { + return leaderName; + } + + public void setLeaderName(String leaderName) { + this.leaderName = leaderName; + qualifiedLeaderName = setPrefixedName(leaderName); + } + + public String getSubfieldTemplate() { + return subfieldTemplate; + } + + public void setSubfieldTemplate(String subfieldTemplate) { + this.subfieldTemplate = subfieldTemplate; + qualifiedSubfieldTemplate = setPrefixedName(subfieldTemplate); + } + + public String getDatafieldTemplate() { + return datafieldTemplate; + } + + public void setDatafieldTemplate(String datafieldTemplate) { + this.datafieldTemplate = datafieldTemplate; + qualifiedDatafieldTemplate = setPrefixedName(datafieldTemplate); + } + + public String getControlfieldTemplate() { + return controlfieldTemplate; + } + + public void setControlfieldTemplate(String controlfieldTemplate) { + this.controlfieldTemplate = controlfieldTemplate; + qualifiedControlfieldTemplate = setPrefixedName(controlfieldTemplate); + } + + public boolean isUseTurboMarc() { + return useTurboMarc; + } + + public void setUseTurboMarc(boolean useTurboMarc) { + this.useTurboMarc = useTurboMarc; + } +} -- 1.7.10.4