1 //$Id: MarcXmlWriter.java,v 1.9 2008/10/17 19:11:49 haschart Exp $
\r
3 * Copyright (C) 2004 Bas Peters
\r
5 * This file is part of MARC4J
\r
7 * MARC4J is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU Lesser General Public
\r
9 * License as published by the Free Software Foundation; either
\r
10 * version 2.1 of the License, or (at your option) any later version.
\r
12 * MARC4J is distributed in the hope that it will be useful,
\r
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
15 * Lesser General Public License for more details.
\r
17 * You should have received a copy of the GNU Lesser General Public
\r
18 * License along with MARC4J; if not, write to the Free Software
\r
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
23 import java.io.BufferedWriter;
\r
24 import java.io.OutputStream;
\r
25 import java.io.OutputStreamWriter;
\r
26 import java.io.UnsupportedEncodingException;
\r
28 import javax.xml.transform.Result;
\r
29 import javax.xml.transform.Source;
\r
30 import javax.xml.transform.stream.StreamResult;
\r
31 import javax.xml.transform.stream.StreamSource;
\r
33 import org.xml.sax.SAXException;
\r
37 * Class for writing MARC record objects in Turbo MARC (XML) format. This class outputs a
\r
38 * SAX event stream to the given {@link java.io.OutputStream} or
\r
39 * {@link javax.xml.transform.Result} object. It can be used in a SAX
\r
40 * pipeline to postprocess the result. By default this class uses a null
\r
41 * transform. It is strongly recommended to use a dedicated XML serializer.
\r
44 * This class requires a JAXP compliant XML parser and XSLT processor. The
\r
45 * underlying SAX2 parser should be namespace aware. In addition this class
\r
46 * requires <a href="http://icu.sourceforge.net/">ICU4J </a> to perform Unicode
\r
47 * normalization. A stripped down version of 2.6 originating from the <a
\r
48 * href="http://www.cafeconleche.org/XOM/">XOM </a> project is included in this
\r
52 * The following example reads a file with MARC records and writes MARCXML
\r
53 * records in UTF-8 encoding to the console:
\r
58 * InputStream input = new FileInputStream("input.mrc");
\r
59 * MarcReader reader = new MarcStreamReader(input);
\r
61 * MarcWriter writer = new MarcXmlWriter(System.out, true);
\r
62 * while (reader.hasNext()) {
\r
63 * Record record = reader.next();
\r
64 * writer.write(record);
\r
71 * To perform a character conversion like MARC-8 to UCS/Unicode register a
\r
72 * <code>CharConverter</code>:
\r
76 * writer.setConverter(new AnselToUnicode());
\r
80 * In addition you can perform Unicode normalization. This is for example not
\r
81 * done by the MARC-8 to UCS/Unicode converter. With Unicode normalization text
\r
82 * is transformed into the canonical composed form. For example "a&#769;bc"
\r
83 * is normalized to "&aacute;bc". To perform normalization set Unicode
\r
84 * normalization to true:
\r
88 * writer.setUnicodeNormalization(true);
\r
92 * Please note that it's not guaranteed to work if you try to convert normalized
\r
93 * Unicode back to MARC-8 encoding using
\r
94 * {@link org.marc4j.converter.impl.UnicodeToAnsel}.
\r
97 * This class provides very basic formatting options. For more advanced options
\r
98 * create an instance of this class with a
\r
99 * {@link javax.xml.transform.sax.SAXResult} containing a
\r
100 * {@link org.xml.sax.ContentHandler} derived from a dedicated XML
\r
105 * The following example uses
\r
106 * <code>org.apache.xml.serialize.XMLSerializer</code> to write MARC records
\r
107 * to XML using MARC-8 to UCS/Unicode conversion and Unicode normalization:
\r
112 * InputStream input = new FileInputStream("input.mrc");
\r
113 * MarcReader reader = new MarcStreamReader(input);
\r
115 * OutputFormat format = new OutputFormat("xml","UTF-8", true);
\r
116 * OutputStream out = new FileOutputStream("output.xml");
\r
117 * XMLSerializer serializer = new XMLSerializer(out, format);
\r
118 * Result result = new SAXResult(serializer.asContentHandler());
\r
120 * MarcXmlWriter writer = new MarcXmlWriter(result);
\r
121 * writer.setConverter(new AnselToUnicode());
\r
122 * while (reader.hasNext()) {
\r
123 * Record record = reader.next();
\r
124 * writer.write(record);
\r
131 * You can post-process the result using a <code>Source</code> object pointing
\r
132 * to a stylesheet resource and a <code>Result</code> object to hold the
\r
133 * transformation result tree. The example below converts MARC to MARCXML and
\r
134 * transforms the result tree to MODS using the stylesheet provided by The
\r
135 * Library of Congress:
\r
140 * String stylesheetUrl = "http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl";
\r
141 * Source stylesheet = new StreamSource(stylesheetUrl);
\r
143 * Result result = new StreamResult(System.out);
\r
145 * InputStream input = new FileInputStream("input.mrc");
\r
146 * MarcReader reader = new MarcStreamReader(input);
\r
147 * MarcXmlWriter writer = new MarcXmlWriter(result, stylesheet);
\r
148 * writer.setConverter(new AnselToUnicode());
\r
149 * while (reader.hasNext()) {
\r
150 * Record record = (Record) reader.next();
\r
151 * writer.write(record);
\r
158 * It is also possible to write the result into a DOM Node:
\r
163 * InputStream input = new FileInputStream("input.mrc");
\r
164 * MarcReader reader = new MarcStreamReader(input);
\r
165 * DOMResult result = new DOMResult();
\r
166 * MarcXmlWriter writer = new MarcXmlWriter(result);
\r
167 * writer.setConverter(new AnselToUnicode());
\r
168 * while (reader.hasNext()) {
\r
169 * Record record = (Record) reader.next();
\r
170 * writer.write(record);
\r
174 * Document doc = (Document) result.getNode();
\r
178 * @author Bas Peters
\r
179 * @version $Revision: 1.9 $
\r
182 public class TurboMarcXmlWriter extends BaseMarcXmlWriter {
\r
185 public static final String TURBO_MARCXML_NS_URI = "http://www.indexdata.com/turbomarc";
\r
187 protected static final String prefix = "tmarc:";
\r
189 protected static final String CONTROLFIELD = "c";
\r
190 protected static final String Q_CONTROL_FIELD = prefix + CONTROLFIELD;
\r
192 protected static final String DATAFIELD = "d";
\r
193 protected static final String Q_DATA_FIELD = prefix + DATAFIELD;
\r
195 protected static final String SUBFIELD = "s";
\r
196 protected static final String Q_SUBFIELD = prefix + SUBFIELD;
\r
198 protected static final String COLLECTION = "collection";
\r
199 protected static final String Q_COLLECTION = prefix + COLLECTION;
\r
201 protected static final String RECORD = "r";
\r
202 protected static final String Q_RECORD = prefix + RECORD;
\r
204 protected static final String LEADER = "l";
\r
205 protected static final String Q_LEADER = prefix + LEADER;
\r
208 * Constructs an instance with the specified output stream.
\r
210 * The UTF-8 character encoding is used.
\r
212 * @throws MarcException
\r
214 public TurboMarcXmlWriter(OutputStream out) {
\r
219 * Constructs an instance with the specified output stream and indentation.
\r
221 * The UTF-8 character encoding is used.
\r
223 * @throws MarcException
\r
225 public TurboMarcXmlWriter(OutputStream out, boolean indent) {
\r
226 this(out, "UTF8", indent);
\r
230 * Constructs an instance with the specified output stream and character
\r
233 * @throws MarcException
\r
235 public TurboMarcXmlWriter(OutputStream out, String encoding) {
\r
236 this(out, encoding, false);
\r
240 * Constructs an instance with the specified output stream, character
\r
241 * encoding and indentation.
\r
243 * @throws MarcException
\r
245 public TurboMarcXmlWriter(OutputStream out, String encoding, boolean indent) {
\r
248 throw new NullPointerException("null OutputStream");
\r
250 if (encoding == null) {
\r
251 throw new NullPointerException("null encoding");
\r
255 writer = new OutputStreamWriter(out, encoding);
\r
256 writer = new BufferedWriter(writer);
\r
257 // this.encoding = encoding;
\r
258 setHandler(new StreamResult(writer), null);
\r
259 } catch (UnsupportedEncodingException e) {
\r
260 throw new MarcException(e.getMessage(), e);
\r
262 writeStartDocument();
\r
265 private void setup() {
\r
267 setNamespaceURI(TURBO_MARCXML_NS_URI);
\r
268 setCollectionName(COLLECTION);
\r
269 setRecordName(RECORD);
\r
270 setLeaderName(LEADER);
\r
271 setControlfieldTemplate(CONTROLFIELD);
\r
272 setDatafieldTemplate(DATAFIELD);
\r
273 setSubfieldTemplate(SUBFIELD);
\r
274 setUseTurboMarc(true);
\r
278 * Constructs an instance with the specified result.
\r
281 * @throws SAXException
\r
283 public TurboMarcXmlWriter(Result result) {
\r
285 if (result == null)
\r
286 throw new NullPointerException("null Result");
\r
287 setHandler(result, null);
\r
288 writeStartDocument();
\r
292 * Constructs an instance with the specified stylesheet location and result.
\r
295 * @throws SAXException
\r
297 public TurboMarcXmlWriter(Result result, String stylesheetUrl) {
\r
298 this(result, new StreamSource(stylesheetUrl));
\r
302 * Constructs an instance with the specified stylesheet source and result.
\r
305 * @throws SAXException
\r
307 public TurboMarcXmlWriter(Result result, Source stylesheet) {
\r
309 if (stylesheet == null)
\r
310 throw new NullPointerException("null Source");
\r
311 if (result == null)
\r
312 throw new NullPointerException("null Result");
\r
313 setHandler(result, stylesheet);
\r
314 writeStartDocument();
\r