//$Id: MarcXmlDriver.java,v 1.5 2006/12/04 18:46:39 bpeters Exp $
/**
 * Copyright (C) 2005 Bas Peters
 *
 * This file is part of MARC4J
 *
 * MARC4J is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * MARC4J is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with MARC4J; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
\r
package org.marc4j.util;

import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.marc4j.Constants;
import org.marc4j.MarcStreamReader;
import org.marc4j.MarcXmlWriter;
import org.marc4j.converter.CharConverter;
import org.marc4j.converter.impl.AnselToUnicode;
import org.marc4j.converter.impl.Iso5426ToUnicode;
import org.marc4j.converter.impl.Iso6937ToUnicode;
import org.marc4j.marc.Record;
\r
48 * Provides a basic driver to convert MARC records to MARCXML. Output is encoded in UTF-8.
\r
50 * The following example reads input.mrc and writes output to the console:
\r
53 * java org.marc4j.util.MarcXmlDriver input.mrc
\r
57 * The following example reads input.mrc, converts MARC-8 and writes output in
\r
58 * UTF-8 to output.xml:
\r
61 * java org.marc4j.util.MarcXmlDriver -convert MARC8 -out output.xml input.mrc
\r
65 * It is possible to post-process the result using an XSLT stylesheet. The
\r
66 * following example converts MARC to MODS:
\r
69 * java org.marc4j.util.MarcXmlDriver -convert MARC8 -xsl http://www.loc.gov/standards/mods/v3/MARC21slim2MODS3.xsl -out modsoutput.xml input.mrc
\r
73 * For usage, run from the command-line with the following command:
\r
76 * java org.marc4j.util.MarcXmlDriver -usage
\r
80 * Check the home page for <a href="http://www.loc.gov/standards/marcxml/">
\r
81 * MARCXML </a> for more information about the MARCXML format.
\r
83 * @author Bas Peters
\r
84 * @version $Revision: 1.5 $
\r
87 public class MarcXmlDriver {
\r
90 * Provides a static entry point.
\r
96 * <li>-xsl <stylesheet URL> - post-process using XSLT-stylesheet
\r
98 * <li>-out <output file> - write to output file</li>
\r
99 * <li>-convert <encoding> - convert <encoding> to UTF-8 (Supported encodings: MARC8, ISO5426, ISO6937)</li>
\r
100 * <li>-encode <encoding> - read data using encoding <encoding></li>
\r
101 * <li>-normalize - perform Unicode normalization</li>
\r
102 * <li>-usage - show usage</li>
\r
103 * <li><input file> - input file with MARC records
\r
106 public static void main(String args[]) {
\r
107 long start = System.currentTimeMillis();
\r
109 String input = null;
\r
110 String output = null;
\r
111 String stylesheet = null;
\r
112 String convert = null;
\r
113 String encoding = "ISO_8859_1";
\r
114 boolean normalize = false;
\r
116 for (int i = 0; i < args.length; i++) {
\r
117 if (args[i].equals("-xsl")) {
\r
118 if (i == args.length - 1) {
\r
121 stylesheet = args[++i].trim();
\r
122 } else if (args[i].equals("-out")) {
\r
123 if (i == args.length - 1) {
\r
126 output = args[++i].trim();
\r
127 } else if (args[i].equals("-convert")) {
\r
128 if (i == args.length - 1) {
\r
131 convert = args[++i].trim();
\r
132 } else if (args[i].equals("-encoding")) {
\r
133 if (i == args.length - 1) {
\r
136 encoding = args[++i].trim();
\r
137 } else if (args[i].equals("-normalize")) {
\r
139 } else if (args[i].equals("-usage")) {
\r
141 } else if (args[i].equals("-help")) {
\r
144 input = args[i].trim();
\r
146 // Must be last arg
\r
147 if (i != args.length - 1) {
\r
152 if (input == null) {
\r
156 InputStream in = null;
\r
158 in = new FileInputStream(input);
\r
159 } catch (FileNotFoundException e) {
\r
160 e.printStackTrace();
\r
162 MarcStreamReader reader = null;
\r
163 if (encoding != null)
\r
164 reader = new MarcStreamReader(in, encoding);
\r
166 reader = new MarcStreamReader(in);
\r
168 OutputStream out = null;
\r
169 if (output != null)
\r
171 out = new FileOutputStream(output);
\r
172 } catch (FileNotFoundException e) {
\r
173 e.printStackTrace();
\r
178 MarcXmlWriter writer = null;
\r
180 if (stylesheet == null) {
\r
181 if (convert != null)
\r
182 writer = new MarcXmlWriter(out, "UTF8");
\r
184 writer = new MarcXmlWriter(out, "UTF8");
\r
186 Writer outputWriter = null;
\r
187 if (convert != null) {
\r
189 outputWriter = new OutputStreamWriter(out, "UTF8");
\r
190 } catch (UnsupportedEncodingException e) {
\r
191 e.printStackTrace();
\r
193 outputWriter = new BufferedWriter(outputWriter);
\r
195 outputWriter = new OutputStreamWriter(out);
\r
196 outputWriter = new BufferedWriter(outputWriter);
\r
198 Result result = new StreamResult(outputWriter);
\r
199 Source source = new StreamSource(stylesheet);
\r
200 writer = new MarcXmlWriter(result, source);
\r
202 writer.setIndent(true);
\r
204 if (convert != null) {
\r
205 CharConverter charconv = null;
\r
206 if (Constants.MARC_8_ENCODING.equals(convert))
\r
207 charconv = new AnselToUnicode();
\r
208 else if (Constants.ISO5426_ENCODING.equals(convert))
\r
209 charconv = new Iso5426ToUnicode();
\r
210 else if (Constants.ISO6937_ENCODING.equals(convert))
\r
211 charconv = new Iso6937ToUnicode();
\r
213 System.err.println("Unknown character set");
\r
216 writer.setConverter(charconv);
\r
220 writer.setUnicodeNormalization(true);
\r
222 while (reader.hasNext()) {
\r
223 Record record = reader.next();
\r
224 if (Constants.MARC_8_ENCODING.equals(convert))
\r
225 record.getLeader().setCharCodingScheme('a');
\r
226 writer.write(record);
\r
230 System.err.println("Total time: "
\r
231 + (System.currentTimeMillis() - start) + " miliseconds");
\r
234 private static void usage() {
\r
235 System.err.println("MARC4J, Copyright (C) 2002-2006 Bas Peters");
\r
237 .println("Usage: org.marc4j.util.MarcXmlDriver [-options] <file.mrc>");
\r
239 .println(" -convert <encoding> = Converts <encoding> to UTF-8");
\r
240 System.err.println(" Valid encodings are: MARC8, ISO5426, ISO6937");
\r
241 System.err.println(" -normalize = perform Unicode normalization");
\r
243 .println(" -xsl <file> = Post-process MARCXML using XSLT stylesheet <file>");
\r
244 System.err.println(" -out <file> = Output using <file>");
\r
245 System.err.println(" -usage or -help = this message");
\r
246 System.err.println("The program outputs well-formed MARCXML");
\r
248 .println("See http://marc4j.tigris.org for more information.");
\r