From ba2b6b4c02d80cc553eaba38ded067836f65baa2 Mon Sep 17 00:00:00 2001 From: Marc Cromme Date: Thu, 7 Feb 2008 12:38:39 +0000 Subject: [PATCH] pretty format XML source code --- doc/tutorial.xml | 371 +++++++++++++++++++++++++++--------------------------- 1 file changed, 183 insertions(+), 188 deletions(-) diff --git a/doc/tutorial.xml b/doc/tutorial.xml index 3d8ecb3..341c738 100644 --- a/doc/tutorial.xml +++ b/doc/tutorial.xml @@ -1,100 +1,100 @@ - - - Tutorial - - - - A first &acro.oai; indexing example - - - In this section, we will test the system by indexing a small set of - sample &acro.oai; records that are included with the &zebra; distribution, - running a &zebra; server against the newly created database, and - searching the indexes with a client that connects to that server. - - - Go to the examples/oai-pmh subdirectory of the - distribution archive, or make a deep copy of the Debian installation - directory - /usr/share/idzebra-2.0.-examples/oai-pmh. - An XML file containing multiple &acro.oai; - records is located in the sub - directory examples/oai-pmh/data. - - + + + Tutorial + + + + A first &acro.oai; indexing example + + + In this section, we will test the system by indexing a small set of + sample &acro.oai; records that are included with the &zebra; distribution, + running a &zebra; server against the newly created database, and + searching the indexes with a client that connects to that server. + + + Go to the examples/oai-pmh subdirectory of the + distribution archive, or make a deep copy of the Debian installation + directory + /usr/share/idzebra-2.0.-examples/oai-pmh. + An XML file containing multiple &acro.oai; + records is located in the sub + directory examples/oai-pmh/data. + + Additional OAI test records can be downloaded by running a shell script (you may want to abort the script when you have waitet longer than your coffe brews ..). 
- + cd data ./fetch_OAI_data.sh cd ../ - - - + + + To index these &acro.oai; records, type: - - zebraidx-2.0 -c conf/zebra.cfg init - zebraidx-2.0 -c conf/zebra.cfg update data - zebraidx-2.0 -c conf/zebra.cfg commit - - In case you have not installed zebra yet but have compiled the + + zebraidx-2.0 -c conf/zebra.cfg init + zebraidx-2.0 -c conf/zebra.cfg update data + zebraidx-2.0 -c conf/zebra.cfg commit + + In case you have not installed zebra yet but have compiled the binaries from this tarball, use the following command form: - - ../../index/zebraidx -c conf/zebra.cfg this and that - - On some systems the &zebra; binaries are installed under the - generic names, you need to use the following command form: - - zebraidx -c conf/zebra.cfg this and that - - - - - In this command, the word update is followed - by the name of a directory: zebraidx updates all - files in the hierarchy rooted at data. - The command option - -c conf/zebra.cfg points to the proper - configuration file. - - - - You might ask yourself how &acro.xml; content is indexed using &acro.xslt; - stylesheets: to satisfy your curiosity, you might want to run the - indexing transformation on an example debugging &acro.oai; record. - - xsltproc conf/oai2index.xsl data/debug-record.xml - + + ../../index/zebraidx -c conf/zebra.cfg this and that + + On some systems the &zebra; binaries are installed under the + generic names, you need to use the following command form: + + zebraidx -c conf/zebra.cfg this and that + + + + + In this command, the word update is followed + by the name of a directory: zebraidx updates all + files in the hierarchy rooted at data. + The command option + -c conf/zebra.cfg points to the proper + configuration file. + + + + You might ask yourself how &acro.xml; content is indexed using &acro.xslt; + stylesheets: to satisfy your curiosity, you might want to run the + indexing transformation on an example debugging &acro.oai; record. 
+ + xsltproc conf/oai2index.xsl data/debug-record.xml + Here you see the &acro.oai; record transformed into the indexing &acro.xml; format. &zebra; is creating several inverted indexes, and their name and type are clearly visible in the indexing &acro.xml; format. - - - - If your indexing command was successful, you are now ready to - fire up a server. To start a server on port 9999, type: - - zebrasrv-2.0 -c conf/zebra.cfg @:9999 - - - - - The &zebra; index that you have just created has a single database - named Default. - The database contains several &acro.oai; records, and the server will - return records in the &acro.xml; format only. The indexing machine - did the splitting into individual records just behind the scenes. - - - - - - - Searching the &acro.oai; database by web service + + + + If your indexing command was successful, you are now ready to + fire up a server. To start a server on port 9999, type: + + zebrasrv-2.0 -c conf/zebra.cfg @:9999 + + + + + The &zebra; index that you have just created has a single database + named Default. + The database contains several &acro.oai; records, and the server will + return records in the &acro.xml; format only. The indexing machine + did the splitting into individual records just behind the scenes. + + + + + + + Searching the &acro.oai; database by web service - + &zebra; has a built-in web service, which is close to the &acro.sru; standard web service. We use it to access our new database using any &acro.xml; enabled web browser. @@ -110,8 +110,8 @@ search for the term the. 
Just point your browser at this link: - http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the + url="http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the"> + http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the @@ -124,31 +124,31 @@ In case we actually want to retrieve one record, we need to alter our URL to the following - - http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the&startRecord=1&maximumRecords=1&recordSchema=dc - + + http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the&startRecord=1&maximumRecords=1&recordSchema=dc + This way we can page through our result set in chunks of records, for example, we access the 6th to the 10th record using the URL - - http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the&startRecord=6&maximumRecords=5&recordSchema=dc - - + + http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=the&startRecord=6&maximumRecords=5&recordSchema=dc + + - - + &x-pquery=title%3Cthe + --> + - - Presenting search results in different formats + + Presenting search results in different formats &zebra; uses &acro.xslt; stylesheets for both &acro.xml;record @@ -174,7 +174,7 @@ xsltproc conf/oai2dc.xsl data/debug-record.xml xsltproc conf/oai2zebra.xsl data/debug-record.xml - + Notice also that the &zebra; specific parameters are injected by the engine when retrieving data, therefore some of the attributes in the zebra retrieval schema are not filled @@ -200,10 +200,10 @@ - + - - More interesting searches + + More interesting searches The &acro.oai; indexing example defines many different index @@ -226,10 +226,10 @@ correct &acro.pqf; query. 
For example, to search in titles only, we use + url="http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=@attr + 1=dc_title the&startRecord=1&maximumRecords=1&recordSchema=dc"> http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=@attr - 1=dc_title the&startRecord=1&maximumRecords=1&recordSchema=dc + 1=dc_title the&startRecord=1&maximumRecords=1&recordSchema=dc @@ -241,10 +241,10 @@ dc_description using the query @and @attr 1=dc_title the @attr 1=dc_description fish. + url="http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=@and + @attr 1=dc_title the + @attr 1=dc_description + fish&startRecord=1&maximumRecords=1&recordSchema=dc"> http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=@and @attr 1=dc_title the @attr 1=dc_description fish&startRecord=1&maximumRecords=1&recordSchema=dc @@ -252,10 +252,10 @@ - + - - Investigating the content of the indexes + + Investigating the content of the indexes How does the magic work? What is inside the indexes? Why is a certain @@ -302,19 +302,19 @@ - + - - Setting up a correct &acro.sru; web service + + Setting up a correct &acro.sru; web service - The &acro.sru; specification mandates that the &acro.cql; query - language is supported and properly configured. Also, the server - needs to be able to emit a proper &acro.explain; &acro.xml; - record, which is used to determine the capabilities of the - specific server instance. - + The &acro.sru; specification mandates that the &acro.cql; query + language is supported and properly configured. Also, the server + needs to be able to emit a proper &acro.explain; &acro.xml; + record, which is used to determine the capabilities of the + specific server instance. 
+ In this example configuration we exploit the similarities between @@ -332,8 +332,8 @@ server configuration - just type zebrasrv -f conf/yazserver.xml - - + + First, we'd like to be sure that we can see the &acro.explain; @@ -352,11 +352,11 @@ Now we can issue true &acro.sru; requests. For example, dc.title=the - and dc.description=fish results in the following page + and dc.description=fish results in the following page + url="http://localhost:9999/?version=1.1&operation=searchRetrieve&query=dc.title=the + and dc.description=fish + &startRecord=1&maximumRecords=1&recordSchema=dc"> http://localhost:9999/?version=1.1&operation=searchRetrieve&query=dc.title=the and dc.description=fish &startRecord=1&maximumRecords=1&recordSchema=dc @@ -367,14 +367,14 @@ scanning the dc.title index gives us an idea what search terms are found there + url="http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.title=fish"> http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.title=fish , whereas - -http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifier=fish - + + http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifier=fish + accesses the indexed identifiers. 
@@ -383,9 +383,9 @@ http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifi schema's of the form zebra:: just work right out of the box + url="http://localhost:9999/?version=1.1&operation=searchRetrieve&query=dc.title=the + and dc.description=fish + &startRecord=1&maximumRecords=1&recordSchema=zebra::snippet"> http://localhost:9999/?version=1.1&operation=searchRetrieve&query=dc.title=the and dc.description=fish &startRecord=1&maximumRecords=1&recordSchema=zebra::snippet @@ -393,12 +393,12 @@ http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifi - + Searching the &acro.oai; database by &acro.z3950; protocol - + In this section we repeat the searches and presents we have done so far using the binary &acro.z3950; protocol, you can use any @@ -408,7 +408,7 @@ http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifi Connecting to the server is done by the command - + yaz-client localhost:9999 @@ -461,7 +461,7 @@ http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifi Z> elements zebra::facet::dc_publisher:p,dc_title:p Z> show 1+1 - + @@ -486,7 +486,7 @@ http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifi http://resolver.caltech.edu/CaltechCSTR:1986.5228-tr-86 Z> show 1+1 - etc, etc. + etc, etc. 
@@ -501,77 +501,72 @@ http://localhost:9999/?version=1.1&operation=scan&scanClause=dc.identifi Z> Z> scan @attr 1=dc_title communication Z> scan @attr 1=dc_identifier @attr 4=3 a - + &acro.z3950; search using server-side CQL conversion: - Z> format xml - Z> querytype cql - Z> elements dc - Z> - Z> find harry - Z> - Z> find dc.creator = the - Z> find dc.creator = the - Z> find dc.title = the - Z> - Z> find dc.description < the - Z> find dc.title > some - Z> - Z> find dc.identifier="http://resolver.caltech.edu/CaltechCSTR:1978.2276-tr-78" - Z> find dc.relation = something - + Z> format xml + Z> querytype cql + Z> elements dc + Z> + Z> find harry + Z> + Z> find dc.creator = the + Z> find dc.creator = the + Z> find dc.title = the + Z> + Z> find dc.description < the + Z> find dc.title > some + Z> + Z> find dc.identifier="http://resolver.caltech.edu/CaltechCSTR:1978.2276-tr-78" + Z> find dc.relation = something + - - &acro.z3950; scan using server side CQL conversion - - unfortunately, this will _never_ work as it is not supported by the - &acro.z3950; standard. - If you want to use scan using server side CQL conversion, you need to - make an SRW connection using yaz-client, or a - SRU connection using REST Web Services - any browser will do. - + + &acro.z3950; scan using server side CQL conversion - + unfortunately, this will _never_ work as it is not supported by the + &acro.z3950; standard. + If you want to use scan using server side CQL conversion, you need to + make an SRW connection using yaz-client, or a + SRU connection using REST Web Services - any browser will do. + - - All indexes defined by 'type="0"' in the - indexing style sheet must be searched using the '@attr 4=3' - structure attribute instruction. - + + All indexes defined by 'type="0"' in the + indexing style sheet must be searched using the '@attr 4=3' + structure attribute instruction. 
+ - Notice that searching and scan on indexes - dc_contributor, dc_language, - dc_rights, and dc_source - might fail, simply because none of the records in the small example set - have these fields set, and consequently, these indexes might not have - been created. + Notice that searching and scan on indexes + dc_contributor, dc_language, + dc_rights, and dc_source + might fail, simply because none of the records in the small example set + have these fields set, and consequently, these indexes might not have + been created. - - - - - - + + + - -