Omit sort key by default in yaz-icu's output
author: Adam Dickmeiss <adam@indexdata.dk>
Tue, 13 Jan 2009 14:59:17 +0000 (15:59 +0100)
committer: Adam Dickmeiss <adam@indexdata.dk>
Tue, 13 Jan 2009 14:59:17 +0000 (15:59 +0100)
Omit sort key by default in yaz-icu's output. It may be enabled with
option -s. The sort key was used in previous tests, but since it is
encoded differently between ICU versions, we omit it in our tests.

doc/yaz-icu-man.xml
test/tsticu-0.output
test/tsticu-1.output
util/yaz-icu.c

index f488504..110bd65 100644 (file)
@@ -30,6 +30,7 @@
    <arg choice="opt" rep="repeat">commands</arg>
    <arg>-c <replaceable>config</replaceable></arg>
    <arg>-p <replaceable>opt</replaceable></arg>
    <arg choice="opt" rep="repeat">commands</arg>
    <arg>-c <replaceable>config</replaceable></arg>
    <arg>-p <replaceable>opt</replaceable></arg>
+   <arg>-s</arg>
    <arg>-x</arg>
   </cmdsynopsis>
  </refsynopsisdiv>
    <arg>-x</arg>
   </cmdsynopsis>
  </refsynopsisdiv>
    </varlistentry>
 
    <varlistentry>
    </varlistentry>
 
    <varlistentry>
-    <term>-x <replaceable>config</replaceable></term>
+    <term>-s</term>
+    <listitem><para>
+      Specifies that output should include sort key as well. Note that
+      sort key differs between ICU versions.
+     </para></listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>-x</term>
     <listitem><para>
       Specifies that output should be XML based rather than
       "text" based.
     <listitem><para>
       Specifies that output should be XML based rather than
       "text" based.
index e00ae22..2a9dc2e 100644 (file)
@@ -1,7 +1,7 @@
-1 1 'børn' 'Børn' '+EKC\x01\x85\xA5\x06\x01\x09'
-2 2 'le' 'Le' '?1\x01\x06\x01\x06'
-3 2 'carré' 'Carré' '-)KK1\x01\x82\x8D\x01\x0A'
-4 3 'le' 'Le' '?1\x01\x06\x01\x06'
-5 3 'carre' 'Carre' '-)KK1\x01\x09\x01\x09'
-6 4 'le' 'Le' '?1\x01\x06\x01\x06'
-7 4 'carré' 'Carré' '-)KK1\x01\x82\x8D\x01\x0A'
+1 1 'børn' 'Børn'
+2 2 'le' 'Le'
+3 2 'carré' 'Carré'
+4 3 'le' 'Le'
+5 3 'carre' 'Carre'
+6 4 'le' 'Le'
+7 4 'carré' 'Carré'
index 3989b6a..78b2967 100644 (file)
@@ -1,7 +1,7 @@
-1 1 'børn' 'Børn' '+EKC\x01\x85\xA5\x06\x01\x09'
-2 2 'le' 'Le' '?1\x01\x06\x01\x06'
-3 2 'carre' 'Carre' '-)KK1\x01\x09\x01\x09'
-4 3 'le' 'Le' '?1\x01\x06\x01\x06'
-5 3 'carre' 'Carre' '-)KK1\x01\x09\x01\x09'
-6 4 'le' 'Le' '?1\x01\x06\x01\x06'
-7 4 'carre' 'Carre' '-)KK1\x01\x09\x01\x09'
+1 1 'børn' 'Børn'
+2 2 'le' 'Le'
+3 2 'carre' 'Carre'
+4 3 'le' 'Le'
+5 3 'carre' 'Carre'
+6 4 'le' 'Le'
+7 4 'carre' 'Carre'
index 044afcb..c510617 100644 (file)
@@ -30,6 +30,7 @@ static struct config_t {
     char conffile[1024];
     char print[1024];
     int xmloutput;
     char conffile[1024];
     char print[1024];
     int xmloutput;
+    int sortoutput;
     yaz_icu_chain_t chain;
     FILE * infile;
     FILE * outfile;
     yaz_icu_chain_t chain;
     FILE * infile;
     FILE * outfile;
@@ -43,6 +44,7 @@ void print_option_error(const struct config_t *p_config)
     fprintf(stderr, "yaz-icu\n"
             "   [-c (path/to/config/file.xml)]\n"
             "   [-p (a|c|l|t)] print ICU info \n"
     fprintf(stderr, "yaz-icu\n"
             "   [-c (path/to/config/file.xml)]\n"
             "   [-p (a|c|l|t)] print ICU info \n"
+            "   [-s] Show sort normalization key\n"
             "   [-x] XML output\n"
             "\n"
             "Examples:\n"
             "   [-x] XML output\n"
             "\n"
             "Examples:\n"
@@ -71,13 +73,14 @@ void read_params(int argc, char **argv, struct config_t *p_config)
     p_config->conffile[0] = 0;
     p_config->print[0] = 0;
     p_config->xmloutput = 0;
     p_config->conffile[0] = 0;
     p_config->print[0] = 0;
     p_config->xmloutput = 0;
+    p_config->sortoutput = 0;
     p_config->chain = 0;
     p_config->infile = stdin;
     p_config->outfile = stdout;
     
     /* set up command line parameters */
     
     p_config->chain = 0;
     p_config->infile = stdin;
     p_config->outfile = stdout;
     
     /* set up command line parameters */
     
-    while ((ret = options("c:p:x", argv, argc, &arg)) != -2)
+    while ((ret = options("c:p:xs", argv, argc, &arg)) != -2)
     {
         switch (ret)
         {
     {
         switch (ret)
         {
@@ -87,10 +90,14 @@ void read_params(int argc, char **argv, struct config_t *p_config)
         case 'p':
             strcpy(p_config->print, arg);
             break;
         case 'p':
             strcpy(p_config->print, arg);
             break;
+        case 's':
+            p_config->sortoutput = 1;
+            break;
         case 'x':
             p_config->xmloutput = 1;
             break;
         default:
         case 'x':
             p_config->xmloutput = 1;
             break;
         default:
+            printf("Got %d\n", ret);
             print_option_error(p_config);
         }
     }
             print_option_error(p_config);
         }
     }
@@ -482,20 +489,31 @@ static void process_text_file(const struct config_t *p_config)
                     /* should XML encode this. Bug #1902 */
                     fprintf(config.outfile, 
                             "<token id=\"%lu\" line=\"%lu\""
                     /* should XML encode this. Bug #1902 */
                     fprintf(config.outfile, 
                             "<token id=\"%lu\" line=\"%lu\""
-                            " norm=\"%s\" display=\"%s\" sortkey=\"%s\"/>\n",
+                            " norm=\"%s\" display=\"%s\"",
                             token_count,
                             line_count,
                             icu_chain_token_norm(config.chain),
                             token_count,
                             line_count,
                             icu_chain_token_norm(config.chain),
-                            icu_chain_token_display(config.chain),
-                            wrbuf_cstr(sw));
+                            icu_chain_token_display(config.chain));
+                    if (p_config->sortoutput)
+                    {
+                        fprintf(config.outfile, " sortkey=\"%s\"",
+                                wrbuf_cstr(sw));
+                    }
+                    fprintf(config.outfile, "/>\n");
                 }
                 else
                 }
                 else
-                    fprintf(config.outfile, "%lu %lu '%s' '%s' '%s'\n",
+                {
+                    fprintf(config.outfile, "%lu %lu '%s' '%s'",
                             token_count,
                             line_count,
                             icu_chain_token_norm(config.chain),
                             token_count,
                             line_count,
                             icu_chain_token_norm(config.chain),
-                            icu_chain_token_display(config.chain),
-                            wrbuf_cstr(sw));
+                            icu_chain_token_display(config.chain));
+                    if (p_config->sortoutput)
+                    {
+                        fprintf(config.outfile, " '%s'", wrbuf_cstr(sw));
+                    }
+                    fprintf(config.outfile, "\n");
+                }
             }
             wrbuf_destroy(sw);
         }
             }
             wrbuf_destroy(sw);
         }
@@ -546,6 +564,7 @@ int main(int argc, char **argv)
 /*
  * Local variables:
  * c-basic-offset: 4
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab