X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fcharconv.tcl;h=5732f8ddac6bea05a6f3b90ad79b0d34edebd27d;hb=3fec27d8561b3634b4c1c6e32f09395d9690509f;hp=f43c8152eaadaa6d19077844a299e16bf9b36ba5;hpb=8626b7019b5d3d9c9594f20025e97d06a2d590fc;p=yaz-moved-to-github.git diff --git a/src/charconv.tcl b/src/charconv.tcl index f43c815..5732f8d 100755 --- a/src/charconv.tcl +++ b/src/charconv.tcl @@ -1,19 +1,21 @@ -#!/bin/sh -# the next line restats using tclsh \ -exec tclsh "$0" "$@" -# -# $Id: charconv.tcl,v 1.12 2006-04-19 23:15:39 adam Exp $ +#!/usr/bin/tclsh +# $Id: charconv.tcl,v 1.21 2008-01-06 13:02:48 adam Exp $ proc usage {} { puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... } exit 1 } -proc preamble_trie {ofilehandle} { +proc preamble_trie {ofilehandle ifiles ofile} { set f $ofilehandle set totype {unsigned } + puts $f "/** \\file $ofile" + puts $f " \\brief Character conversion, generated from [lindex $ifiles 0]" + puts $f "" + puts $f " Generated automatically by charconv.tcl" + puts $f "*/" puts $f "\#include " puts $f " struct yaz_iconv_trie_flat { @@ -142,7 +144,18 @@ proc ins_trie_r {from to combining codename this} { set trie($this,type) f } if {$trie($this,type) == "f"} { - lappend trie($this,content) [list $from $to $combining $codename] + set dup 0 + if {[info exists trie($this,content)]} { + foreach e $trie($this,content) { + set efrom [lindex $e 0] + if { $efrom == $from } { + set dup 1 + } + } + } + if { $dup == 0 } { + lappend trie($this,content) [list $from $to $combining $codename] + } # split ? if {[llength $trie($this,content)] > $trie(split)} { @@ -160,9 +173,11 @@ proc ins_trie_r {from to combining codename this} { } ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch) } else { - set trie($this,to,$ch) $to - set trie($this,combining,$ch) $combining - set trie($this,codename,$ch) $codename + if {![info exist trie($this,to,$ch)]} { + set trie($this,to,$ch) $to + set trie($this,combining,$ch) $combining + set trie($this,codename,$ch) $codename + } } } } @@ -261,34 +276,26 @@ proc readfile {fname ofilehandle prefix omits reverse} { set marc_lines 0 set ucs_lines 0 set utf_lines 0 + set altutf_lines 0 set codename_lines 0 set lineno 0 set f [open $fname r] set tablenumber x set combining 0 set codename {} + set altutf {} while {1} { incr lineno set cnt [gets $f line] if {$cnt < 0} { break } - if {[regexp {} $line s]} { - reset_trie - set trie(prefix) "${prefix}" - } elseif {[regexp {} $line s]} { + if {[regexp {} $line s]} { dump_trie $ofilehandle - } elseif {[regexp {([0-9A-Fa-f]*)} $line s hex ucs]} { - ins_trie $hex $ucs $combining {} - unset hex - } elseif {[regexp {} $line s]} { - if {[lsearch $omits $tablenumber] == -1} { - dump_trie $ofilehandle - } } elseif {[regexp {} $line s]} { if {[string length $ucs]} { if {$reverse} { @@ -298,6 +305,7 @@ proc readfile {fname ofilehandle prefix omits reverse} { # puts "ins_trie $hex $marc ins_trie $hex $marc $combining $codename unset hex + } else { for {set i 0} {$i < [string length $marc]} {incr i 2} { lappend hex [string range $marc $i [expr $i+1]] @@ -307,10 +315,20 @@ proc readfile {fname ofilehandle prefix omits reverse} { unset hex } } + if {$reverse && [string length $marc]} { + for {set i 0} {$i < [string length $altutf]} {incr i 2} { + lappend hex [string range $altutf $i [expr $i+1]] + } + if {[info exists hex]} { + ins_trie $hex $marc $combining $codename + unset hex + } + } set marc {} set uni {} set codename {} set combining 0 + set altutf {} } elseif {[regexp {([0-9A-Fa-f]*)} $line s marc]} { incr marc_lines } elseif {[regexp {(.*)} $line s codename]} { @@ -331,6 +349,8 @@ proc readfile {fname ofilehandle prefix omits reverse} { incr ucs_lines } elseif {[regexp {([0-9A-Fa-f]*)} $line s utf]} { incr utf_lines + } elseif {[regexp {([0-9A-Fa-f]*)} $line s altutf]} { + incr altutf_lines } } close $f @@ -390,7 +410,7 @@ if {![info exists ifiles]} { } set ofilehandle [open $ofile w] -preamble_trie $ofilehandle +preamble_trie $ofilehandle $ifiles $ofile foreach ifile $ifiles { readfile $ifile $ofilehandle $prefix $omits $reverse_map