X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Fcharconv.tcl;h=b37b0d80bd08aaa756bf97ab49db17c01248588f;hb=765b94caaa93566a9792019ec5d2bf56fc8100a3;hp=36fed01247da50ad6c8e7da8b9b814f8c2ca6835;hpb=9f5de9011e59569e0bae5b484defe53bda38a457;p=yaz-moved-to-github.git diff --git a/src/charconv.tcl b/src/charconv.tcl index 36fed01..b37b0d8 100755 --- a/src/charconv.tcl +++ b/src/charconv.tcl @@ -1,8 +1,8 @@ #!/bin/sh -# the next line restats using tclsh \ +# the next line restarts using tclsh \ if [ -f /usr/local/bin/tclsh8.4 ]; then exec tclsh8.4 "$0" "$@"; else exec tclsh "$0" "$@"; fi # -# $Id: charconv.tcl,v 1.15 2006-05-22 19:08:38 adam Exp $ +# $Id: charconv.tcl,v 1.20 2007-09-22 18:55:02 adam Exp $ proc usage {} { puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... } @@ -147,7 +147,18 @@ proc ins_trie_r {from to combining codename this} { set trie($this,type) f } if {$trie($this,type) == "f"} { - lappend trie($this,content) [list $from $to $combining $codename] + set dup 0 + if {[info exists trie($this,content)]} { + foreach e $trie($this,content) { + set efrom [lindex $e 0] + if { $efrom == $from } { + set dup 1 + } + } + } + if { $dup == 0 } { + lappend trie($this,content) [list $from $to $combining $codename] + } # split ? if {[llength $trie($this,content)] > $trie(split)} { @@ -165,9 +176,11 @@ proc ins_trie_r {from to combining codename this} { } ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch) } else { - set trie($this,to,$ch) $to - set trie($this,combining,$ch) $combining - set trie($this,codename,$ch) $codename + if {![info exist trie($this,to,$ch)]} { + set trie($this,to,$ch) $to + set trie($this,combining,$ch) $combining + set trie($this,codename,$ch) $codename + } } } } @@ -266,34 +279,26 @@ proc readfile {fname ofilehandle prefix omits reverse} { set marc_lines 0 set ucs_lines 0 set utf_lines 0 + set altutf_lines 0 set codename_lines 0 set lineno 0 set f [open $fname r] set tablenumber x set combining 0 set codename {} + set altutf {} while {1} { incr lineno set cnt [gets $f line] if {$cnt < 0} { break } - if {[regexp {} $line s]} { - reset_trie - set trie(prefix) "${prefix}" - } elseif {[regexp {} $line s]} { + if {[regexp {} $line s]} { dump_trie $ofilehandle - } elseif {[regexp {([0-9A-Fa-f]*)} $line s hex ucs]} { - ins_trie $hex $ucs $combining {} - unset hex - } elseif {[regexp {} $line s]} { - if {[lsearch $omits $tablenumber] == -1} { - dump_trie $ofilehandle - } } elseif {[regexp {} $line s]} { if {[string length $ucs]} { if {$reverse} { @@ -303,6 +308,7 @@ proc readfile {fname ofilehandle prefix omits reverse} { # puts "ins_trie $hex $marc ins_trie $hex $marc $combining $codename unset hex + } else { for {set i 0} {$i < [string length $marc]} {incr i 2} { lappend hex [string range $marc $i [expr $i+1]] @@ -312,10 +318,20 @@ proc readfile {fname ofilehandle prefix omits reverse} { unset hex } } + if {$reverse && [string length $marc]} { + for {set i 0} {$i < [string length $altutf]} {incr i 2} { + lappend hex [string range $altutf $i [expr $i+1]] + } + if {[info exists hex]} { + ins_trie $hex $marc $combining $codename + unset hex + } + } set marc {} set uni {} set codename {} set combining 0 + set altutf {} } elseif {[regexp {([0-9A-Fa-f]*)} $line s marc]} { incr marc_lines } elseif {[regexp {(.*)} $line s codename]} { @@ -336,6 +352,8 @@ proc readfile {fname ofilehandle prefix omits reverse} { incr ucs_lines } elseif {[regexp {([0-9A-Fa-f]*)} $line s utf]} { incr utf_lines + } elseif {[regexp {([0-9A-Fa-f]*)} $line s altutf]} { + incr altutf_lines } } close $f