Updates for destination database generation and related tools.

Makefile.in  With cdb as database format the dest.cdb is now rebuilt
             automatically when the input files change (targets data
             and alldata).  New target dest.gdbm.

i4lconf.pm   Locates the countryfile country(-de).dat in order to
             handle different installation schemes for that file.

pp_rate      Use of i4lconf.pm if present.  New command `i' at
             interactive prompt for unknown destination names.
             Comments at the end of A: lines are now preserved.

makedest     Use of i4lconf.pm if present.
             Fixed function of the -g command line switch.

wld.pm       Speed enhancements for weighted Levenshtein distance.

Note: i4lconf.pm, pp_rate and wld.pm originate unchanged from the
rates4linux CVS where they are needed to build rate-files like
rate-de.dat independent from the isdnlog sources.
This commit is contained in:
tobiasb 2003-07-25 21:23:15 +00:00
parent 6d14cc8aad
commit 74d853c0df
5 changed files with 231 additions and 45 deletions

View File

@ -9,7 +9,7 @@ SHELL = /bin/sh
CFLAGS = -Wall -g
INCLUDES =
LDFLAGS = @DBMLIB@
DBEXT = @DBEXT@
export DBEXT = @DBEXT@
RDBEXT = @RDBEXT@
CDBEXTRALIBS = @CDBEXTRALIBS@
PROGRAM = dest
@ -70,7 +70,7 @@ clean:
rm -f *.o *~ $(PROGRAM)
dataclean:
rm -f *.gdbm *.db *.cdb
rm -f *.gdbm *.db *.cdb *.cdb.dump
distclean: clean dataclean
rm -f config.status config.cache config.log *.man Makefile
@ -81,12 +81,34 @@ ifeq ($(CONFIG_ISDN_LOG_CC_EN),y)
DEST_LANG= -en
endif
data:
ifeq ($(DBEXT),.cdb)
@if [ -e dest.cdb.dump ] ; then \
rm dest.cdb.dump ; \
fi
# set source dependencies for destination database
# works only for data when $(DBEXT) = .cdb, otherwise existing destination
# database blocks rebuilding and requires a manual make dataclean before.
DEST_SRC_GLOBAL := ../../country-de.dat cities.dat
DEST_SRC_ALLNATIONAL := $(shell ls ../zone/??/code)
DEST_SRC_NATIONAL :=
ifeq ($(CONFIG_ISDN_LOG_DEST_AT),y)
DEST_SRC_NATIONAL += ../zone/at/code
endif
ifeq ($(CONFIG_ISDN_LOG_DEST_DE),y)
DEST_SRC_NATIONAL += ../zone/de/code
endif
ifeq ($(CONFIG_ISDN_LOG_DEST_NL),y)
DEST_SRC_NATIONAL += ../zone/nl/code
endif
ifeq ($(CONFIG_ISDN_LOG_DEST_CH),y)
DEST_SRC_NATIONAL += ../zone/ch/code
endif
ifeq ($(DBEXT),.cdb)
data: dest.cdb
# allow explicit request for dest.gdbm with configured DBEXT=".cdb"
dest.gdbm: $(DEST_SRC_GLOBAL) $(DEST_SRC_NATIONAL)
$(MAKE) DBEXT=".gdbm" data
else
data: $(DEST_SRC_GLOBAL) $(DEST_SRC_NATIONAL)
@if [ ! -e dest$(DBEXT) ] ; then \
./makedest -v $(DEST_LANG) ; \
./makedest -v -gcities.dat -a $(DEST_LANG) ; \
@ -103,16 +125,36 @@ endif
./makedest -v -a ch || true; \
fi ; \
fi
ifeq ($(DBEXT),.cdb)
@if [ ! -e dest.cdb ] ; then \
echo "" >> dest.cdb.dump ; \
echo Running cdbmake ; \
../cdb/cdbmake < dest.cdb.dump dest.cdb dest.temp ; \
rm dest.cdb.dump ;\
fi
endif
alldata:
dest.cdb: $(DEST_SRC_GLOBAL) $(DEST_SRC_NATIONAL)
@echo "Building destination database $@ ..."
@if [ -e dest.cdb.dump ] ; then \
rm dest.cdb.dump ; \
fi;
@if [ -e dest.cdb ] ; then \
rm dest.cdb ; \
fi;
./makedest -v $(DEST_LANG)
./makedest -v -gcities.dat -a $(DEST_LANG)
ifeq ($(CONFIG_ISDN_LOG_DEST_AT),y)
./makedest -v -a at
endif
ifeq ($(CONFIG_ISDN_LOG_DEST_DE),y)
./makedest -v -a de
endif
ifeq ($(CONFIG_ISDN_LOG_DEST_NL),y)
./makedest -v -a nl
endif
ifeq ($(CONFIG_ISDN_LOG_DEST_CH),y)
./makedest -v -a ch || true
endif
@echo "" >> dest.cdb.dump
../cdb/cdbmake < dest.cdb.dump dest.cdb dest.temp
@rm dest.cdb.dump
@echo "$@ complete."
alldata: $(DEST_SRC_GLOBAL) $(DEST_SRC_ALLNATIONAL)
ifeq ($(DBEXT),.cdb)
if [ -e dest.cdb.dump ] ; then \
rm dest.cdb.dump ; \

View File

@ -0,0 +1,88 @@
=pod
$Id: i4lconf.pm,v 1.1 2003/07/25 21:23:15 tobiasb Exp $
Copyright 2002..2003 Tobias Becker <i4l-projects@talypso.de>
License terms of isdnlog apply.
locate_countryfile() returns the location of the system's countryfile.
This file is used by pp_rate for the preparation of rate-files and by
makedest for the generation of the destination database. Usually the
location is specified with the COUNTRYFILE keyword in the ISDNLOG
section of /etc/isdn/isdn.conf. If this fails, locate_countryfile()
will look for a file named country.dat or country-de.dat in some
usual directories. If this fails too, an empty string will be returned.
Please note that locate_countryfile() does not check whether the
returned file is a valid countryfile or not.
=cut
package i4lconf;
use strict;
use vars qw($VERSION @ISA @EXPORT);
$VERSION=1.01;
require Exporter;
@ISA=qw(Exporter);
@EXPORT=qw(locate_countryfile);
# locate_countryfile([$candidate [, $conffile]])
#
# Returns the path of the countryfile to use, or an empty string when no
# usable file could be found.  A file counts as usable when it exists, is
# a plain file, is readable and is not empty; its content is not checked.
#
#   $candidate  optional filename suggested by the caller, tried first
#   $conffile   optional path of the isdn configuration file to consult;
#               defaults to /etc/isdn/isdn.conf
#
# Search order:
#   0. $candidate, if given
#   1. the COUNTRYFILE value of the [ISDNLOG] section of $conffile
#   2. country.dat / country-de.dat in a list of usual directories
sub locate_countryfile(;$$) {
    my ($candidate, $conffile) = @_;
    $conffile = '/etc/isdn/isdn.conf' unless defined($conffile);

    # true if the given name is an existing, plain, readable, non-empty file
    my $usable = sub {
        my ($file) = @_;
        return 0 unless ( defined($file) and $file ne '' );
        return ( -e $file and -f _ and -r _ and -s _ ) ? 1 : 0;
    };

    # Step 0 - check filename from caller if submitted
    return $candidate if ( $usable->($candidate) );

    # Step 1 - COUNTRYFILE from section ISDNLOG of the configuration file
    my $configured = '';
    if ( open(my $conf, '<', $conffile) ) {
        my $section = '';    # no section seen yet
        while ( defined(my $line = <$conf>) ) {
            chomp $line;
            # Remove comments: a # starts a comment unless it is escaped
            # by a backslash.  The lookbehind keeps the character in front
            # of the # intact, so "value#comment" yields "value".
            $line =~ s/(?<!\\)\#.*//;
            # Trailing blanks (and a CR from DOS line ends) are never part
            # of a section name or value.
            $line =~ s/\s+$//;
            next if ( $line eq '' );
            # Section headers stand at the beginning of a line; anchoring
            # avoids taking brackets inside a value for a new section.
            if ( $line =~ /^\s*\[([^\]]+)\]/ ) {
                $section = uc($1);
                next;
            }
            next unless ( $section eq 'ISDNLOG' );
            if ( $line =~ /^\s*([^\s=]+)\s*=\s*(.*)$/ ) {
                my ($keyword, $content) = (uc($1), $2);
                if ( $keyword eq 'COUNTRYFILE' ) {
                    $configured = $content;
                    last;
                }
            }
        }
        close($conf);
    }
    return $configured if ( $usable->($configured) );

    # Step 2 - searching
    my @dirs = ( '/usr/lib/isdn',          # default installation
                 '/usr/share/isdn',        # seen on debian
                 '/usr/local/lib/isdn',    # the author's choice
                 '../..'                   # the isdnlog directory of the i4l-CVS
               );
    my @names = ( 'country.dat',           # canonical name since 2000
                  'country-de.dat'         # former name, still used in source
                );
    foreach my $dir (@dirs) {
        foreach my $name (@names) {
            my $found = "$dir/$name";
            return $found if ( $usable->($found) );
        }
    }

    # nothing found at all
    return '';
} # end of sub locate_countryfile
1;

View File

@ -1,12 +1,16 @@
#! /usr/bin/perl
# make dest.gdbm
# necessary parts: /usr/lib/isdn/country-de.dat
# necessary parts: ../../country-de.dat
# or installed countryfile, e. g. /usr/lib/isdn/country.dat
# optional parts: zone/CC/code
#
# usage:
# makedest [-en] [-v] [-a] [cc ...] [-cCC file...] [-gFile]
#
# When DBEXT in the environment is set to something other than ".cdb"
# the old gdbm format is used instead of the current standard cdb.
#
# these entries are written as 0-terminated strings
# vErSiO\0 => 1.0 int[ cc...]
#
@ -23,7 +27,8 @@
package main;
use wld;
BEGIN {
if (-e "../cdb/i4l_cdb.c") {
if ( (! exists $ENV{DBEXT} or $ENV{DBEXT} eq ".cdb") and
-e "../cdb/i4l_cdb.c" ) {
@AnyDBM_File::ISA = qw( CDB_File_Dump GDBM_File NDBM_File DB_File );
}
else {
@ -34,7 +39,12 @@ use AnyDBM_File;
use POSIX;
use strict;
$|=1;
my $co_dat = '/usr/lib/isdn/country-de.dat';
# locate countryfile (country.dat or country-de.dat)
my $co_dat = '';
eval 'use i4lconf; $co_dat = locate_countryfile();';
unless ($co_dat) { # old behaviour as last ressort
$co_dat = '/usr/lib/isdn/country-de.dat';
}
my $dest_gdbm = './dest.gdbm';
if (@AnyDBM_File::ISA eq @CDB_File_Dump::ISA) {
$dest_gdbm = './dest.cdb.dump';
@ -90,7 +100,8 @@ if (!$tied) {
unless($append) {
$db{$vers}="Dest $VERSION int\x00";
write_global($co_dat);
# priority for country-de.dat in source tree against installed countryfile
write_global($co_dat, '../../country-de.dat');
}
foreach $C (@ARGV) {
@ -99,12 +110,17 @@ foreach $C (@ARGV) {
untie(%db);
print "End.\n" if($verbose);
sub write_global {
# $co_dat can be countryfile or global file (-gfile)
my($co_dat) = $_[0];
print "Adding global $co_dat...\n" if($verbose);
open(IN, '../../country-de.dat') ||
open(IN, $co_dat) ||
die("Cant find country-de.dat");
if ( $_[1] and open(IN, $_[1]) ) {
$co_dat = $_[1];
}
else {
open(IN, $co_dat) || die("Cant find $co_dat");
}
print "Adding global $co_dat ...\n" if($verbose);
$i=0;
while (<IN>) {
print "$i\r" if (++$i % 10==0 && $verbose && $outistty);

View File

@ -1,11 +1,26 @@
#! /usr/bin/perl
# This version of pp_rate originates from rates4linux:
# $Id: pp_rate,v 1.6 2003/07/25 21:23:15 tobiasb Exp $
# The rates4linux homepage: http://www.sourceforge.net/projects/rates4linux/
# It bases on Rev. 1.5 of isdn4k-utils/isdnlog/tools/dest/pp_rate
# in the CVS at http://www.isdn4linux.de.
#
use wld;
use strict;
use IO::File;
$|=1;
use vars qw( %names $COUNTRY %myalias );
use vars qw( %names $COUNTRY %myalias $co_dat );
$COUNTRY = '../country';
$COUNTRY = '' unless (-x $COUNTRY); # disable country query if not available
# unique global determination of countryfile to use:
# The guessed filename country.dat works used within rates4linux.
$co_dat = '';
eval 'use i4lconf; $co_dat = locate_countryfile(\'country.dat\');';
unless ($co_dat) { # old behaviour as last ressort
$co_dat = '/usr/lib/isdn/country-de.dat';
}
&get_country;
&get_alias;
@ -24,7 +39,6 @@ sub get_country {
$names{lc $a}=$key;
}
}
my $co_dat = '/usr/lib/isdn/country-de.dat';
open(IN,$co_dat) || die("Cant read $co_dat");
while (<IN>) {
chomp;
@ -44,7 +58,7 @@ sub get_country {
}
&add1;
close IN;
print "$nn Countrys $na Aliases loaded\n";
print "$nn Countrys $na Aliases loaded from $co_dat\n";
}
sub get_alias {
@ -126,7 +140,11 @@ ragain:
}
elsif (/^A:(.*)/) {
$a=$1;
$a =~ s/\s*#.*$//;
my $acmt = '';
if ( $a =~ s/(\s*#.*$)// ) { # preserve comments in A: lines
$acmt = $1;
$acmt = '' unless ( $acmt =~/#\s*\S/ ); # only non-empty
}
$a =~ s/[,\s]+$//;
@a=split(/\s*,\s*/, $a);
foreach $c (@a) {
@ -155,11 +173,13 @@ again:
}
else {
foreach $name (keys(%names)) {
if (($r=wld($name,$c)) < $m) {
if (($r=wld($name,$c,$m)) < $m) {
$m=$r; $tf=$name;
print "\r'$c' searching (d<$m) ..";
last if($m==0);
}
}
print "\r'$c'";
}
if ($tf && $m<=1) {
push(@keys,$names{$tf});
@ -168,8 +188,9 @@ again:
print " -> $tf\n";
}
else {
choice:
my($x);
print "\nLine $l: $c => $tf ($m)\n ? [j,q,[+|=]...,/] : ";
print "\nLine $l: $c => $tf ($m)\n ? [j,q,i,[+|=]...,/] : ";
$x = <STDIN>;
chomp($x);
if ($x =~ /^=(.*)/) {
@ -181,8 +202,8 @@ again:
goto again;
}
elsif ($x =~ /^\/(.*)/) {
print `grep -3 -i $1 /usr/lib/isdn/country-de.dat`;
goto again;
print `grep -3 -i $1 $co_dat`;
goto choice;
}
if ($x eq 'j') {
$c = $tf;
@ -193,17 +214,22 @@ again:
elsif ($x eq 'h') {
print "j => Vorschlag annehmen\n";
print "q => abbrechen\n";
print "n => unbekannt ignorieren (gilt dann fuer die gesamte Datei)\n";
print "n => [ohne Funktion] unbekannt ignorieren (gilt dann fuer die gesamte Datei)\n";
print "i => Eintrag ignorieren\n";
print "+xx => xx an unbekannt anhaengen\n";
print "= SO => SO uebernehmen\n";
print "= => unbekannt uebernehmen\n";
print "/xx => in country-de.dat nach xx greppen\n";
$co_dat =~ m{/([^/]*)$};
print "/xx => in $1 nach xx greppen\n";
print "xx => xx ausprobieren\n";
goto again;
goto choice;
}
elsif ($x eq 'q') {
exit;
}
elsif ($x eq 'i') {
next;
}
else {
$c = $x;
}
@ -211,7 +237,7 @@ again:
}
} # else found
} # foreach
$outf[$wrinclude]->print ("A:", join(',',@keys),"\n") || die("can' write");
$outf[$wrinclude]->print ("A:", join(',',@keys),"$acmt\n") || die("can' write");
} # if A
else {
$outf[$wrinclude]->print( $_);

View File

@ -1,41 +1,55 @@
package wld;
use strict;
use integer;
use vars qw($VERSION @ISA @EXPORT);
$VERSION=1.0;
$VERSION=1.1;
require Exporter;
@ISA=qw(Exporter);
@EXPORT=qw(wld);
sub min3 {
my ($x, $y, $z)=@_;
$y = $x if ($x lt $y);
$z = $y if ($y lt $z);
$y = $x if ($x < $y);
$z = $y if ($y < $z);
$z;
}
sub wld { #/* weighted Levenshtein distance */
my ($needle, $haystack) = @_;
# $dmax is the smallest distance already found, only
# smaller distances are computed, otherwise a number
# equal or less than the real distance is returned
my ($needle, $haystack, $dmax) = @_;
$dmax = 98 unless (defined($dmax)); # default when 3rd arg is missing
my($i, $j);
my $l1 = length($needle);
my $l2 = length($haystack);
my @s1 = (0, unpack('A' x $l1, $needle));
my @s2 = (0, unpack('A' x $l2, $haystack));
my $ldiff = abs($l1-$l2);
# the distance can not be less than the length difference
return $ldiff unless ($ldiff < $dmax);
my @dw;
my ($WMAX,$P,$Q,$R);
$WMAX=$l1>$l2?$l1:$l2;
my $imin; # minimum value of column $i, also lower limit of result
my ($P,$Q,$R);
$P=1;
$Q=1;
$R=1;
$dw[0][0]=0;
for ($j=1; $j<=$WMAX; $j++) {
for ($j=1; $j<=$l2; $j++) {
$dw[0][$j]=$dw[0][$j-1]+$Q;
}
for ($i=1; $i<=$WMAX; $i++) {
for ($i=1; $i<=$l1; $i++) {
$dw[$i][0]=$dw[$i-1][0]+$R;
}
for ($i=1; $i<=$l1; $i++) {
$imin = $dw[$i][0];
for($j=1; $j<=$l2; $j++) {
$dw[$i][$j]=&min3($dw[$i-1][$j-1]+((substr($needle,$i-1,1) eq
substr($haystack,$j-1,1))?0:$P),$dw[$i][$j-1]+$Q,$dw[$i-1][$j]+$R);
$dw[$i][$j] = &min3( $dw[$i-1][$j-1] + ( ($s1[$i] eq $s2[$j])?0:$P ),
$dw[$i][$j-1]+$Q, $dw[$i-1][$j]+$R );
$imin = $dw[$i][$j] if ($dw[$i][$j]<$imin);
}
# abort if complete column makes results less than $dmax impossible
return ($imin) unless ($imin<$dmax);
}
return($dw[$l1][$l2]);
}