isdn4k-utils/areacode/areacode.c

584 lines
19 KiB
C

/*****************************************************************************/
/* */
/* AREACODE.C */
/* */
/* Portable library module to search for an area code in a database. */
/* */
/* */
/* */
/* (C) 1996,97 Ullrich von Bassewitz */
/* Wacholderweg 14 */
/* D-70597 Stuttgart */
/* EMail: uz@musoftware.com */
/* */
/* */
/* This software is provided 'as-is', without any express or implied */
/* warranty. In no event will the authors be held liable for any damages */
/* arising from the use of this software. */
/* */
/* Permission is granted to anyone to use this software for any purpose, */
/* including commercial applications, and to alter it and redistribute it */
/* freely, subject to the following restrictions: */
/* */
/* 1. The origin of this software must not be misrepresented; you must not */
/* claim that you wrote the original software. If you use this software */
/* in a product, an acknowledgment in the product documentation would be */
/* appreciated but is not required. */
/* 2. Altered source versions must be plainly marked as such, and must not */
/* be misrepresented as being the original software. */
/* 3. This notice may not be removed or altered from any source */
/* distribution. */
/* */
/*****************************************************************************/
/*
* The code assumes
* - 8 bit bytes
* - unsigned long is 32 bit. This may be changed by #defining u32 to
* a 32 bit unsigned integer type when compiling this module.
* - ascii character set
*
* The code does *not* assume
* - a specific byte order. Currently the code autoadjusts to big or
* little endian data. If you have something more weird than that,
* you have to add conversion code.
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <limits.h>
#include "areacode.h"
/*****************************************************************************/
/* Externally visible data */
/*****************************************************************************/
/* Name of the area code data file that GetAreaCodeInfo opens. It defaults to
 * DATA_FILENAME when that macro is #defined at compile time and to
 * "areacode.dat" otherwise. The fallback is rarely the right path, so point
 * this variable to the real file *before* the first call to GetAreaCodeInfo.
 */
char* acFileName =
#if defined(DATA_FILENAME)
    DATA_FILENAME;
#else
    "areacode.dat";
#endif
/* Upper limit (in bytes) for the dynamic memory GetAreaCodeInfo may use to
 * cache a part of the area code table. A smaller limit means more disk
 * accesses, a larger one fewer; a limit of zero is allowed and simply
 * disables caching. Caching the complete table takes sizeof (u32) bytes
 * (normally 4) per area code in the data file. The default is 32KB.
 */
unsigned long acMaxMem = 32UL * 1024UL;
/*****************************************************************************/
/* Data and structures */
/*****************************************************************************/
/* Pick an unsigned type with exactly 32 bits unless the builder already
 * #defined u32. The candidates are probed in the order unsigned long,
 * unsigned, unsigned short using the limits from limits.h (a limit macro
 * that is not defined evaluates to 0 inside #if and so never matches). If
 * no candidate matches, unsigned long is used as a last resort. Very old,
 * non ISO compliant preprocessors may choke on the comparisons.
 */
#if !defined(u32)
#  if defined(ULONG_MAX) && (ULONG_MAX == 4294967295UL)
#    define u32 unsigned long
#  elif defined(UINT_MAX) && (UINT_MAX == 4294967295UL)
#    define u32 unsigned
#  elif defined(USHRT_MAX) && (USHRT_MAX == 4294967295UL)
#    define u32 unsigned short
#  else
#    define u32 unsigned long
#  endif
#endif
/* The version of the data file we support (major only, minor is ignored).
 * LoadFileHeader compares this against the high word of the file's version
 * field masked with 0xFF00 and returns acWrongVersion on a mismatch.
 */
#define acVersion 0x100
/* The two values the magic word may have when it is read from the file
 * without any byte swapping. LoadFileHeader selects boLittleEndian when it
 * sees LittleMagic and boBigEndian when it sees BigMagic (the same four
 * bytes in reversed order); any other value is rejected as acInvalidFile.
 */
#define LittleMagic 0x35465768L
#define BigMagic 0x68574635L
/* Byte order codes stored in AreaCodeDesc.ByteOrder. _ByteSwapIfNeeded
 * returns values unchanged for boLittleEndian and swaps the bytes for any
 * other code.
 */
#define boLittleEndian 0
#define boBigEndian 1
/* The byte order used in the file is little endian (intel) format. This is
 * the order LoadFileHeader passes to _Load_u32 when reading the magic word,
 * so the magic is compared exactly as it was read.
 */
#define FileByteOrder boLittleEndian
/* This is the header data of the data file. It is not used anywhere in
 * the code (LoadFileHeader reads the fields one u32 at a time, in exactly
 * this order), just have a look at it since it describes the layout in the
 * file.
 */
typedef struct {
u32 Magic; /* LittleMagic or BigMagic, depending on how it reads on this machine */
u32 Version; /* Version in hi word, build in lo word */
u32 Count; /* Number of area code entries; zero is rejected as invalid */
u32 AreaCodeStart; /* File offset of the table of encoded area codes */
u32 NameIndexStart; /* File offset of the name index (one u32 per entry) */
u32 NameStart; /* File offset that the name index values are relative to */
u32 AreaCodeLenStart; /* File offset of the length table (one byte per entry); only read if the version is >= 0x101 */
} PrefixHeader;
/* This is what's really used: the working state of one lookup. An instance
 * lives on the stack of GetAreaCodeInfo for the duration of one call.
 */
typedef struct {
/* The file we read from */
FILE* F;
/* Byte order code (boLittleEndian/boBigEndian) chosen by LoadFileHeader
 * from the magic word. It decides whether every u32 taken from the file or
 * from Table gets its bytes swapped.
 */
unsigned ByteOrder;
/* Stuff from the file header. Version is the high word and Build the low
 * word of the header's version field. AreaCodeLenStart is only assigned
 * when Version is >= 0x101.
 */
unsigned Version;
unsigned Build;
u32 Count;
u32 AreaCodeStart;
u32 NameIndexStart;
u32 NameStart;
u32 AreaCodeLenStart;
/* Control data: Table, if not 0, holds the entries with the indices
 * First..Last (inclusive) of the area code table exactly as they were read
 * from the file, i.e. not yet byte swapped. First and Last are only
 * meaningful while Table is not 0.
 */
long First;
long Last;
u32* Table;
} AreaCodeDesc;
/* Translation table from the PC code page used for the names in the data
 * file to ISO-8859-1. To save some space, the table covers only values
 * > 127, so it must be indexed with (character - 128). Characters that have
 * no ISO-8859-1 counterpart (box drawing, greek letters, ...) are replaced
 * by a similar looking ASCII character or by 0xFE.
 *
 * The table is read-only and holds values above 127, so it is declared as
 * const unsigned char: storing such values in a plain (possibly signed) char
 * relies on an implementation-defined conversion and triggers overflow
 * warnings.
 *
 * Two entries differ from the original table: 0x96 (u circumflex) now maps
 * to 0xFB instead of 0xFC (u umlaut), and 0xAB (one half) now maps to 0xBD
 * instead of 0xC6 (AE ligature). Both code points have the same meaning in
 * CP437 and CP850.
 * NOTE(review): entries such as 0x9B..0x9E follow CP437 rather than CP850 -
 * confirm which code page the data file really uses.
 */
#ifdef CHARSET_ISO
static const unsigned char ISOMap [128] = {
    /* 0x80 */ 0xC7, 0xFC, 0xE9, 0xE2, 0xE4, 0xE0, 0xE5, 0xE7,
    /* 0x88 */ 0xEA, 0xEB, 0xE8, 0xEF, 0xEE, 0xEC, 0xC4, 0xC5,
    /* 0x90 */ 0xC9, 0xE6, 0xC6, 0xF4, 0xF6, 0xF2, 0xFB, 0xF9,
    /* 0x98 */ 0xFF, 0xD6, 0xDC, 0xA2, 0xA3, 0xA5, 0x50, 0x66,
    /* 0xA0 */ 0xE1, 0xED, 0xF3, 0xFA, 0xF1, 0xD1, 0xAA, 0xBA,
    /* 0xA8 */ 0xBF, 0x2D, 0xAC, 0xBD, 0xBC, 0xA1, 0xAB, 0xBB,
    /* 0xB0 */ 0xFE, 0xFE, 0xFE, 0x7C, 0x2B, 0x2B, 0x2B, 0x2B,
    /* 0xB8 */ 0x2B, 0x2B, 0x7C, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B,
    /* 0xC0 */ 0x2B, 0x2B, 0x2B, 0x2B, 0x2D, 0x2B, 0x2B, 0x2B,
    /* 0xC8 */ 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2D, 0x2B, 0x2B,
    /* 0xD0 */ 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B, 0x2B,
    /* 0xD8 */ 0x2B, 0x2B, 0x2B, 0xFE, 0xFE, 0xFE, 0xFE, 0xFE,
    /* 0xE0 */ 0x61, 0xDF, 0x63, 0x70, 0x5A, 0x73, 0xB5, 0x74,
    /* 0xE8 */ 0x70, 0x54, 0x4F, 0x64, 0x38, 0x30, 0x65, 0x55,
    /* 0xF0 */ 0x3D, 0xB1, 0x3E, 0x3C, 0x66, 0x4A, 0xF7, 0x7E,
    /* 0xF8 */ 0xB0, 0xB7, 0xB7, 0x2F, 0x6E, 0xB2, 0xFE, 0xFF
};
#endif
/* Reverse the order of the four bytes of a 32 bit value, i.e. convert
 * between big endian and little endian representation (the operation is its
 * own inverse). Each byte is masked out first and then moved to its mirrored
 * position.
 * Beware: This is a macro and its argument is evaluated four times, so do
 * not pass expressions with side effects!
 */
#define _ByteSwap(V) ((((V) & 0xFF000000) >> 24) | \
                      (((V) & 0x00FF0000) >>  8) | \
                      (((V) & 0x0000FF00) <<  8) | \
                      (((V) & 0x000000FF) << 24))
/*****************************************************************************/
/* Helper functions */
/*****************************************************************************/
static u32 _ByteSwapIfNeeded (u32 D, unsigned ByteOrder)
/* Return D in the order requested by ByteOrder: unchanged for
 * boLittleEndian, with its four bytes reversed for every other value.
 */
{
    if (ByteOrder == boLittleEndian) {
        /* Nothing to do */
        return D;
    }

    /* Everything else is handled as "swap needed" */
    return _ByteSwap (D);
}
static u32 ByteSwapIfNeeded (u32 D, const AreaCodeDesc* Desc)
/* Convenience wrapper for _ByteSwapIfNeeded that takes the byte order from
 * the descriptor.
 */
{
    unsigned Order = Desc->ByteOrder;

    return _ByteSwapIfNeeded (D, Order);
}
static u32 _Load_u32 (FILE* F, unsigned ByteOrder)
/* Load an u32 from the current position of F and swap its bytes if
 * ByteOrder asks for it (see _ByteSwapIfNeeded).
 *
 * If the value cannot be read completely (end of file or I/O error), the
 * function returns 0 instead of handing out an uninitialized or partially
 * filled variable. The function has no error channel of its own: callers
 * that need to tell a failure from a stored zero must check feof() and
 * ferror() on F afterwards, as LoadFileHeader does.
 */
{
    u32 D = 0;

    /* Read the data from the file */
    if (fread (&D, sizeof (D), 1, F) != 1) {
        /* Short read: the contents of D cannot be trusted */
        return 0;
    }

    /* Swap bytes if needed and return the result */
    return _ByteSwapIfNeeded (D, ByteOrder);
}
static u32 Load_u32 (const AreaCodeDesc* Desc)
/* Read the next u32 from the data file of the descriptor, converted
 * according to the byte order stored in the descriptor.
 */
{
    FILE* Stream = Desc->F;
    u32   Value  = _Load_u32 (Stream, Desc->ByteOrder);

    return Value;
}
static unsigned LoadFileHeader (AreaCodeDesc* Desc)
/* Read the file header from the current position of Desc->F (the start of a
 * freshly opened file) and fill in the byte order and header fields of Desc.
 * Returns:
 *   acOk            the header was read and looks sane
 *   acInvalidFile   unknown magic word, premature end of file or an entry
 *                   count of zero
 *   acFileError     the stream reported a read error
 *   acWrongVersion  the major version of the file is not acVersion
 */
{
    u32 PackedVersion;

    /* The magic word is compared exactly as stored in the file (no
     * conversion). Which of the two variants we see tells us if the
     * remaining values need byte swapping.
     */
    u32 RawMagic = _Load_u32 (Desc->F, FileByteOrder);
    if (RawMagic == LittleMagic) {
        Desc->ByteOrder = boLittleEndian;
    } else if (RawMagic == BigMagic) {
        Desc->ByteOrder = boBigEndian;
    } else {
        /* Most likely not one of our data files */
        return acInvalidFile;
    }

    /* The rest of the header, in file order. The version word carries the
     * version in its high half and the build number in its low half.
     */
    PackedVersion        = Load_u32 (Desc);
    Desc->Version        = (PackedVersion >> 16);
    Desc->Build          = (PackedVersion & 0xFFFF);
    Desc->Count          = Load_u32 (Desc);
    Desc->AreaCodeStart  = Load_u32 (Desc);
    Desc->NameIndexStart = Load_u32 (Desc);
    Desc->NameStart      = Load_u32 (Desc);

    /* Files of version 1.01 and above carry one more table offset; older
     * files do not have it, so it must not be read from them.
     */
    if (Desc->Version >= 0x101) {
        Desc->AreaCodeLenStart = Load_u32 (Desc);
    }

    /* Validate what we got, most severe problem first */
    if (ferror (Desc->F)) {
        /* Some sort of file problem */
        return acFileError;
    }
    if (feof (Desc->F) || Desc->Count == 0) {
        /* Truncated header or empty table - not a valid file */
        return acInvalidFile;
    }
    if ((Desc->Version & 0xFF00) != acVersion) {
        /* Major version differs from the one we understand */
        return acWrongVersion;
    }

    /* Data is sane */
    return acOk;
}
static u32 EncodeNumber (const char* Phone)
/* Pack the phone number into the BCD format used inside the data file: one
 * nibble per character, the first character in the most significant nibble.
 * At most the first eight characters are used; if the number is shorter, the
 * remaining low nibbles are set to 0x0F. Only the low four bits of each
 * character are taken, no check for digits is done.
 */
{
    u32      Encoded = 0;
    unsigned Pos;

    /* Number of characters that actually go into the result */
    unsigned Used = strlen (Phone);
    if (Used > 8) {
        Used = 8;
    }

    /* Build all eight nibbles, from the most to the least significant one */
    for (Pos = 0; Pos < 8; Pos++) {
        unsigned Nibble;
        if (Pos < Used) {
            Nibble = (unsigned) ((unsigned char) Phone [Pos]) & 0x0F;
        } else {
            /* Filler behind the end of the number */
            Nibble = 0x0F;
        }
        Encoded = (Encoded << 4) | Nibble;
    }

    return Encoded;
}
static u32 ReadPhone (const AreaCodeDesc* Desc, long Index)
/* Return the encoded area code stored at position Index of the area code
 * table. The value is taken from the in-memory copy if Index lies inside
 * the cached range, and read from the data file otherwise.
 */
{
    /* Desc->First and Desc->Last are only valid while a table is loaded, so
     * the table pointer must be tested first.
     */
    int Cached = Desc->Table != 0 &&
                 Index >= Desc->First &&
                 Index <= Desc->Last;

    if (!Cached) {
        /* Not in memory: fetch the entry from the file */
        fseek (Desc->F, Desc->AreaCodeStart + Index * sizeof (u32), SEEK_SET);
        return Load_u32 (Desc);
    }

    /* The table holds the raw file data, so it is swapped on access */
    return ByteSwapIfNeeded (Desc->Table [Index - Desc->First], Desc);
}
static void LoadTable (AreaCodeDesc* Desc)
/* Try to load the entries Desc->First..Desc->Last (inclusive) of the area
 * code table into freshly allocated memory. On success, Desc->Table points
 * to the raw (not byte swapped) entries. On any failure - empty range, size
 * overflow, out of memory, seek or read error - Desc->Table is 0 afterwards.
 * A failure is never fatal: ReadPhone falls back to reading single entries
 * from the file as long as there is no table.
 * The caller must make sure that no table is loaded when calling this
 * function, and is responsible for freeing Desc->Table later.
 */
{
    size_t Entries;

    /* Play it safe: the descriptor must not claim a table if we bail out */
    Desc->Table = 0;

    /* Reject empty ranges, and ranges whose size in bytes cannot be
     * represented. The size is calculated as a size_t because an u32 could
     * silently wrap around for a huge entry count.
     */
    if (Desc->Last < Desc->First) {
        return;
    }
    Entries = (size_t) (Desc->Last - Desc->First) + 1;
    if (Entries > ((size_t) -1) / sizeof (u32)) {
        return;
    }

    Desc->Table = malloc (Entries * sizeof (u32));
    if (Desc->Table == 0) {
        /* Out of memory. There is no problem with this now since we do
         * not really need the table in core memory (it speeds things up,
         * that's all). In addition to that, the memory requirement halves
         * with each iteration, so maybe we have more luck next time.
         */
        return;
    }

    /* Seek to the first wanted entry and read the whole range. If that does
     * not work completely, the buffer contents are undefined and must not be
     * used for the search, so drop the table again.
     */
    if (fseek (Desc->F, Desc->AreaCodeStart + Desc->First * sizeof (u32), SEEK_SET) != 0 ||
        fread (Desc->Table, sizeof (u32), Entries, Desc->F) != Entries) {
        free (Desc->Table);
        Desc->Table = 0;
    }
}
static unsigned char CalcCodeLen (u32 Code)
/* Return the number of characters of an encoded area code, that is, the
 * number of nibbles (counted from the most significant one) in front of the
 * first 0x0F filler nibble. The result is 8 if there is no filler at all.
 */
{
    unsigned char Digits = 0;
    u32           Nibble = 0xF0000000L;

    /* Walk down the nibbles until we hit a filler or run out of nibbles */
    while (Nibble != 0 && (Code & Nibble) != Nibble) {
        Digits++;
        Nibble >>= 4;
    }

    return Digits;
}
/*****************************************************************************/
/* Code */
/*****************************************************************************/
unsigned GetAreaCodeInfo (acInfo* AC, const char* PhoneNumber)
/* Return - if possible - an information for the area code of the given number.
 * The data file named by acFileName is opened, searched and closed again on
 * each call. The function returns one of the error codes defined in
 * areacode.h:
 *
 *   acOk            No error. If an area code was found, AC->Info holds its
 *                   name (zero terminated) and AC->AreaCodeLen the length of
 *                   the area code. If no area code corresponds to the given
 *                   number (or the number is empty), AC->Info is empty and
 *                   AC->AreaCodeLen is zero.
 *   acFileError     The file could not be opened or a read error occurred.
 *   acInvalidFile   The file is not a valid data file or is truncated.
 *   acWrongVersion  The file has an unsupported major version.
 *
 * For every result other than a successful match, AC->Info is empty and
 * AC->AreaCodeLen is zero.
 */
{
    u32             Phone;                  /* PhoneNumber encoded in BCD */
    long            First, Last, Current;   /* For binary search */
    u32             CurrentVal;             /* The value at Table [Current] */
    unsigned char   AreaCodeLen;            /* The length of the area code found */
    unsigned char   InfoLen;                /* Length byte of the info string */
    unsigned char   RealLen;                /* Length from the length table */
    size_t          NameLen;                /* Bytes of the info string we store */
    u32             NameOffset;             /* Value from the name index */
    unsigned        RC = acOk;              /* Result code of the function */
    u32             Mask;
    AreaCodeDesc    Desc;

    /* Clear the fields of the AC struct. Terminating the last element of
     * Info as well assures a terminated string, whatever happens below.
     */
    AC->Info [0] = '\0';
    AC->Info [sizeof (AC->Info) - 1] = '\0';
    AC->AreaCodeLen = 0;

    /* If the number is empty, return immediately */
    if (PhoneNumber [0] == '\0') {
        return acOk;
    }

    /* Open the database file, check for errors */
    Desc.F = fopen (acFileName, "rb");
    if (Desc.F == 0) {
        /* We had an error opening the file */
        return acFileError;
    }

    /* Initialize descriptor data where needed */
    Desc.Table = 0;

    /* Read the header from the file */
    RC = LoadFileHeader (&Desc);
    if (RC != acOk) {
        /* Wrong file or file read error */
        goto ExitWithClose;
    }

    /* Convert the phone number into the internal representation */
    Phone = EncodeNumber (PhoneNumber);

    /* Keep the compiler from warning about uninitialized variables */
    Current = 0;
    CurrentVal = 0;

    /* Now do a binary search over the data. It ends with First being the
     * index of the first entry that is not less than Phone.
     */
    First = 0;
    Last = (long) Desc.Count - 1;
    while (First <= Last) {

        /* If we don't have read the table into memory, check if we can do
         * so now. Entry counts are compared instead of byte counts, because
         * the byte count of a huge range may not fit into the integer type
         * and would then look small enough by accident.
         */
        if (Desc.Table == 0) {
            unsigned long Entries = (unsigned long) (Last - First + 1);
            if (Entries <= acMaxMem / sizeof (u32)) {
                /* Ok, the current part of the table is now small enough to
                 * load it into memory.
                 */
                Desc.First = First;
                Desc.Last = Last;
                LoadTable (&Desc);
            }
        }

        /* Set current to mid of range (written so that it cannot overflow) */
        Current = First + (Last - First) / 2;

        /* Get the phone number from that place */
        CurrentVal = ReadPhone (&Desc, Current);

        /* Do a compare */
        if (Phone > CurrentVal) {
            First = Current + 1;
        } else {
            Last = Current - 1;
            if (Phone == CurrentVal) {
                /* Set the condition to terminate the loop */
                First = Current;
            }
        }
    }

    /* First is the index of the area code, we eventually found. Put the index
     * into Current and the value into CurrentVal. There is nothing to read
     * if First points behind the end of the table.
     */
    if (Current != First && First < (long) Desc.Count) {
        CurrentVal = ReadPhone (&Desc, First);
    }
    Current = First;

    /*
     * We may now delete an eventually allocated table space since it is
     * not needed any more.
     */
    free (Desc.Table);
    Desc.Table = 0;

    /* If anything went wrong reading the table, the search result cannot be
     * trusted.
     */
    if (ferror (Desc.F)) {
        RC = acFileError;
        goto ExitWithClose;
    }

    /* If Current points behind the last entry, we did not find anything */
    if (Current >= (long) Desc.Count) {
        /* Not found */
        goto ExitWithClose;
    }

    /* Calculate the length of the area code */
    AreaCodeLen = CalcCodeLen (CurrentVal);

    /* Check if the Prefix is actually the first part of the phone number.
     * A length of zero would need a shift by 32 bits, which is undefined for
     * a 32 bit type, so handle it separately: an empty prefix has no
     * significant nibbles at all.
     */
    if (AreaCodeLen == 0) {
        Mask = 0;
    } else {
        Mask = (u32) ((0xFFFFFFFFUL << ((8 - AreaCodeLen) * 4)) & 0xFFFFFFFFUL);
    }
    if ((Phone & Mask) != (CurrentVal & Mask)) {
        /* They are different */
        goto ExitWithClose;
    }

    /* Ok, we have now definitely found the code. Current is the index of the
     * area code. Seek to the corresponding position in the name index, get
     * the name position from there and seek to that place.
     */
    if (fseek (Desc.F, Desc.NameIndexStart + Current * sizeof (u32), SEEK_SET) != 0) {
        goto ReadFailed;
    }
    NameOffset = Load_u32 (&Desc);
    if (ferror (Desc.F) || feof (Desc.F)) {
        goto ReadFailed;
    }
    if (fseek (Desc.F, Desc.NameStart + NameOffset, SEEK_SET) != 0) {
        goto ReadFailed;
    }

    /* Read the length of the name. The length comes from the file, so never
     * let it exceed the space we have in the result struct.
     */
    InfoLen = 0;
    if (fread (&InfoLen, 1, 1, Desc.F) != 1) {
        goto ReadFailed;
    }
    NameLen = InfoLen;
    if (NameLen > sizeof (AC->Info) - 1) {
        NameLen = sizeof (AC->Info) - 1;
    }

    /* Read the info into the result struct and terminate it */
    if (fread (AC->Info, 1, NameLen, Desc.F) != NameLen) {
        goto ReadFailed;
    }
    AC->Info [NameLen] = '\0';

#ifdef CHARSET_ISO
    /* Translate the info to the ISO-8859-1 charset */
    {
        size_t I;
        for (I = 0; I < NameLen; I++) {
            unsigned char C = (unsigned char) AC->Info [I];
            if (C >= 128) {
                AC->Info [I] = (char) ISOMap [C - 128];
            }
        }
    }
#endif

    /* If the areacode file is version 1.01 or greater, there is an additional
     * table with the length of the "real" area code. Older versions use the
     * length of the area code. This enables dividing of number spaces, e.g.
     * 49212[0-8] = Solingen, 492129 = Haan. With the old data file, the
     * areacode of Solingen would be 492120 but the official code is just
     * 49212 which needs an additional length byte.
     */
    if (Desc.Version >= 0x101) {
        if (fseek (Desc.F, Desc.AreaCodeLenStart + Current, SEEK_SET) != 0 ||
            fread (&RealLen, 1, sizeof (RealLen), Desc.F) != sizeof (RealLen)) {
            goto ReadFailed;
        }
        AreaCodeLen = RealLen;
    }
    AC->AreaCodeLen = AreaCodeLen;
    goto ExitWithClose;

ReadFailed:
    /* The file ended early or could not be read while fetching the result.
     * Do not hand out partial data.
     */
    AC->Info [0] = '\0';
    AC->AreaCodeLen = 0;
    RC = ferror (Desc.F) ? acFileError : acInvalidFile;

ExitWithClose:
    /* Close the data file */
    fclose (Desc.F);

    /* Done, return the result */
    return RC;
}