/*
 * lev100ph.c
 * ----------
 *
 * Phonetically extended Levenshtein function and the like.
 *
 * Copyright (c):
 * 2007-2008:  Joerg MICHAEL, Adalbert-Stifter-Str. 11, 30655 Hannover, Germany
 *
 * SCCS: @(#) lev100ph.c  1.2  2008-11-30
 *
 * This file is subject to the GNU Lesser General Public License (LGPL)
 * (formerly known as GNU Library General Public Licence)
 * as published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this file; if not, write to the
 * Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * Actually, the LGPL is __less__ restrictive than the better known GNU General
 * Public License (GPL). See the GNU Library General Public License or the file
 * LIB_GPLA.TXT for more details and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * There is one important restriction: If you modify this program in any way
 * (e.g. modify the underlying logic or translate this program into another
 * programming language), you must also release the changes under the terms
 * of the LGPL.
 * That means you have to give out the source code to your changes,
 * and a very good way to do so is mailing them to the address given below.
 * I think this is the best way to promote further development and use
 * of this software.
 *
 * If you have any remarks, feel free to e-mail to:
 *     ct@ct.heise.de
 *
 * The author's email address is:
 *    astro.joerg@googlemail.com
 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "umlaut_a.h"
#define  __IS_LEV100_C__

#include "addr_ext.h"

/****  for standalone test of the Levenshtein function:  ***/
/****
  #define LEV100_EXECUTABLE
****/


/****  special "empty_diff" value that "calculate_points"    ****/
/****  treats as "matches everything" (see there)            ****/
#define MATCHES_ALL   -1000
/****  characters regarded as word separators                ****/
/****  (not referenced in the visible part of this file)     ****/
#define WORD_SEPARATORS   "+-/*( )&,'`.:"



/****  character conversion  ****/
/****  All tables have HASH_COUNT entries (constant defined elsewhere).   ****/
/****  "lev_diff" indexes "up_and_conv" with "(unsigned char) c" and      ****/
/****  calls "initialize_conv_strings" as long as the bit                 ****/
/****  CONV_STRINGS_ARE_INITIALIZED is not set in                         ****/
/****  "conv_strings_initialized".                                        ****/
/****  NOTE(review): the exact contents of the tables are filled in       ****/
/****  elsewhere; the names suggest upper/lower-case and sort-key         ****/
/****  mappings -- confirm against "initialize_conv_strings".             ****/
int  conv_strings_initialized = 0;
char up_and_conv[HASH_COUNT];
char sortchar [HASH_COUNT];
char sortchar2[HASH_COUNT];
char upperchar[HASH_COUNT];
char lowerchar[HASH_COUNT];
int  isxletter[HASH_COUNT];




/************************************************************/
/****  phonetic rules for the Levenshtein function  *********/
/************************************************************/

struct PHONETIC_RULES
  {
    char *text_1;
    char *text_2;
    int len_1;
    int len_2;
    int ph_diff;     /***  difference in hundredths of points  ***/
    int hash_group;
  };

/****  Lookup tables used by "lev_diff" to find candidate rules quickly.  ****/
/****  They are only read in the visible part of this file; they are      ****/
/****  presumably filled during initialization -- TODO confirm.           ****/

/****  first / last index into "ph_rules_german" for each hash group      ****/
static int ph_hash_group_begin[17];
static int ph_hash_group_end[17];
/****  per character: index of the first delete/insert rule to try        ****/
/****  (a negative value means "no rule")                                 ****/
static int ph_rules_hash_del_ins [HASH_COUNT];
/****  per character: bit mask of hash groups; bit (g-1) stands for       ****/
/****  group g. "lev_diff" ANDs the masks of the two characters.          ****/
static int ph_rules_hash_replace [HASH_COUNT];

/****  Phonetic rules for German.                                          ****/
/****  Columns: text_1, text_2, len_1, len_2, ph_diff, hash_group.         ****/
/****  "ph_diff" is the cost in hundredths of points; an empty "text_2"    ****/
/****  marks a delete/insert rule. "len_1"/"len_2" are 0 here although     ****/
/****  "lev_diff" skips rules whose length is 0, so they are presumably    ****/
/****  computed at initialization -- TODO confirm.                         ****/
/****  NOTE(review): many entries below have an EMPTY "text_1" (paired     ****/
/****  with "AA", "AE", "OE", "SS", "UE", ...). This looks like non-ASCII  ****/
/****  characters (umlauts and similar) that were lost in an encoding      ****/
/****  conversion -- verify against the upstream file before relying on    ****/
/****  these rules.                                                        ****/
static struct PHONETIC_RULES ph_rules_german[] =
  {
    { "IJ",  "I",   0, 0,   10,  1 },
    { "IJ",  "J",   0, 0,   30,  1 },
    { "IY",  "I",   0, 0,   10,  1 },
    { "IY",  "Y",   0, 0,   30,  1 },
    { "I",   "J",   0, 0,   30,  1 },
    { "I",   "Y",   0, 0,   30,  1 },
    { "J",   "Y",   0, 0,   20,  1 },
    { "IE",  "I",   0, 0,    5,  1 },
    { "IE",  "",    0, 0,  120,  1 },

    { "EU",  "OI",  0, 0,   10,  2 },
    { "EU",  "OY",  0, 0,   10,  2 },
    { "OI",  "OY",  0, 0,    5,  2 },
    { "OU",  "U",   0, 0,    5,  2 },

    { "AH",  "A",   0, 0,    5,  3 },
    { "H",   "",    0, 0,   50,  3 },
    { "",   "AA",  0, 0,    0,  3 },
    { "",   "AE",  0, 0,    0,  3 },
    { "",   "A",   0, 0,   40,  3 },
    { "AE",  "A",   0, 0,   60,  3 },

    { "",   "E",   0, 0,   20,  4 },
    { "",   "E",   0, 0,   20,  4 },
    { "A",   "ER",  0, 0,   70,  4 },
    { "AE",  "E",   0, 0,   20,  4 },
    { "AE",  "",    0, 0,  110,  4 },

    { "AI",  "AY",  0, 0,    5,  4 },
    { "AI",  "EI",  0, 0,    5,  4 },
    { "AI",  "EY",  0, 0,   10,  4 },
    { "AY",  "EI",  0, 0,   10,  4 },
    { "AY",  "EY",  0, 0,    5,  4 },
    { "EI",  "EY",  0, 0,    5,  4 },

    { "",   "O",   0, 0,   70,  5 },
    { "",   "OE",  0, 0,    0,  5 },
    { "",   "O",   0, 0,   40,  5 },
    { "",   "",   0, 0,    0,  5 },
    { "",   "OE",  0, 0,   10,  5 },
    { "",   "O",   0, 0,   30,  5 },
    { "",   "",   0, 0,    5,  5 },
    { "",   "OE",  0, 0,    0,  5 },
    { "OE",  "O",   0, 0,   60,  5 },
    { "OE",  "",    0, 0,  110,  5 },

    { "CHS", "X",   0, 0,  100,  6 },
    { "CKS", "X",   0, 0,   30,  6 },
    { "CK",  "K",   0, 0,    5,  6 },
    { "C",   "K",   0, 0,   30,  6 },
    { "CHS", "",    0, 0,  200,  6 },
    { "CH",  "",    0, 0,  130,  6 },
    { "CKS", "",    0, 0,  200,  6 },
    { "CK",  "",    0, 0,  110,  6 },

    { "DT",  "T",   0, 0,    5,  7 },
    { "D",   "T",   0, 0,   30,  7 },
    { "TH",  "T",   0, 0,    5,  7 },
    { "DT",  "",    0, 0,  110,  7 },

    { "KS",  "X",   0, 0,    5,  8 },
    { "GS",  "X",   0, 0,   10,  8 },
    { "G",   "K",   0, 0,   50,  8 },
    { "QU",  "KW",  0, 0,   10,  8 },
    { "Q",   "K",   0, 0,   10,  8 },

    { "NCH", "NSCH",0, 0,   10,  9 },
    { "NCH", "NSH", 0, 0,   10,  9 },
    { "NTX", "NCH", 0, 0,   20,  9 },
    { "NTX", "NSCH",0, 0,   20,  9 },
    { "NTX", "NSH", 0, 0,   20,  9 },
    { "NG",  "NK",  0, 0,   20,  9 },

    { "",   "SS",  0, 0,    0, 10 },
    { "",   "S",   0, 0,    5, 10 },
    { "SCH", "SH",  0, 0,    5, 10 },
    { "SCH", "SZ",  0, 0,   20, 10 },
    { "SCH", "S",   0, 0,  100, 10 },
    { "SCH", "",    0, 0,  200, 10 },

    { "TZ",  "Z",   0, 0,    5, 11 },
    { "TIA", "ZIA", 0, 0,   20, 11 },
    { "Z",   "C",   0, 0,   40, 11 },
    { "Z",   "S",   0, 0,   50, 11 },

    { "M",   "N",   0, 0,   70, 12 },
    { "N",   "U",   0, 0,   70, 12 },

    { "PH",  "F",   0, 0,    5, 13 },
    { "PF",  "F",   0, 0,    5, 13 },
    { "B",   "P",   0, 0,   40, 13 },

    { "F",   "V",   0, 0,   20, 14 },
    { "W",   "V",   0, 0,   20, 14 },

    { "",   "UE",  0, 0,    0, 15 },
    { "",   "U",   0, 0,   40, 15 },
    { "",   "Y",   0, 0,   30, 15 },
    { "UE",  "U",   0, 0,   60, 15 },
    { "UE",  "",    0, 0,  110, 15 },

    /***  end-of-table marker ("lev_diff" stops at text_2 == NULL)  ***/
    { NULL,  NULL,  0, 0,    0,  0 }
  };


/****  NULL-terminated list of strings of the form "<2 digits> <CITY>".   ****/
/****  NOTE(review): presumably two-digit ZIP code prefixes of large       ****/
/****  German cities; the table is not referenced in the visible part of   ****/
/****  this file -- confirm its use against the rest of the file.          ****/
static char *zip_german_cities[] =
     {
       "10 BERLIN",
       "12 BERLIN",
       "13 BERLIN",
       "14 BERLIN",
       "20 HAMBURG",
       "21 HAMBURG",
       "22 HAMBURG",
       "50 KOELN",
       "51 KOELN",
       "60 FRANKFURT",
       "65 FRANKFURT",
       "68 MANNHEIM",
       "69 MANNHEIM",
       "80 MUENCHEN",
       "81 MUENCHEN",
       NULL
     };




/************************************************************/
/****  private (static) functions  **************************/
/************************************************************/

static struct LEV_RESULT calculate_points (int diff, int limit,
           int trigrams, int unknown, int empty_diff, int max_points)
{
  struct LEV_RESULT l_res = { 0, 0, 0, 0 };
  int points = 0;

  if (empty_diff != 0)
    {
     if (empty_diff == MATCHES_ALL  ||  empty_diff == 3)
       {
        diff = 0;
        max_points = 0;
        points = max_points;
        empty_diff = 0;
       }
     else
       {
        diff = 0;
        max_points = (int) ((max_points+2) / 8);
        points = 0;
        empty_diff = max_points;
       }
    }
  else
    {
     if (trigrams > diff  &&  trigrams >= 300)
       {
        trigrams = limit - (int)(trigrams / 3);

        if (trigrams < 100)
          {
           trigrams = 100;
          }

        if (trigrams < diff)
          {
           diff = trigrams;
          }
       }

     if (diff <= 0)
       {
        points = 1000;
       }
     else if (diff + diff <= limit)
       {
        points = 700 + (int) (300L * (limit - diff - diff) / limit);
       }
     else if (diff <= limit)
       {
        points = 300 + (int) (800L * (limit - diff) / limit);
       }
     else if (diff <= limit +50  &&  limit >= 150)
       {
        points = 200 + (int) (100L * (limit + 50L - diff) / 50L);
       }

     if (points > 0)
       {
        if (limit < 70)
          {
           max_points = (int) ((long)max_points * (long)limit / 70L);
          }

        if (unknown > 0)
          {
           points = (int) ((long)points * (1000L - unknown) / 1000L);
          }
        points = (int) ((long)points * (long)max_points / 1000L);
       }

     if (unknown > 0)
       {
        unknown = (int) ((long)unknown * (long)max_points / 1000L);
       }
    }

  l_res.points = points;
  l_res.max_points = max_points;
  l_res.diff = diff;
  l_res.empty_diff = empty_diff;

  return (l_res);
}




int lev_diff
  (char *pattern, char *text, int limit, int *trigrams, int run_mode)

/****  Function for calculating Levenshtein distance         ****/
/****  (in hundredths of points) with respect to wildcards   ****/
/****  and phonetic rules.                                   ****/
/****  "text" is compared with "pattern"; allowed wildcards  ****/
/****  are <wildcard_any_string> and <wildcard_any_char>.    ****/
/****  (function works for arbitrary string lengths)         ****/

/****  Unless LEV_SKIP_UPEXPAND is set, both strings are     ****/
/****  first converted with "up_conv", so lower / upper      ****/
/****  char (including umlauts) are not discriminated.       ****/
/****  "text" and "pattern" may be switched internally (see  ****/
/****  below); in that case "limit" may be re-calculated.    ****/

/****  The function returns "no match" (10000) if the       ****/
/****  final distance is greater than both "limit" and the   ****/
/****  trigram count; the matrix calculation stops early     ****/
/****  when the column minimum ("col_min") exceeds           ****/
/****  "limit" plus the trigram count.                       ****/
/****  10000 is also returned on internal errors (failed     ****/
/****  "malloc", conversion tables not initialized).         ****/

/****  If "trigrams" is not NULL, the trigram count (in      ****/
/****  hundredths) is stored there on normal return.         ****/

/****  Available values for "run_mode" (bit mask):           ****/
/****  LEV_COMPARE_NORMAL :  "normal" Levenshtein distance   ****/
/****  LEV_SKIP_UPEXPAND  :  do not convert strings to       ****/
/****                       upper char (incl. umlaut_expand) ****/
/****  LEV_COMPARE_GERMAN :  use simple phonetic algorithm   ****/
/****                        for German                      ****/
/****  LEV_COMPARE_NUMBER :  doubled chars cost 30, not 5    ****/
/****  DB_WILDCARDS_FOR_LIKE : use DB_LIKE_* wildcard chars  ****/
/****  TRACE_LEV          :  activate trace option           ****/

/****  Note: the function uses static work buffers and is    ****/
/****  therefore not reentrant.                              ****/
{
 struct LEV_RESULT l_res;
 int  col_min,
      lim_1,
      p,q,r,
      lt,lp,
      d1,d2,
      i,k,n,
      x1,x2,x3,x4,
      y1,y2,y3;
 int  *d;
 int  *pos;
 int  *rep;
 int  *tri;
 char *pp;
 char *tt;
 char ct,cp,cp2,
      *s1,*s2,*s3,*s4;

 char wildcard_any_string = MATCHES_ANY_STRING;
 char wildcard_any_char = MATCHES_ANY_CHAR;

 /****  work buffers for strings shorter than 80 chars  ****/
 static char p_string[81],
             t_string[81];
 static int  d_array[81];
 static int  pos_array[81];
 static int  rep_array[81];
 static int  tri_array[81];

 if (run_mode & DB_WILDCARDS_FOR_LIKE)
   {
    wildcard_any_string = DB_LIKE_ANY_STRING;
    wildcard_any_char = DB_LIKE_ANY_CHAR;
   }

 d = d_array;
 pos = pos_array;
 rep = rep_array;
 tri = tri_array;
 pp = p_string;
 tt = t_string;

 if (! (conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED))
   {
     initialize_conv_strings (run_mode);

     if (! (conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED))
       {
        /****  internal error  ****/
        return (10000);
       }
   }

 if (run_mode & LEV_SKIP_UPEXPAND)
   {
    lt = (int) strlen (text);
    lp = (int) strlen (pattern);
   }
 else
   {
    /****  convert strings to upper char  ****/
    lt = up_conv (tt, text, 81);
    lp = up_conv (pp, pattern, 81);

    if (lt >= 80)
      {
       /****  static buffer too small: use heap buffer  ****/
       lt = 2 * (int) strlen (text) + 3;
       tt = (char *) malloc ((unsigned) lt);

       if (tt == NULL)
         {
          if (run_mode & (TRACE_ADDR | TRACE_ERRORS | TRACE_LEV))
            {
             printf ("Error: \"malloc\" for %d Bytes failed.\n", lt);
            }
          return (10000);
         }
       lt = up_conv (tt, text, lt);
      }

    if (lp >= 80)
      {
       /****  same buffer size formula as for "text", because   ****/
       /****  the conversion may expand the string              ****/
       lp = 2 * (int) strlen (pattern) + 3;
       pp = (char *) malloc ((unsigned) lp);

       if (pp == NULL)
         {
          if (run_mode & (TRACE_ADDR | TRACE_ERRORS | TRACE_LEV))
            {
             printf ("Error: \"malloc\" for %d Bytes failed.\n", lp);
            }
          if (tt != t_string)
            {
             free (tt);
            }
          return (10000);
         }
       lp = up_conv (pp, pattern, lp);
      }

    text = tt;
    pattern = pp;
   }

 if (lt < 0  ||  lp < 0)
   {
     /****  internal error  ****/
     if (run_mode & (TRACE_ADDR | TRACE_LEV))
       {
        printf ("Internal error: could not initialize conv strings.\n");
       }
     /****  release heap buffers (if any) before leaving  ****/
     if (tt != t_string)
       {
        free (tt);
       }
     if (pp != p_string)
       {
        free (pp);
       }
     return (10000);
   }

 if (3*lt < 2*lp
 && (strchr (text,wildcard_any_string) != NULL
 ||  strchr (text,' ') != NULL  ||  strchr (text,'.') != NULL))
   {
    /****  switch "text" and "pattern"  ****/
    s1 = text;
    text = pattern;
    pattern = s1;
    i = lt;
    lt = lp;
    lp = i;

    if (limit > 0  &&  limit < 10000)
      {
       /****  re-calculate limit  ****/
       l_res = calculate_limit (pattern, NULL,0);
       if (l_res.points < limit)
         {
          limit = l_res.points;
         }
      }
   }

 if (limit < 0)
   {
    limit = 0;
   }
 lim_1 = limit;
 if (lim_1 >= 7  &&  lim_1 <= 30)
   {
     lim_1 += 10;
   }

 if (lt >= 80)
   {
    /****  "n" = elements per array, "i" = bytes per array  ****/
    n = lt + 1;
    i = n * (int) sizeof (int);
                      /***  factor "4" includes arrays "pos", "rep" and "tri"  ***/
    d = (int *) malloc ((unsigned) (4*i));

    if (d == NULL)
      {
       if (run_mode & (TRACE_ADDR | TRACE_ERRORS | TRACE_LEV))
         {
          printf ("Error: \"malloc\" for %d Bytes failed.\n", (4*i));
         }
       if (tt != t_string  &&  tt != p_string)
         {
          free (tt);
         }
       if (pp != p_string  &&  pp != t_string)
         {
          free (pp);
         }
       return (10000);
      }

    /****  pointer arithmetic counts elements, not bytes  ****/
    pos = d + n;
    rep = d + (2*n);
    tri = d + (3*n);
   }

 if (run_mode & TRACE_LEV)
   {
    printf ("\nLevenshtein:  Strings = '%s'%s'", pattern, text);
    print_number (",  Limit", limit,-1);
    printf ("\n\n         ");

    for (k=1; k<=lt; k++)
      {
       printf ("   '%c'", text[k-1]);
      }
    printf ("\n");
   }

 /****  calculate initial values ( = zero'th column)  ****/
 /****  d   = current column of the distance matrix   ****/
 /****  pos = pattern index at which a pending         ****/
 /****        phonetic rule becomes effective (or -1), ****/
 /****        plus flag bits 1024 / 2048               ****/
 /****  rep = distance offered by that pending rule    ****/
 /****  tri = trigram count (in steps of 100)          ****/
 d[0] = 0;
 pos[0] = -1;
 rep[0] = 10000;
 tri[0] = 0;
 for (k=1; k<=lt; k++)
   {
    d[k] = 10000;
    pos[k] = -1;
    rep[k] = 10000;
    tri[k] = 0;
   }

 /****  calculate distance matrix  ****/
 for (i=0; i<=lp; i++)
   {
    cp = (i == 0) ?  '\0' : *(pattern +i-1);

    /****  p = cost of "replace", q = "delete", r = "insert"  ****/
    if (cp == wildcard_any_string  ||  cp == '.')
      {
        p = 0;
        q = 0;
        r = 0;
      }
    else
      {
        p = (cp == wildcard_any_char) ?  0 : 100;
        q = 100;
        r = 100;
      }

    if (q > 0)
      {
       /****  calculate error points for "delete"  ***/
       if (i >= 2  &&  cp == pattern[i-2])
         {
          /****  "<x><x>"  -->  "<x>"  ****/
          q = 5;
          if (run_mode & LEV_COMPARE_NUMBER)
            {
              q = 30;
            }
         }

       switch (cp)
         {
          case ' ':
          case '-':
          case '+':
          case '/':
          case '&':
          case '\'':  q = 5;
                 break;
         }

       if ((run_mode & LEV_COMPARE_GERMAN)
       &&  i > 0)
         {
           /****  check phonetic rules for "delete"  ***/
           x3 = ph_rules_hash_del_ins [(unsigned char) cp];

           if (x3 >= 0)
             {
               /****  evaluate phonetic rules  ****/
               while (ph_rules_german[x3].text_2 != NULL
               &&  strcmp (ph_rules_german[x3].text_2,"") == 0)
                 {
                   s1 = ph_rules_german[x3].text_1;
                   y1 = ph_rules_german[x3].len_1;

                   s4 = pattern+i-1;

                   if (cp == *s1  &&  y1 > 0)
                     {
                       /****  check phonetic rule  ****/
                       if (strncmp (s4, s1,y1) == 0)
                         {
                           y3 = 0;

                           if (pos[y3] < 0
                           ||  rep[y3]-100 > d[0] + ph_rules_german[x3].ph_diff)
                             {
                               pos[y3] = i + y1-1;
                               rep[y3] = d[0] + ph_rules_german[x3].ph_diff;

                               if (y1 >= 2  &&  y3 < lt
                               &&  ph_rules_german[x3].ph_diff >= 100)
                                 {
                                   pos[y3] |= 2048;
                                 }
                             }
                         }
                     }

                   x3++;
                 }
             }
         }
      }

    d2 = d[0];
    d[0] = (i == 0) ?  0 : (d2 + q);

    if ((run_mode & LEV_COMPARE_GERMAN)
    &&  (pos[0] & ~(1024+2048)) == i)
      {
        /****  pending phonetic rule becomes effective  ****/
        x1 = rep[0];
        if (x1 < d[0])
          {
            d[0] = x1;
          }

        if ((pos[0] & 2048)  &&  rep[1] > rep[0])
          {
            pos[1] = pos[0] & ~2048;
            rep[1] = rep[0];
          }
        pos[0] = -1;
        rep[0] = 10000;
      }

    col_min = d[0];

    for (k=1; k<=lt; k++)
      {
       /****  d[k] = minimum of three numbers  ****/
       d1 = d2;
       d2 = d[k];
       ct = text[k-1];
       n = (cp == ct) ?  0 : p;

       if (tri[k-1] > tri[k])
         {
          tri[k] = tri[k-1];
         }

       if (n == 0  &&  p > 0  &&  i <= lp-2  &&  k <= lt-2)
         {
          /****  look for trigram  ****/
          if (pattern[i] == text[k]
          &&  pattern[i+1] == text[k+1]
          &&  text[k] != wildcard_any_string  &&  text[k] != '.'
          &&  text[k] != wildcard_any_char
          &&  text[k+1] != wildcard_any_string  &&  text[k+1] != '.'
          &&  text[k+1] != wildcard_any_char)
            {
              tri[k] += 100;
            }
         }

       if (i >= 2  &&  k >= 2  &&  n != 0  &&  limit > 0)
         {
          /****  look for transpositions (e.g. "AB" --> "BA")  ****/
          cp2 = pattern[i-2];

          if (cp == text[k-2]  &&  cp2 == text[k-1]
          &&  cp != wildcard_any_string  &&  cp != '.'
           &&  cp != wildcard_any_char
          &&  cp2 != wildcard_any_string  &&  cp2 != '.'
           &&  cp2 != wildcard_any_char)
            {
              /****  transposition found  ****/
              n = 0;
            }
         }

       if (n > 0  &&  limit > 0)
         {
          /****  calculate error points for "replacement"  ***/
          switch (cp)
            {
             case ' ':
             case '-':
             case '+':
             case '/':
             case '&':
             case '\'': if (ct == ' '  ||  ct == '-'  ||  ct == '+'
                        ||  ct == '/'  ||  ct == '&'  ||  ct == '\''
                        ||  ct == wildcard_any_string)
                          {
                           /****  separator found  ****/
                           n = 5;
                          }
                   break;
            }
         }

       if ((run_mode & LEV_COMPARE_GERMAN)
       &&  i > 0)
         {
           /****  check phonetic rules for "replacement"  ***/
           /****  x1 = bit mask of hash groups common to both chars  ****/
           x1 = (ph_rules_hash_replace [(unsigned char) ct])
               & (ph_rules_hash_replace [(unsigned char) cp]);

           x2 = 1;

           if (x1 != 0)
             {
               /****  skip blocks of empty low-order bits  ****/
               if (! (x1 & (1+2+4+8+16+32+64+128)))
                 {
                   x1 = x1 >> 8;
                   x2 += 8;
                 }
               if (! (x1 & (1+2+4+8)))
                 {
                   x1 = x1 >> 4;
                   x2 += 4;
                 }
               if (! (x1 & (1+2)))
                 {
                   x1 = x1 >> 2;
                   x2 += 2;
                 }
             }

           while (x1 != 0)
             {
               if (x1 & 1)
                 {
                   /****  hash_group found  ****/
                   x3 = ph_hash_group_begin[x2];
                   x4 = ph_hash_group_end[x2];

                   while (x3 <= x4  &&  x3 >= 0)
                     {
                       s1 = ph_rules_german[x3].text_1;
                       s2 = ph_rules_german[x3].text_2;

                       y1 = ph_rules_german[x3].len_1;
                       y2 = ph_rules_german[x3].len_2;

                       y3 = y2;
                       if (y1 > y3)
                         {
                           y3 = y1;
                         }

                       s3 = text+k-1;
                       s4 = pattern+i-1;

                       if (ct == *s1  &&  cp == *s2
                       &&  y1 > 0  &&  y2 > 0)
                         {
                           /****  check phonetic rule  ****/
                           if (strncmp (s3, s1,y1) == 0
                           &&  strncmp (s4, s2,y2) == 0
                           &&  strncmp (s3,s4, y3) != 0)
                             {
                               y3 = k + y1-1;

                               if (pos[y3] < 0
                               ||  rep[y3] > d1 + ph_rules_german[x3].ph_diff)
                                 {
                                   pos[y3] = i + y2-1;
                                   rep[y3] = d1 + ph_rules_german[x3].ph_diff;
                                 }
                             }
                         }

                       if (ct == *s2  &&  cp == *s1
                       &&  y1 > 0  &&  y2 > 0)
                         {
                           /****  check phonetic rule  ****/
                           if (strncmp (s3, s2,y2) == 0
                           &&  strncmp (s4, s1,y1) == 0
                           &&  strncmp (s3,s4, y3) != 0)
                             {
                               y3 = k + y2-1;

                               if (pos[y3] < 0
                               ||  rep[y3] > d1 + ph_rules_german[x3].ph_diff)
                                 {
                                   pos[y3] = i + y1-1;
                                   rep[y3] = d1 + ph_rules_german[x3].ph_diff;
                                 }
                             }
                         }

                       x3++;
                     }
                 }

               x1 = x1 >> 1;
               x2++;
             }

           /****  check phonetic rules for "delete"  ***/
           x3 = ph_rules_hash_del_ins [(unsigned char) cp];

           if (x3 >= 0)
             {
               while (ph_rules_german[x3].text_2 != NULL
               &&  strcmp (ph_rules_german[x3].text_2,"") == 0)
                 {
                   s1 = ph_rules_german[x3].text_1;
                   y1 = ph_rules_german[x3].len_1;

                   s4 = pattern+i-1;

                   if (cp == *s1  &&  y1 > 0)
                     {
                       /****  check phonetic rule  ****/
                       if (strncmp (s4, s1,y1) == 0)
                         {
                           y3 = k;

                           if (pos[y3] < 0
                           ||  rep[y3]-100 > d2 + ph_rules_german[x3].ph_diff)
                             {
                               pos[y3] = i + y1-1;
                               rep[y3] = d2 + ph_rules_german[x3].ph_diff;

                               if (y1 >= 2  &&  y3 < lt
                               &&  ph_rules_german[x3].ph_diff >= 100)
                                 {
                                   pos[y3] |= 2048;
                                 }
                             }
                         }
                     }

                   x3++;
                 }
             }
         }

       if (run_mode & LEV_COMPARE_GERMAN)
         {
           /****  check phonetic rules for "insert"  ***/
           x3 = ph_rules_hash_del_ins [(unsigned char) ct];

           if (x3 >= 0)
             {
               while (ph_rules_german[x3].text_2 != NULL
               &&  strcmp (ph_rules_german[x3].text_2,"") == 0)
                 {
                   s1 = ph_rules_german[x3].text_1;
                   y1 = ph_rules_german[x3].len_1;

                   s3 = text+k-1;

                   if (ct == *s1  &&  y1 > 0)
                     {
                       /****  check phonetic rule  ****/
                       if (strncmp (s3, s1,y1) == 0)
                         {
                           y3 = k + y1-1;

                           if (pos[y3] < 0
                           ||  rep[y3] > d[k-1] + ph_rules_german[x3].ph_diff)
                             {
                               pos[y3] = i;
                               rep[y3] = d[k-1] + ph_rules_german[x3].ph_diff;

                               if (y1 >= 2  &&  i < lp
                               &&  ph_rules_german[x3].ph_diff >= 100)
                                 {
                                   pos[y3] |= 1024;
                                 }
                             }
                         }
                     }

                   x3++;
                 }
             }
         }

       /****  chars that map to the same "up_and_conv" value  ****/
       /****  are cheap replacements                         ****/
       if (n >= 100
       &&  up_and_conv [(unsigned char) ct]
        == up_and_conv [(unsigned char) cp])
         {
           n = 20;
           if (ct == up_and_conv [(unsigned char) cp]
           ||  cp == up_and_conv [(unsigned char) ct])
             {
               n = 5;
             }
         }

       x1 = d1 + n;
       x2 = d2 + q;

       if (x2 < x1)
         {
          x1 = x2;
         }

       n = r;
       if (n > 0  &&  limit > 0)
         {
          /****  calculate error points for "insert"  ***/
          if (k >= 2  &&  ct == text[k-2])
            {
             /****  "<x>"  -->  "<x><x>"  ****/
             n = 5;
             if (run_mode & LEV_COMPARE_NUMBER)
               {
                 n = 30;
               }
            }

          if (ct == wildcard_any_string)
            {
             n = 5;
            }

          switch (ct)
            {
          /** case wildcard_any_string: **/
             case ' ':
             case '-':
             case '+':
             case '/':
             case '&':
             case '\'':  n = 5;
                   break;
            }
         }

       x2 = d[k-1] + n;
       d[k] = (x1 < x2) ?  x1 : x2;

       if ((run_mode & LEV_COMPARE_GERMAN)
       &&  (pos[k] & ~(1024+2048)) == i)
         {
           /****  pending phonetic rule becomes effective  ****/
           x1 = rep[k];
           if (x1 < d[k])
             {
               d[k] = x1;
             }

           if (pos[k] & 1024)
             {
               /****  keep rule alive for the next pattern char  ****/
               pos[k] = (pos[k] + 1) & ~(1024+2048);
             }
           else
             {
               if ((pos[k] & 2048)  &&  rep[k+1] > rep[k])
                 {
                   /****  hand rule over to the next text char  ****/
                   pos[k+1] = pos[k] & ~2048;
                   rep[k+1] = rep[k];
                 }
               pos[k] = -1;
               rep[k] = 10000;
             }
         }

       if (d[k] < col_min)
         {
          col_min = d[k];
         }
      }

    if (run_mode & TRACE_LEV)
      {
       if (i == 0)
         {
          printf ("    ");
         }
       else
         {
          printf ("'%c' ",cp);
         }

       for (k=0; k<=lt; k++)
         {
          if (d[k] <= limit)
            {
             printf (" %2d.%02d", (int)(d[k]/100), d[k]%100);
            }
          else
            {
             printf ("  ----");
            }
         }
       printf ("\n");
      }

    /****  no chance left to stay within the limit  ****/
    if (col_min > limit + tri[lt])
      {
       break;
      }
   }


 if (d[lt] >= 150  &&  d[lt] <= 800
 &&  d[lt] <= lim_1)
   {
     /****  check for multiple insert  ****/
     /****  (s1 = longer string, k = length difference)  ****/
     k = lt - lp;
     s1 = text;
     s2 = pattern;

     if (k < 0)
       {
         k = - k;
         s1 = pattern;
         s2 = text;
       }

     /****  i = length of common prefix  ****/
     i = 0;
     while (s1[i] == s2[i]  &&  s1[i] != '\0')
       {
         i++;
       }

     if (strcmp (s1+i+k, s2+i) == 0)
       {
         /****  multiple insert  ****/
         if ((i > 0  &&  s2[i] == '\0'
          && (s1[i] == ' '  ||  s1[i] == '-'))
         ||  (i == 0  &&  k >= 2
          && (s1[k-1] == ' '  ||  s1[k-1] == '-')))
           {
             k = 50 * k;    /** 100 * (k-1 + 1) / 2 **/

             if (k < d[lt])
               {
                 d[lt] = k;

                 if (run_mode & TRACE_LEV)
                   {
                     printf ("\nMultiple insert found (1):  lev = %d.%02d\n",
                         (int) (d[lt] / 100), d[lt] % 100);
                   }
               }
           }
         else if (i >= 2  &&  k >= 3)
           {
             d[lt] -= 50;
             if (k >= 5)
               {
                 d[lt] -= 50;
               }

             if (run_mode & TRACE_LEV)
               {
                 printf ("\nMultiple insert found (2):  lev = %d.%02d\n",
                     (int) (d[lt] / 100), d[lt] % 100);
               }
           }
       }
     else
       {
         /****  strip common suffix; check the indices first  ****/
         /****  so that no element before the strings is read ****/
         i = lt;
         k = lp;
         while (i > 0  &&  k > 0  &&  text[i-1] == pattern[k-1])
           {
             i--;
             k--;
           }

         if (100 * i == d[lt]  ||  100 * k == d[lt])
           {
             i = 0;
             while (text[i] == pattern[i]  &&  text[i] != '\0')
               {
                 i++;
               }

             if (i == 0)
               {
                 /****  begin of strings differ       ****/
                 /****  (e.g. "Hedwig" and "Ludwig")  ****/
                 d[lt] += 50;

                 if (run_mode & TRACE_LEV)
                   {
                     printf ("\nBeginning of strings differ:  lev = %d.%02d\n",
                         (int) (d[lt] / 100), d[lt] % 100);
                   }
               }
           }
       }
   }

 /****  check trigrams  ****/
 /****  (x1 = alternative distance derived from trigram count)  ****/
 x1 = 15000;
 if (tri[lt] >= d[lt]  &&  d[lt] > 0)
   {
     i = (int) (15 * (lt+lp));

     x1 = i - (int)(tri[lt] / 3);
     if (x1 < 100)
       {
         x1 = 100;
       }
   }
 else if (tri[lt] >= 200  &&  d[lt] > 500)
   {
     x1 = 1000 - (int)(tri[lt] / 2);
     if (x1 < 400)
       {
         x1 = 400;
       }
   }

 if (x1 <= limit + 100  &&  x1 < d[lt] - 100)
   {
     d[lt] = x1 + 100;
     if (d[lt] > limit)
       {
         limit = d[lt];
       }

     if (run_mode & TRACE_LEV)
       {
         printf ("\nTrigams:  lev = %d.%02d\n",
             (int)(d[lt]/100), d[lt]%100);
       }
   }

 /****  calculate final result  ****/
 k = tri[lt];
 n = d[lt];
 if (n > limit  &&  n > k)
   {
    n = 10000;
   }

 /****  release heap buffers; "tt" / "pp" may have been   ****/
 /****  switched, so compare against both static buffers  ****/
 if (tt != t_string  &&  tt != p_string)
   {
    free (tt);
   }
 if (pp != p_string  &&  pp != t_string)
   {
    free (pp);
   }
 if (d != d_array)
   {
    free (d);
   }

 if (run_mode & TRACE_LEV)
   {
    printf ("\n");
    printf ("Levenshtein distance = %d.%02d\n", (int)(n/100), (n%100));
    print_number ("trigram count", k,-1);
    printf ("\n\n");
   }

 if (trigrams != NULL)
   {
    *trigrams = k;
   }
 return (n);
}





static struct LEV_RESULT lev_2_name
      (char text1[], char text2[], int run_mode)

/****  Look for names like "John" vs. "John Carl" or            ****/
/****  "Mary" vs. "Maryann": the shorter string is compared     ****/
/****  (using "lev_diff") with the leading and with the         ****/
/****  trailing part of the longer string.                      ****/
/****                                                           ****/
/****  The shorter string must have at least 4 chars and the    ****/
/****  longer one must be at least 3 chars longer; otherwise    ****/
/****  "diff = 10000" (i.e. "no match") is returned.            ****/
/****                                                           ****/
/****  Result:                                                  ****/
/****    points     = length of the shorter string              ****/
/****    max_points = length of the longer string (reduced for  ****/
/****                 skipped '-' / ' ' separators and for      ****/
/****                 very long strings, see end of function)   ****/
/****    diff       = best difference found (10000 = no match)  ****/
/****    empty_diff = 1: word separator found in longer string  ****/
/****                 2: no word separator found                ****/
{
  struct LEV_RESULT l_res = { 0, 0, 0, 0 };
  int  i,n,limit,mp;
  int  len_1,len_2;
  char c,*s,*s2;
  char tmp_string [LENGTH_WHOLE_NAME+1];
  char *tt = tmp_string;

  /****  check for name like "John" vs. "John Carl"  ****/
  i = (int) strlen (text1);
  n = (int) strlen (text2);

  /****  "s" = longer string (length "len_1"),   ****/
  /****  "s2" = shorter string (length "len_2")  ****/
  if (i >= n+3  &&  n >= 4)
    {
     s = text1;
     s2 = text2;
     len_1 = i;
     len_2 = n;
    }
  else if (n >= i+3  &&  i >= 4)
    {
     s = text2;
     s2 = text1;
     len_1 = n;
     len_2 = i;
    }
  else
    {
     /****  return "no match"  ****/
     l_res.diff = 10000;
     return (l_res);
    }

  if (len_1 > LENGTH_WHOLE_NAME)
    {
     /****  The working copy holds the longer string "s", so the  ****/
     /****  buffer must be sized from "len_1". ("strlen (text1)"  ****/
     /****  is too small whenever "text2" is the longer string.)  ****/
     tt = (char *) malloc ((unsigned) (len_1+1));
     if (tt == NULL)
       {
        if (run_mode & (TRACE_ADDR | TRACE_ERRORS | TRACE_LEV))
          {
           printf ("Error: \"malloc\" for %d Bytes failed.\n", len_1+1);
          }

        l_res.diff = 10000;
        return (l_res);
       }
    }

  /****  work on a private copy, because the string is   ****/
  /****  temporarily cut into pieces below                ****/
  strcpy (tt,s);
  s = tt;

  l_res = calculate_limit (text2, text1, run_mode);
  limit = l_res.points;

  if (limit > 0  &&  l_res.empty_diff != MATCHES_ALL)
    {
     limit = (int)(limit/2) + 100;
    }

  /****  search for word separator                          ****/
  /****  (start 4 chars before the length difference, if    ****/
  /****  the strings differ by at least 4 chars)            ****/
  if (i-n >= 4)
    {
     s += i-n - 4;
    }
  else if (n-i >= 4)
    {
     s += n-i - 4;
    }

  /****  from here on, "i" holds the best difference found  ****/
  i = 10000;
  while (*s != '\0'  &&  *s != '.'  &&  *s != '-'  &&  *s != ' ')
    {
     s++;
    }

  if (*s != '\0'  &&  *(s+1) != '\0')
    {
     /****  word separator (e.g. in "John Carl") found  ****/
     mp = 1;

     /****  evaluate leading name             ****/
     /****  (a '.' is kept as part of it)     ****/
     if (*s == '.')
       {
        s++;
       }
     c = *s;
     *s = '\0';

     if ((int)strlen (tt) >= 4)
       {
        n = lev_diff (s2,tt, limit, NULL, (run_mode & ~TRACE_LEV));

        if (n <= limit)
          {
           i = n;
          }
       }
     *s = c;

     /****  search for trailing name                      ****/
     /****  ('-' and ' ' do not count for the length)     ****/
     while (*s == '.'  ||  *s == '-'  ||  *s == ' ')
       {
        if (*s == '-'  ||  *s == ' ')
          {
           len_1--;
          }
        s++;
       }

     if (*s != '\0'  &&  (int) strlen (s) >= 4)
       {
        /****  evaluate trailing name  ****/
        n = lev_diff (s2,s, limit, NULL, (run_mode & ~TRACE_LEV));

        if (n < i  &&  n <= limit)
          {
           i = n;
          }
       }
    }
  else
    {
     /****  no word separator found      -->               ****/
     /****  look for name like (e.g.) "Mary" vs. "Maryann" ****/
     mp = 2;

     /****  look for leading name                       ****/
     /****  (first "len_2" chars of the longer string)  ****/
     s = tt + len_2;
     c = *s;
     *s = '\0';

     n = lev_diff (s2,tt, limit, NULL, (run_mode & ~TRACE_LEV));

     if (n <= limit)
       {
        i = n;
       }
     *s = c;

     /****  look for trailing name                      ****/
     /****  (last "len_2" chars of the longer string)   ****/
     s = tt + len_1 - len_2;

     n = lev_diff (s2,s, limit, NULL, (run_mode & ~TRACE_LEV));

     if (n < i  &&  n <= limit)
       {
        i = n;
       }
    }

  if (tt != tmp_string)
    {
     free (tt);
    }

  if (run_mode & (TRACE_ADDR | TRACE_LEV))
    {
     print_number ("lev_2_name: diff", i,-1);
     printf ("\n");
    }

  /****  good match of a long name: count only half of the  ****/
  /****  length that exceeds twice the shorter string       ****/
  if (i <= 50  &&  len_2 >= 8  &&  len_1 >= 2 * len_2)
    {
      n = (int) ((len_1 - 2 * len_2) / 2);
      len_1 = 2 * len_2 + n;
    }

  l_res.points = len_2;
  l_res.max_points = len_1;
  l_res.diff = i;
  l_res.empty_diff = mp;

  return (l_res);
}





/************************************************************/
/****  "external" functions  ********************************/
/************************************************************/


int initialize_conv_strings (int run_mode)

/****  Initialize the character conversion tables ("isxletter",    ****/
/****  "sortchar", "sortchar2", "up_and_conv", "upperchar",        ****/
/****  "lowerchar") and the hash tables for the phonetic rules     ****/
/****  in "ph_rules_german".                                       ****/
/****  The work is done only once; later calls return at once.     ****/
/****                                                              ****/
/****  Result:  0 = success (or already initialized)               ****/
/****          -1 = letter / umlaut strings are inconsistent       ****/
/****               (a message is printed if "run_mode" contains   ****/
/****               TRACE_ADDR or TRACE_LEV)                       ****/
{
  int  i,k,n;
  char *s,*s2,*s3;

  if (! (conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED))
    {
      /****  consistency checks for the conversion strings  ****/
      if ((int)strlen (letters_a_to_z) != 26)
        {
          if (run_mode & (TRACE_ADDR | TRACE_LEV))
            {
              printf ("Error: %s  is not allowed\n",
                   "strlen (letters_a_to_z) != 26");
            }
          return (-1);
        }
      if ((int)strlen (letters_a_to_z) != (int)strlen (letters_A_to_Z))
        {
          if (run_mode & (TRACE_ADDR | TRACE_LEV))
            {
              /****  message names both strings that are compared  ****/
              printf ("Error: %s  is not allowed\n",
                  "strlen(letters_a_to_z) != strlen(letters_A_to_Z)");
            }
          return (-1);
        }

      if ((int)strlen (umlaut_lower) != (int)strlen (umlaut_upper))
        {
          if (run_mode & (TRACE_ADDR | TRACE_LEV))
            {
              printf ("Error: %s  is not allowed\n",
                   "strlen(umlaut_lower) != strlen(umlaut_upper)");
            }
          return (-1);
        }

      if ((int)strlen (umlaut_lower) != (int)strlen (umlaut_sort)
      ||  (int)strlen (umlaut_lower) != (int)strlen (umlaut_sort2))
        {
          if (run_mode & (TRACE_ADDR | TRACE_LEV))
            {
              printf ("Error: %s  is not allowed\n",
                   "strlen(umlaut_sort*) != strlen(umlaut_lower)");
            }
          return (-1);
        }

      conv_strings_initialized |= CONV_STRINGS_ARE_INITIALIZED;

      /****  generate arrays "isxletter", "sortchar", "sortchar2", ...  ****/
      /****  (default: every char is converted to itself)               ****/
      for (i=0; i< HASH_COUNT; i++)
        {
          isxletter[i] = 0;
          sortchar[i] = (char) i;
          sortchar2[i] = '\0';
          up_and_conv[i] = (char) i;
          upperchar[i] = (char) i;
          lowerchar[i] = (char) i;
        }
      /****  '-' is converted to a blank by all tables  ****/
      sortchar [(unsigned char) '-']  = ' ';
      up_and_conv [(unsigned char) '-'] = ' ';
      upperchar [(unsigned char) '-']  = ' ';
      lowerchar [(unsigned char) '-']  = ' ';

      /****  plain letters: "s" = lower case, "s2" = upper case  ****/
      s = letters_a_to_z;
      s2 = letters_A_to_Z;

      for (i=0; s[i] != '\0'; i++)
        {
          k = (unsigned char) s2[i];  /** "s2" **/
          isxletter[k] |= _IS_UPPER_;
          sortchar[k] = s2[i];
          up_and_conv[k] = s2[i];
          upperchar[k] = s2[i];
          lowerchar[k] = s[i];

          k = (unsigned char) s[i];   /** "s" **/
          isxletter[k] |= _IS_LOWER_;
          sortchar[k] = s2[i];
          up_and_conv[k] = s2[i];
          upperchar[k] = s2[i];
          lowerchar[k] = s[i];
        }

      /****  umlauts: "s" = lower case umlaut, "s2" = first sort  ****/
      /****  char, "s3" = second sort char (' ' = none)           ****/
      s = umlaut_lower;
      s2 = umlaut_sort;
      s3 = umlaut_sort2;

      for (i=0; s[i] != '\0'; i++)
        {
         /****  NOTE(review): "umlaut_conv" is indexed like the other  ****/
         /****  umlaut strings, but its length is not checked above    ****/
         n = (unsigned char) umlaut_conv[i];

         /****  entries for the upper case umlaut  ****/
         k = (unsigned char) umlaut_upper[i];
         isxletter[k] |= _IS_UPPER_ + _IS_UMLAUT_;
         up_and_conv[k] = (char) n;
         upperchar[k] = (char) k;
         lowerchar[k] = s[i];

         sortchar[k] = s2[i];
         isxletter [(unsigned char) s2[i]] |= _IS_SORTCHAR_;
         if (s3[i] != ' ')
           {
             sortchar2[k] = s3[i];
             isxletter [(unsigned char) s3[i]] |= _IS_SORTCHAR2_;
           }

         /****  entries for the lower case umlaut  ****/
         k = (unsigned char) s[i];   /** "s" **/
         isxletter[k] |= _IS_LOWER_ + _IS_UMLAUT_;
         up_and_conv[k] = (char) n;
         lowerchar[k] = s[i];

         /****  the sort chars are flagged in their lower case form, too  ****/
         sortchar[k] = s2[i];
         n = lowerchar [(unsigned char) s2[i]];
         isxletter [(unsigned char) n] |= _IS_SORTCHAR_;
         if (s3[i] != ' ')
           {
             sortchar2[k] = s3[i];
             n = lowerchar [(unsigned char) s3[i]];
             isxletter [(unsigned char) n] |= _IS_SORTCHAR2_;
           }

         n = (unsigned char) umlaut_upper[i];
         upperchar[k] = (char) n;
       }

      s = "0123456789";
      for (i=0; s[i] != '\0'; i++)
        {
          k = (unsigned char) s[i];
          isxletter[k] |= _IS_DIGIT_;
        }

      /****  initialize hash values for phonetic rules              ****/
      /****  NOTE(review): 17 is presumably the size of the         ****/
      /****  "ph_hash_group_*" arrays - confirm against declaration ****/
      for (i=0; i<17; i++)
        {
          ph_hash_group_begin[i] = -1;
          ph_hash_group_end[i] = -1;
        }
      for (i=0; i< HASH_COUNT; i++)
        {
          ph_rules_hash_del_ins[i] = -1;
          ph_rules_hash_replace[i] = 0;
        }

      for (i=0; ph_rules_german[i].text_1 != NULL; i++)
        {
          k = ph_rules_german[i].hash_group;

          /****  first rule of hash group "k"  ****/
          if (ph_hash_group_begin[k] < 0)
            {
              ph_hash_group_begin[k] = i;
            }

          /****  group end = first rule of the group or the     ****/
          /****  last rule with a non-empty "text_2"            ****/
          if (ph_hash_group_end[k] < 0
          ||  strcmp (ph_rules_german[i].text_2,"") != 0)
            {
              ph_hash_group_end[k] = i;
            }

          /****  flag group "k" (bit "k-1") for the first char of "text_1"  ****/
          n = (unsigned char) ph_rules_german[i].text_1[0];
          ph_rules_hash_replace [n] |= 1 << (k-1);

          /****  first rule with empty "text_2" for this first char  ****/
          if (ph_rules_hash_del_ins[n] < 0
          &&  strcmp (ph_rules_german[i].text_2,"") == 0)
            {
              ph_rules_hash_del_ins[n] = i;
            }

          /****  same flag for the first char of "text_2"  ****/
          n = (unsigned char) ph_rules_german[i].text_2[0];
          ph_rules_hash_replace [n] |= 1 << (k-1);

          /****  cache the string lengths of the rule  ****/
          n = (int)strlen (ph_rules_german[i].text_1);
          ph_rules_german[i].len_1 = n;
          n = (int)strlen (ph_rules_german[i].text_2);
          ph_rules_german[i].len_2 = n;
        }
    }

  return (0);
}




void print_number (char *text, int number, int mode)

/****  Print "<text> = <number / 100>" to stdout, followed by   ****/
/****  up to two decimal places (a trailing zero is omitted).   ****/
/****  If "mode" >= 0, the value MATCHES_ALL is printed by      ****/
/****  name. No line feed is written.                           ****/
{
  const char *sign = "";
  int whole, cents;

  if (mode >= 0  &&  number == MATCHES_ALL)
    {
      printf ("%s = %s", text, "MATCHES_ALL");
      return;
    }

  if (number < 0)
    {
      sign = "-";
      number = -number;
    }

  whole = (int) (number / 100);
  cents = number % 100;

  printf ("%s = %s%d", text, sign, whole);

  if (cents == 0)
    {
      /****  integral value: no decimal places  ****/
      return;
    }

  /****  first decimal place, second one only if not zero  ****/
  printf (".%d", (int) (cents / 10));

  if (cents % 10 != 0)
    {
      printf ("%d", cents % 10);
    }
}




struct LEV_RESULT calculate_limit 
            (char pattern[], char text[], int run_mode)
/****  calculate limit for the Levenshtein function  ****/

/****  Both strings are scanned (only "pattern" if "run_mode"     ****/
/****  contains DATABASE_SELECT). If "pattern" is NULL, "text"    ****/
/****  is used as pattern; a NULL string is not evaluated.        ****/
/****                                                             ****/
/****  For each string, "x" counts 100 for every char that is     ****/
/****  not in WORD_SEPARATORS and 40 for every word separator     ****/
/****  that is neither a wildcard nor '.'.                        ****/
/****  "unknown" counts, for every run of wildcards / '.', the    ****/
/****  maximum of 300 (if the run contains the "any string"       ****/
/****  wildcard or '.') and 100 per "any char" wildcard.          ****/
/****                                                             ****/
/****  Result:                                                    ****/
/****    max_points = smaller "x" of the evaluated strings        ****/
/****    points     = limit = (min (max_points,1200) + 40) / 3    ****/
/****                 (0 if max_points is 0)                      ****/
/****    diff       = "unknown"                                   ****/
/****    empty_diff = 1 if "pattern" is empty, plus 2 if "text"   ****/
/****                 is empty; in mode DATABASE_SELECT it is     ****/
/****                 MATCHES_ALL if the pattern is empty or      ****/
/****                 has no countable chars but contains the     ****/
/****                 "any string" wildcard                       ****/
{
  struct LEV_RESULT l_res = { 0, 0, 0, 0 };
  char *s;
  int  i,j,k,n,x;
  int  max_points, limit;
  int  asterisk_found, unknown;

  char wildcard_any_string = MATCHES_ANY_STRING;
  char wildcard_any_char = MATCHES_ANY_CHAR;

  if (run_mode & DB_WILDCARDS_FOR_LIKE)
    {
     wildcard_any_string = DB_LIKE_ANY_STRING;
     wildcard_any_char = DB_LIKE_ANY_CHAR;
    }

  if (pattern == NULL)
    {
     pattern = text;
     text = NULL;
    }
  max_points = 0;
  unknown = 0;

  /****  j = 1: evaluate "pattern",  j = 2: evaluate "text"  ****/
  for (j=1; j<=2; j++)
    {
     s = pattern;
     if (j == 2)
       {
        s = text;
       }

     if (s == NULL)
       {
        continue;
       }

     if (*s == '\0')
       {
        l_res.empty_diff += j;
       }

     asterisk_found = 0;
     x = 0;
     i = 0;
     while (s[i] != '\0')
       {
        /****  look for word separators and wildcard_any_string/char  ****/
        k = 0;
        n = 0;
        while (s[i] == wildcard_any_string
        ||  s[i] == '.'  ||  s[i] == wildcard_any_char)
          {
           if (s[i] == wildcard_any_string
           ||  s[i] == '.')
             {
              asterisk_found = 1;
              n = 300;
             }
           if (s[i] == wildcard_any_char)
             {
              k += 100;
             }

           if (s[i+1] != wildcard_any_string
           &&  s[i+1] != '.'  &&  s[i] != wildcard_any_char)
             {
              break;
             }
           i++;
          }

        if (n > k)
          {
           k = n;
          }
        unknown += k;

        if (s[i] == '\0')
          {
           /****  The wildcard run ended at the end of the string   ****/
           /****  (string ends with "wildcard_any_char").           ****/
           /****  Stop here: "strchr" would "find" the terminator   ****/
           /****  in WORD_SEPARATORS, and the "i++" below would     ****/
           /****  step beyond the end of the string.                ****/
           break;
          }

        if (strchr (WORD_SEPARATORS, s[i]) == NULL)
          {
           /****  count "letters"  ****/
           x += 100;
          }
        else if (s[i] != wildcard_any_string
        &&  s[i] != '.'  &&  s[i] != wildcard_any_char)
          {
           x += 40;
          }

        i++;
       }

     if (asterisk_found  &&  unknown < 600 - x)
       {
        unknown = 600 - x;
       }

     /****  keep the smaller "x" of both strings  ****/
     if (j == 1  ||  x < max_points)
       {
        max_points = x;
       }
     if (j == 1  ||  unknown > l_res.diff)
       {
        l_res.diff = unknown;
       }

     if (run_mode & DATABASE_SELECT)
       {
        /****  evaluate "pattern" only  ****/
        if (i == 0
        || (x == 0  &&  strchr (pattern,wildcard_any_string) != NULL))
          {
           l_res.empty_diff = MATCHES_ALL;
          }

        break;
       }
    }

  limit = max_points;
  if (limit > 0)
    {
     if (limit > 1200)
       {
        limit = 1200;
       }

     limit = (int) ((limit +40) / 3);
    }

  l_res.max_points = max_points;
  l_res.points = limit;
  l_res.diff = unknown;

#ifdef qweqweasd
  if (min_length <= l_res.unknown)
    {
     l_res.unknown = 1000;
    }
  else if (l_res.unknown > 0)
    {
      l_res.unknown = (int)((long) l_res.unknown * 1000L / (long) min_length);
    }
#endif

  return (l_res);
}




int up_conv (char dest[], char src[], int len)

/****  Copy "src" to "dest", translating every char with the  ****/
/****  table "up_and_conv" (upper case conversion).           ****/
/****  At most "len"-1 chars are copied                       ****/
/****  (len = size of "dest" incl. '\0').                     ****/
/****  Result: "strlen (dest)", or -1 if the conversion       ****/
/****  tables could not be initialized.                       ****/
{
  int count;

  if ((conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED) == 0)
    {
      /****  tables are built on first use  ****/
      initialize_conv_strings (0);

      if ((conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED) == 0)
        {
         /****  internal error  ****/
         dest[0] = '\0';
         return (-1);
        }
    }

  for (count = 0;  *src != '\0'  &&  count < len-1;  count++, src++)
    {
      dest[count] = up_and_conv [(unsigned char) *src];
    }

  dest[count] = '\0';
  return (count);
}





int up_expand (char src[], char dest[], int len, int run_mode)

/****  Translate "src" to "dest" using the tables "sortchar"   ****/
/****  and "sortchar2" (upper case, umlauts expanded, i.e.     ****/
/****  "<ae>" will be converted to "AE").                      ****/
/****  Every char of "src" yields its "sortchar" entry and,    ****/
/****  if "sortchar2" has a non-null entry for it, that char   ****/
/****  as well. Successive blanks are compressed to one.       ****/
/****  With COMPRESS_MULTIPLE_CHARS in "run_mode", a char      ****/
/****  equal to the previous output char is not repeated.      ****/

/****  (len = size of "dest" incl. '\0').                      ****/
/****  Result: "strlen (dest)", or -1 if the conversion        ****/
/****  tables could not be initialized.                        ****/
{
  int  second_pending = 0;   /**  "sortchar2" of *src still to do  **/
  int  out = 0;
  char ch;

  if ((conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED) == 0)
    {
      initialize_conv_strings (run_mode);

      if ((conv_strings_initialized & CONV_STRINGS_ARE_INITIALIZED) == 0)
        {
         /****  internal error  ****/
         dest[0] = '\0';
         return (-1);
        }
    }

  while (*src != '\0'  &&  out < len-1)
    {
      /****  determine the next output char  ****/
      if (second_pending)
        {
          /****  second char of the current source char (if any)  ****/
          ch = sortchar2 [(unsigned char) *src];
          second_pending = 0;
          src++;

          if (ch == '\0')
            {
              /****  none: go on with the next source char  ****/
              ch = sortchar [(unsigned char) *src];
              second_pending = 1;
            }
        }
      else
        {
          ch = sortchar [(unsigned char) *src];
          second_pending = 1;
        }

      if (ch == '\0')
        {
          continue;
        }
      if (ch == ' '  &&  out > 0  &&  dest[out-1] == ' ')
        {
          /****  compress successive blanks  ****/
          continue;
        }

      if ((run_mode & COMPRESS_MULTIPLE_CHARS)
      &&  out > 0  &&  ch == dest[out-1])
        {
          /****  overwrite the identical previous char  ****/
          out--;
        }
      dest[out] = ch;
      out++;
    }

  dest[out] = '\0';
  return (out);
}





struct LEV_RESULT lev_x (char *pattern,
          char *text, char *desc, int max_points, int run_mode)
{
  struct LEV_RESULT l_res;
  int limit,diff;
  int empty_diff;
  int trigrams = 0;
  int points,unknown;

  if (max_points < 100)
    {
     max_points = 100;
    }

  l_res = calculate_limit (pattern, text, run_mode);

/***
  max_points = l_res.max_points;
***/
  limit = l_res.points;
  empty_diff = l_res.empty_diff;
  unknown = l_res.diff;

  if (empty_diff != 0)
    {
     l_res = calculate_points
                 (100, limit, 0, unknown, empty_diff, max_points);

     points = l_res.points;
     max_points = l_res.max_points;
     diff = l_res.diff;
     empty_diff = l_res.empty_diff;
    }
  else
    {
     /****  compare "pattern" with "text"  ****/
     diff = 0;
     if (strcmp (text,pattern) != 0)
       {
        diff = lev_diff (pattern, text, limit+50, & trigrams, run_mode);
       }

     points = 0;
     if (diff <= limit + 200  ||  trigrams >= 3)
       {
        l_res = calculate_points
                    (diff, limit, trigrams, unknown, 0, max_points);

        points = l_res.points;

      /***
        max_points = l_res.max_points;
        diff = l_res.diff;
        empty_diff = l_res.empty_diff;
      ***/

        if (diff == 0)
          {
           max_points = points;
          }
       }
     else
       {
        unknown = 0;
       }
    }

  if (run_mode & (TRACE_ADDR | TRACE_LEV))
    {
     printf ("%s", desc);
     print_number (":  points", points, -1);
     print_number (",  max_points", max_points, -1);
     print_number (",  diff", diff, -1);
     print_number (",  empty_diff", empty_diff, -1);
     printf ("\n");
    }

  l_res.points = points;
  l_res.max_points = max_points;
  l_res.diff = diff;
  l_res.empty_diff = empty_diff;

  return (l_res);
}




struct LEV_RESULT lev_zipcode_city (char *zip_code1, char *zip_code2, 
    char *city1, char *city2, char *desc, 
    int max_points, int max_points_city, int run_mode)

/****  Compare two ZIP codes and convert the result into points.  ****/
/****                                                             ****/
/****  If "calculate_limit" reports "empty_diff" != 0 for the     ****/
/****  ZIP codes (e.g. an empty ZIP code), the cities are         ****/
/****  compared instead (using "lev_x" with at most               ****/
/****  "max_points" points) and that result is returned.          ****/
/****                                                             ****/
/****  Otherwise the difference from "lev_diff" is adjusted       ****/
/****  according to the first digits of the ZIP codes and, with   ****/
/****  LEV_COMPARE_GERMAN, according to "zip_german_cities".      ****/
/****  "desc" is only used as prefix of the trace output.         ****/
/****  Result: points, max_points, diff and empty_diff.           ****/
{
  struct LEV_RESULT l_res;
  int  i,n,limit,diff;
  int  empty_diff;
  int  points,unknown;
  char *s,temp1[51];
  char temp2[51];

  if (max_points < 100)
    {
     max_points = 100;
    }

  l_res = calculate_limit (zip_code1, zip_code2, run_mode);

  limit = l_res.points;
  empty_diff = l_res.empty_diff;
  unknown = l_res.diff;

  if (unknown == 0)
    {
     /****  no wildcards: use a fixed limit  ****/
     limit = 300;
    }

  if (empty_diff != 0)
    {
     /****  compare cities instead of ZIP codes  ****/
     if (max_points < max_points_city)
       {
         max_points_city = max_points;
       }
     return (lev_x (city1, city2, desc, max_points_city, run_mode));
    }

  /****  compare "zip_code1" with "zip_code2"  ****/
  diff = 0;
  if (strcmp (zip_code2,zip_code1) != 0)
    {
     diff = lev_diff (zip_code1, zip_code2, 
                  limit + 100, NULL, run_mode);
    }

  if (diff > 100  &&  zip_code1[0] == zip_code2[0]
  &&  IS_DIGIT (zip_code1[0])
  &&  IS_DIGIT (zip_code1[1]))
    {
     /****  first digit is matching: look at the second digit  ****/
     i = (unsigned char) (zip_code1[1]) - (unsigned char) (zip_code2[1]);

     if (diff >= 400  &&  i >= -1  &&  i <= 1)
       {
        /****  ZIP codes are (e.g.) "56xxxx" and "57xxxx"  ****/
        diff -= 100;
       }

     if (diff >= 300  &&  (i < -1  ||  i > 1))
       {
        /****  ZIP codes are (e.g.) "50xxxx" and "57xxxx"  ****/
        diff += 100;
       }

     if (i == 0)
       {
        /****  first two digits are matching  ****/
        diff = 200;
        if (zip_code1[2] == zip_code2[2])
          {
           /****  first three digits are matching  ****/
           diff = 100;
          }
       }

     if ((run_mode & LEV_COMPARE_GERMAN)
     &&  diff > 200)
       {
        /****  check and compare cities  ****/
        up_expand (city1, temp1, 51,0);
        up_expand (city2, temp2, 51,0);

        /****  "n" is a bit set:                                  ****/
        /****    1 = table lists the city for region of ZIP 1     ****/
        /****    2 = table lists the city for region of ZIP 2     ****/
        /****  (bit flags, so that repeated table entries cannot  ****/
        /****  add up to a wrong value)                           ****/
        n = 0;
        if (strcmp (temp1, temp2) == 0)
          {
           for (i=0; (s=zip_german_cities[i]) != NULL; i++)
             {
              /****  entry: two digits, one more char, city name  ****/
              if (s[0] == zip_code1[0]  &&  s[1] == zip_code1[1]
              &&  strcmp (s+3,temp1) == 0)
                {
                 n |= 1;
                }

              if (s[0] == zip_code2[0]  &&  s[1] == zip_code2[1]
              &&  strcmp (s+3,temp2) == 0)
                {
                 n |= 2;
                }
             }
          }

        if (n == 3)
          {
           /****  same city is listed for both ZIP regions  ****/
           diff = 200;
          }
       }
    }

  points = 0;
  if (diff <= limit + 200)
    {
     l_res = calculate_points
                 (diff, limit, 0, unknown, 0, max_points);

     points = l_res.points;

     if (diff == 0)
       {
        /****  identical ZIP codes  ****/
        max_points = points;
       }
    }

  if (run_mode & (TRACE_ADDR | TRACE_LEV))
    {
     printf ("%s", desc);
     print_number (":  points", points, -1);
     print_number (",  max_points", max_points, -1);
     print_number (",  diff", diff, -1);
     print_number (",  empty_diff", empty_diff, -1);
     printf ("\n");
    }

  l_res.points = points;
  l_res.max_points = max_points;
  l_res.diff = diff;
  l_res.empty_diff = empty_diff;

  return (l_res);
}




struct LEV_RESULT lev_2_ph (char *a_text, char *a2_text,
    char *a_text_ph, char *a2_text_ph,
    char *desc, int max_points, int run_mode)

/****  Compare "a_text" with "a2_text" and convert the result    ****/
/****  into points. If the difference is > 100 and phonetic      ****/
/****  variables ("a_text_ph", "a2_text_ph") are available,      ****/
/****  they are compared as well and the better result is used   ****/
/****  (with "max_points" reduced by a shrink factor).           ****/
/****  If the strings still do not match, "lev_2_name" looks     ****/
/****  for names like "Miller" vs. "Miller-Smith".               ****/
/****  "desc" is only used as prefix of the trace output.        ****/
/****  Result: points, max_points, diff and empty_diff.          ****/
{
  struct LEV_RESULT l_res;
  int  n,limit;
  int  diff,points;
  int  empty_diff;
  int  trigrams = 0;
  int  unknown;
  double shrink_factor_ph = 0.1;
#ifdef USE_PHONET
  /****  NOTE(review): these buffers are declared with           ****/
  /****  LENGTH_INTERNAL_VAR+1 bytes, but "phonet" below is      ****/
  /****  called with LENGTH_WHOLE_NAME+1 - confirm that this     ****/
  /****  cannot exceed the buffer size                           ****/
  char a_temp[LENGTH_INTERNAL_VAR+1];
  char a2_temp[LENGTH_INTERNAL_VAR+1];
#endif

  if (run_mode & TRACE_LEV)
    {
     printf ("Strings before \"lev_2_ph\": '%s'%s'\n", a_text, a2_text);
    }

  l_res = calculate_limit (a_text, a2_text, run_mode);

/***
  max_points = l_res.max_points;
***/
  limit = l_res.points;
  empty_diff = l_res.empty_diff;
  unknown = l_res.diff;

#ifdef SHRINK_FACTOR_PH
    /****  compile-time override of the default (0.1)  ****/
    shrink_factor_ph = SHRINK_FACTOR_PH;
#endif

  if (empty_diff != 0)
    {
     /****  "calculate_limit" has reported "empty_diff":  ****/
     /****  take all values from "calculate_points"       ****/
     l_res = calculate_points
                 (100, limit, 0, unknown, empty_diff, max_points);

     points = l_res.points;
     max_points = l_res.max_points;
     diff = l_res.diff;
     empty_diff = l_res.empty_diff;
    }
  else
    {
     /****  compare "a_text" with "a2_text"  ****/
     diff = 0;
     if (strcmp (a_text,a2_text) != 0)
       {
        diff = lev_diff (a_text, a2_text,
                   limit + 50, & trigrams, run_mode);
       }

     if (diff > 100)
       {
   #ifdef USE_PHONET
        /****  with "phonet", its own shrink factor is used  ****/
        shrink_factor_ph = PHONET_SHRINK_FACTOR;

        if (run_mode & LEV_COMPARE_GERMAN)
          {
           /****  do phonetic conversion with "phonet"        ****/
           /****  (only for variables not given by caller)    ****/
           if (a_text_ph == NULL  ||  a_text_ph[0] == '\0')
             {
              (void) phonet (a_text, a_temp, LENGTH_WHOLE_NAME+1, PHONET_MODE);
              a_text_ph = a_temp;
             }

           if (a2_text_ph == NULL  ||  a2_text_ph[0] == '\0')
             {
              (void) phonet (a2_text, a2_temp, LENGTH_WHOLE_NAME+1, PHONET_MODE);
              a2_text_ph = a2_temp;
             }
          }
   #endif

        if (a_text_ph != NULL   &&  a_text_ph[0] != '\0'
        &&  a2_text_ph != NULL  &&  a2_text_ph[0] != '\0')
          {
           /****  compare phonetic variables                      ****/
           /****  (note: this call overwrites "trigrams" from     ****/
           /****  the comparison of the original strings)         ****/
           n = lev_diff  (a_text_ph, a2_text_ph, limit,
                    & trigrams, run_mode);

           /****  penalty for the phonetic match:  (n + 100) * 4/3  ****/
           n = (int) ((4L * (long)(n+100)) / 3L);

           if (n < diff)
             {
              /****  phonetic result is better: use it, but  ****/
              /****  reduce the points that can be reached   ****/
              diff = n;
              max_points = (int) ((double)max_points * shrink_factor_ph);
             }
          }
       }

     /****  points are given only if the strings are similar  ****/
     /****  enough or share at least 3 trigrams               ****/
     points = 0;
     if (diff <= limit + 200  ||  trigrams >= 3)
       {
        l_res = calculate_points
                    (diff, limit, trigrams, unknown, 0, max_points);

        points = l_res.points;

      /***
        max_points = l_res.max_points;
        diff = l_res.diff;
        empty_diff = l_res.empty_diff;
      ***/

        if (diff == 0)
          {
           /****  identical strings  ****/
           max_points = points;
          }
       }
     else
       {
        unknown = 0;
       }

     if (diff > limit + 200  &&  limit > 100)
       {
        /****  Look for names like "Miller" vs. "Miller-Smith"  ****/
        l_res = lev_2_name (a_text, a2_text, run_mode);
        n = l_res.diff;

        if (n+n <= limit)
          {
           /****  "shrink_factor_ph" is reused here as factor for    ****/
           /****  the points:  1.5 * points                          ****/
           /****  / (max_points * empty_diff), at most 1.0           ****/
           /****  (values as returned by "lev_2_name")               ****/
           shrink_factor_ph = 1.5 * (double) l_res.points;
           if (l_res.max_points > 0  &&  l_res.empty_diff > 0)
             {
              shrink_factor_ph /= (double) (l_res.max_points * l_res.empty_diff);
             }
           if (shrink_factor_ph > 1.0)
             {
              shrink_factor_ph = 1.0;
             }

           diff = n + 100;
           /****  NOTE(review): -123 is passed as trigram count;   ****/
           /****  presumably a marker value that is evaluated by   ****/
           /****  "calculate_points" - confirm                     ****/
           l_res = calculate_points
                       (n, limit, -123, unknown, 0, max_points);

           points = (int) ((double) l_res.points * shrink_factor_ph);
          }
       }
    }

  if (run_mode & (TRACE_ADDR | TRACE_LEV))
    {
     printf ("%s", desc);
     print_number (":  points", points, -1);
     print_number (",  max_points", max_points, -1);
     print_number (",  diff", diff, -1);
     print_number (",  empty_diff", empty_diff, -1);
     printf ("\n");
    }

  l_res.points = points;
  l_res.max_points = max_points;
  l_res.diff = diff;
  l_res.empty_diff = empty_diff;

  return (l_res);
}




#ifdef LEV100_EXECUTABLE

int main (int argc, char *argv[])

/****  Standalone test program.                                 ****/
/****  With two (or more) arguments: trace the comparison of    ****/
/****  the first two arguments with "lev_diff".                 ****/
/****  Otherwise: consistency check of all rules in             ****/
/****  "ph_rules_german". Each rule is embedded in three        ****/
/****  different contexts and compared in both directions;      ****/
/****  the result must be equal to "ph_diff" of the rule.       ****/
/****  The result of "main" is always 0.                        ****/
{
  char variant_1[3][21];    /**  "text_1" in three contexts  **/
  char variant_2[3][21];    /**  "text_2" in three contexts  **/
  char *s1,*s2;
  int  rule,pass,idx;
  int  expected,found,tolerated;
  int  errors = 0;
  int  check_mode = (LEV_SKIP_UPEXPAND | LEV_COMPARE_GERMAN);

  if (argc >= 3)
    {
      (void) lev_diff (argv[1], argv[2], 10000, NULL, (LEV_COMPARE_GERMAN | TRACE_LEV));
      return (0);
    }

  printf ("Consistency check for the phonetically extended Levenshtein function:\n");

  for (rule=0; ph_rules_german[rule].text_1 != NULL; rule++)
    {
      /****  contexts: rule at begin, at end, in the middle  ****/
      sprintf (variant_1[0], "%s;789", ph_rules_german[rule].text_1);
      sprintf (variant_1[1], "123-%s", ph_rules_german[rule].text_1);
      sprintf (variant_1[2], "123-%s;789", ph_rules_german[rule].text_1);

      sprintf (variant_2[0], "%s;789", ph_rules_german[rule].text_2);
      sprintf (variant_2[1], "123-%s", ph_rules_german[rule].text_2);
      sprintf (variant_2[2], "123-%s;789", ph_rules_german[rule].text_2);

      expected = ph_rules_german[rule].ph_diff;

      for (pass=1; pass<=6; pass++)
        {
          /****  passes 1+2, 3+4 and 5+6 share one context  ****/
          idx = (pass - 1) / 2;
          s1 = variant_1[idx];
          s2 = variant_2[idx];

          /****  odd pass: (s1,s2),  even pass: (s2,s1)  ****/
          if (pass % 2 == 1)
            {
              found = lev_diff (s1,s2, 10000, NULL, check_mode);
            }
          else
            {
              found = lev_diff (s2,s1, 10000, NULL, check_mode);
            }

          /****  accepted deviation: rule with empty "text_2",      ****/
          /****  "text_1" of 3 or more chars, ph_diff >= 200 and    ****/
          /****  a result that is 50 less than "ph_diff"            ****/
          tolerated = (ph_rules_german[rule].text_2[0] == '\0'
                   &&  (int)strlen (ph_rules_german[rule].text_1) >= 3
                   &&  expected >= 200  &&  found == expected-50);

          if (found != expected  &&  ! tolerated)
            {
              errors++;
              printf ("Error in rule %d (%d): '%s'%s' lev = %d --> %d\n",
                  rule+1, pass, s1,s2, found,expected);
            }
        }
    }

  if (errors > 0)
    {
      printf ("\n");
      printf ("Total number of errors is: %d\n", errors);
    }
  else
    {
      printf ("No errors found.\n");
    }

  return (0);
}

#endif

/************************************************************/
/****  end of file "lev100ph.c"  ****************************/
/************************************************************/
