root/unary.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. unarymatch
  2. metaaster
  3. metaperiod
  4. metaescape

   1 /*
   2  *   Compound Text word statistics and search report
   3  *
    4  *   unary.c : multilingual word matching -- unary matching
   5  *
   6  *                                           Copyright(c) isao yasuda, 1998 
   7  */
    /* RCS "$Id$" keyword string identifying the revision of this file.
     * No code visible in this listing references it. */
    8 static char *rcs_id = "$Id: unary.c,v 1.2 2007/09/09 11:19:57 isao Exp $";
   9 
  10 #include <stdio.h>
  11 #include "staslova.h"
  12 #include "table.h"
  13 
  14 int 
  15 unarymatch(unsigned char *twp, unsigned char *cwp)
  16 {
  17   int metaaster(unsigned char **, unsigned char *, unsigned char *, \
  18                 unsigned char **, unsigned char *, unsigned char *);
  19   void metaperiod(unsigned char **, unsigned char *, unsigned char *, \
  20                   unsigned char **, unsigned char *, unsigned char *);
  21   int metaescape(unsigned char **, unsigned char *, unsigned char *, \
  22                  unsigned char **, unsigned char *, unsigned char *);
  23 
  24   unsigned char tg0 = ASCII;
  25   unsigned char tg1 = LATIN1;
  26   unsigned char cg0 = ASCII;
  27   unsigned char cg1 = LATIN1;
  28   unsigned char c;
  29 
  30   mlmdset(&cwp, &cg0, &cg1);
  31   mlmdset(&twp, &tg0, &tg1);
  32 
  33   while (*cwp != '\0' && *twp != '\0') {
  34     switch (c = mlchrtype(&cwp, &cg0, &cg1)) {
  35     case '*' :
  36       cwp++;
  37       switch (metaaster(&twp, &tg0, &tg1, &cwp, &cg0, &cg1)) {
  38       case MATCH   : return MATCH;
  39       case UNMATCH : return UNMATCH;
  40       default      : break; /* NEXT */
  41       }
  42       break;
  43     case '.' :
  44       metaperiod(&twp, &tg0, &tg1, &cwp, &cg0, &cg1);
  45       break;
  46     case '\\':
  47       if ((metaescape(&twp, &tg0, &tg1, &cwp, &cg0, &cg1)) == UNMATCH)
  48         return UNMATCH;
  49       break;
  50     default :
  51       if ((mlchrcmp(twp, &tg0, &tg1, cwp, &cg0, &cg1)) == UNMATCH)
  52         return UNMATCH;
  53       mlchrinc(&twp, &tg0, &tg1);
  54       mlchrinc(&cwp, &cg0, &cg1);
  55       break;
  56     }
  57   }
  58   if (*cwp == *twp)
  59     return MATCH;
  60   else {
  61     c = mlchrtype(&cwp, &cg0, &cg1);
  62     if (*twp == '\0' &&  c == '*')
  63       if (*++cwp == '\0' || (mlesccheck(cwp)) == '\0')
  64         return MATCH;
  65     return UNMATCH;
  66   }
  67 }
  68 
   /*
    * metaaster -- match the part of the pattern that follows a '*'.
    *
    *   tp, tg0, tg1 : in/out target-word cursor and its two state bytes
    *   cp, cg0, cg1 : in/out pattern cursor and its two state bytes
    *
    * unarymatch() steps the pattern cursor past the '*' before calling.
    *
    * Returns
    *   MATCH   - the pattern ends right after the '*', or the rest of the
    *             pattern was consumed together with the rest of the target;
    *   UNMATCH - the target ran out while pattern characters were still
    *             pending, or mlstrchr() returned NULL for the next pattern
    *             character;
    *   NEXT    - the caller should carry on scanning; *tp and *cp have
    *             been advanced.
    * A recursive retry may also hand back any of the three values.
    *
    * NOTE(review): after a mismatch the retry starts at the target
    * position where the mismatch was detected, not one character after
    * the previous candidate start.  If mlstrchr() behaves like strchr(),
    * overlapping candidates look like they are skipped (e.g. `*aab'
    * against `aaab') -- confirm against the mlstrchr() implementation.
    */
   69 int
   70 metaaster(unsigned char **tp, unsigned char *tg0, unsigned char *tg1, \
   71           unsigned char **cp, unsigned char *cg0, unsigned char *cg1)
   72 {
   73   unsigned char c;
   74   unsigned char *tw = *tp;
   75   unsigned char *cw = *cp;
   76   unsigned char *wkp;
   77   unsigned char *cws;
   78   unsigned char g0s;
   79   unsigned char g1s;
   80   int rtn;
   81 
   82   mlmdset(&cw, cg0, cg1);
   83   mlmdset(&tw, tg0, tg1);
        /* Nothing follows the '*': it absorbs the rest of the target. */
   84   if (*cw == '\0')
   85     return MATCH;
        /* Run until the pattern ends, another '*' shows up, or the target ends. */
   86   while (*cw != '\0' && *cw != '*' && *tw != '\0') {
   87     switch (c = mlchrtype(&cw, cg0, cg1)) {
   88     case '.' : /* run of '.' after '*': each '.' consumes one target character (`*...' is handled like `...*') */
   89       while (*cw == '.') {
   90         if (*tw == '\0')
   91           return UNMATCH;
   92         cw++;
   93         mlchrinc(&tw, tg0, tg1);
   94       }
   95       break;
   96     default :
            /*
             * Remember the restart point: pattern position plus both
             * pattern state bytes.  For an escaped character the saved
             * position is the backslash itself while cw moves on to the
             * escaped character.
             */
   97       if (c == '\\') { 
   98         cws = cw++; g0s = *cg0; g1s = *cg1;
   99       } else {
  100         cws = cw; g0s = *cg0; g1s = *cg1;
  101       }
            /* Look for the current pattern character in the target
             * (presumably a strchr-like search -- mlstrchr() is defined
             * elsewhere). */
  102       if ((wkp = mlstrchr(tw, tg0, tg1, cw, cg0, cg1)) != NULL) {
  103         tw = wkp;
  104         mlchrinc(&tw, tg0, tg1);
  105         mlchrinc(&cw, cg0, cg1);
              /*
               * Keep matching while the next pattern character is '.' or
               * has a type value of '0' or above; that range includes
               * '\\' but not '*'.
               */
  106         while ((c = mlchrtype(&cw, cg0, cg1)) == '.' || c >= '0') {
  107           if (*tw == '\0')
  108             return UNMATCH;
  109           if (c != '.') { 
  110             if (c == '\\')
  111               cw++;   /* compare the escaped character, not the backslash */
  112             if ((mlchrcmp(tw, tg0, tg1, cw, cg0, cg1)) == UNMATCH) {
                    /* Mismatch: rewind the pattern to the restart point
                     * and retry from the current target position. */
  113               cw = cws; *cg0 = g0s; *cg1 = g1s;
  114               rtn = metaaster(&tw, tg0, tg1, &cw, cg0, cg1);
  115               *tp = tw;
  116               *cp = cw;
  117               return rtn;
  118             }
  119           }
  120           mlchrinc(&cw, cg0, cg1);
                /* c is reassigned by the loop condition before it is read again */
  121           c = mlchrtype(&cw, cg0, cg1);
  122           mlchrinc(&tw, tg0, tg1);
  123         }
              /* Pattern exhausted: accept only if the target ended too,
               * otherwise retry further along the target.  The else
               * below pairs with the inner if. */
  124         if (*cw == '\0')
  125           if (*tw == '\0')
  126             return MATCH;
  127           else {
  128             cw = cws; *cg0 = g0s; *cg1 = g1s;
  129             rtn = metaaster(&tw, tg0, tg1, &cw, cg0, cg1);
  130             *tp = tw;
  131             *cp = cw;
  132             return rtn;
  133           }
  134         break;
  135       } else {
  136         return UNMATCH;
  137       }
  138     }
  139   }
        /* Publish the advanced cursors and let the caller continue. */
  140   *tp = tw;
  141   *cp = cw;
  142   return NEXT;
  143 }
 144 
 145 void
 146 metaperiod(unsigned char **tp, unsigned char *tg0, unsigned char *tg1, \
 147            unsigned char **cp, unsigned char *cg0, unsigned char *cg1)
 148 {
 149   unsigned char *tw = *tp;
 150   unsigned char *cw = *cp;
 151 
 152   mlchrinc(&tw, tg0, tg1);
 153   mlchrinc(&cw, cg0, cg1);
 154   *tp = tw;
 155   *cp = cw;
 156 }
 157 
 158 int
 159 metaescape(unsigned char **tp, unsigned char *tg0, unsigned char *tg1, \
 160            unsigned char **cp, unsigned char *cg0, unsigned char *cg1)
 161 {
 162   unsigned char *tw = *tp;
 163   unsigned char *cw = *cp;
 164 
 165   if (*++cw != '\0') {
 166     if ((mlchrcmp(tw, tg0, tg1, cw, cg0, cg1)) != UNMATCH) {
 167       mlchrinc(&tw, tg0, tg1);
 168       mlchrinc(&cw, cg0, cg1);
 169     } else
 170       return UNMATCH;
 171   }
 172   *tp = tw;
 173   *cp = cw;
 174   return NEXT;
 175 }
 176 
 177 
 178 

/* [previous][next][first][last][top][bottom][index][help] */