root/mlfunc.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mlmdset
  2. mlstrchr
  3. mlchrcmp
  4. mlchrinc
  5. mlchrlower
  6. mlstrcmp
  7. mlchrtype
  8. mlesccheck

   1 /*
   2  *   Compound Text word statistics and search report
   3  *
   4  *   mlfunc.c : functions for multilingual word operation
   5  *
   6  *                                           Copyright(c) isao yasuda, 1998 
   7  */
   8 static char *rcs_id = "$Id: mlfunc.c,v 1.2 2007/09/09 11:19:56 isao Exp $";
   9 
  10 #include <stdio.h>
  11 #include "staslova.h"
  12 #include "table.h"
  13 
  14 unsigned char
  15 mlmdset(unsigned char **cwa, unsigned char *g0, unsigned char *g1)
  16 {
  17   unsigned char *cw = *cwa;
  18   unsigned char c;
  19 
  20   if (*cw != '\x1B')
  21     return;
  22   if ((c = *++cw) == '\x28') {
  23     if (*++cw == '\x42') {
  24       *g0 = ASCII;
  25       cw++;
  26     } else {
  27       fprintf(stderr, "Unsupported ESC sequence x1B%X.(mlmdset)\n", (int) c);
  28       fprintf(stderr, "line no : %d\n", lno);
  29       *g0 = ASCII;
  30       cw++;
  31     }
  32   } else if (c == '\x24') {
  33     if (*++cw == '\x28') { 
  34       *g0 = JIS;
  35       cw += 2;
  36     }
  37   } else if (c  == '\x2D') {
  38     switch (c = *++cw) {
  39     case '\x41' : *g1 = LATIN1; break;
  40     case '\x42' : *g1 = LATIN2; break;
  41     case '\x4C' : *g1 = CYRIL;  break;
  42     default     : *g1 = OTHER;  break;
  43     }
  44     cw++;
  45   } else {
  46     fprintf(stderr, "Unsupported ESC sequence x1B%X.(mlmdset)\n", (int) c);
  47     fprintf(stderr, "line no : %d\n", lno);
  48     *g1 = LATIN1;
  49     cw += 2;
  50   }
  51   *cwa = cw;
  52   return *cw;
  53 }
  54 
  55 unsigned char *
  56 mlstrchr(unsigned char *tp, unsigned char *tg0, unsigned char *tg1,\
  57          unsigned char *cp, unsigned char *cg0, unsigned char *cg1)
  58 {
  59   if (*cp == ESC)
  60     mlmdset(&cp, cg0, cg1);
  61   if (*cp == '\0')
  62     return NULL;
  63   while (*tp != '\0') {
  64     if (*tp == ESC)
  65       mlmdset(&tp, tg0, tg1);
  66     if ((mlchrcmp(tp, tg0, tg1, cp, cg0, cg1)) == MATCH)
  67       return tp;
  68     mlchrinc(&tp, tg0, tg1);
  69   }
  70   return NULL;
  71 }
  72 
  73 int
  74 mlchrcmp(unsigned char *tw, unsigned char *tg0, unsigned char *tg1,\
  75          unsigned char *cw, unsigned char *cg0, unsigned char *cg1)
  76 {
  77   int ans;
  78 
  79   if (*tw == ESC || *tw == '\0')
  80     return NEXT;
  81   if (*cw == ESC || *cw == '\0')
  82     return NEXT;
  83 
  84   if (*cw < (unsigned char) '\xA0') {
  85     if (*tg0 == *cg0)
  86       if (*tw == *cw)
  87         if (*cg0 == JIS)
  88           if (*(tw+1) == *(cw+1))
  89             ans = MATCH;
  90           else
  91             ans = UNMATCH;
  92         else
  93           ans = MATCH;
  94       else
  95         ans = UNMATCH;
  96     else
  97       ans = UNMATCH;
  98   } else {
  99     if (*tg1 == *cg1)
 100       if (*tw == *cw)
 101         ans = MATCH;
 102       else
 103         ans = UNMATCH;
 104     else
 105       ans = UNMATCH;
 106   }
 107   return ans;
 108 }
 109 
 110 unsigned char
 111 mlchrinc(unsigned char **cwa, unsigned char *g0, unsigned char *g1)
 112 {
 113   unsigned char c;
 114   unsigned char *cw = *cwa;
 115 
 116   while (*cw == ESC)
 117     mlmdset(&cw, g0, g1);
 118   if (*cw != '\0') 
 119     if (*cw < (unsigned char) '\xA0')
 120       if (*g0 == JIS) 
 121         cw += 2;
 122       else
 123         cw++;
 124     else
 125       cw++;
 126   while (*cw == ESC)
 127     mlmdset(&cw, g0, g1);
 128   *cwa = cw;
 129   return *cw;
 130 }
 131 
 132 unsigned char *
 133 mlchrlower(unsigned char *p)
 134 {
 135   unsigned char g0 = ASCII;
 136   unsigned char g1 = LATIN1;
 137   unsigned char *ps = p;
 138   
 139   while (*p != '\0') {
 140     while (*p == ESC)
 141       mlmdset(&p, &g0, &g1);
 142     if (*p == '\0')
 143       return ps;
 144     if (*p < (unsigned char) '\xA0') {
 145       if (g0 == ASCII) {
 146         if (*p >= 'A' && *p <= 'Z')
 147           *p = *p + 'a' - 'A';
 148         p++;
 149       } else
 150         p += 2;
 151     } else {
 152       switch (g1) {
 153       case LATIN1 :
 154         if (*p >= (unsigned char) '\xC0' && *p <= (unsigned char) '\xDE' \
 155             && *p != (unsigned char) '\xD7')
 156           *p = *p + (unsigned char) '\x20';
 157         break;
 158       case LATIN2 :
 159         if (*p >= (unsigned char) '\xA1' && *p <= (unsigned char) '\xAF' \
 160             && *p != (unsigned char) '\xA0' && *p != (unsigned char) '\xA2'\
 161             && *p != (unsigned char) '\xA4' && *p != (unsigned char) '\xA7'\
 162             && *p != (unsigned char) '\xA8' && *p != (unsigned char) '\xAD')
 163           *p = *p + (unsigned char) '\x10';
 164         if (*p >= (unsigned char) '\xC0' && *p <= (unsigned char) '\xDE' \
 165             && *p != (unsigned char) '\xD7')
 166           *p = *p + (unsigned char) '\x20';
 167         break;
 168       case CYRIL  :
 169         if (*p >= (unsigned char) '\xA1' && *p <= (unsigned char) '\xAF' \
 170             && *p != (unsigned char) '\xAD')
 171           *p = *p + (unsigned char) '\x50';
 172         if (*p >= (unsigned char) '\xB0' && *p <= (unsigned char) '\xCF')
 173           *p = *p + (unsigned char) '\x20';
 174         break;
 175       case OTHER  :
 176       default :
 177         break;
 178       }
 179       p++;
 180     }
 181   }
 182   return ps;
 183 }
 184 
 185 int 
 186 mlstrcmp(unsigned char *s, unsigned char *t)
 187 {
 188   for ( ; *s == *t && *s != '\0'; s++, t++);
 189   return *s - *t;
 190 }
 191 
 192 unsigned char
 193 mlchrtype(unsigned char **cwa, unsigned char *cg0, unsigned char *cg1)
 194 {
 195   unsigned char *cw = *cwa;
 196   unsigned char rtn;
 197   
 198   while (*cw == ESC)
 199     mlmdset(&cw, cg0, cg1);
 200   if (*cw == '\0')
 201     return '\0';
 202   if (*cw < (unsigned char) '\xA0')
 203     if (*cg0 == JIS)
 204       rtn = '9';
 205     else 
 206       switch (*cw) {
 207       case '*' :
 208       case '.' :
 209       case '\\':
 210         rtn = *cw;
 211         break;
 212       default :
 213         rtn = '0';
 214         break;
 215       }
 216   else 
 217     rtn = '1';
 218   *cwa = cw;
 219   return rtn;
 220 }
 221 
 222 unsigned char
 223 mlesccheck(unsigned char *cw)
 224 {
 225   if ((strncmp(cw, ESCASCII, 3)) == 0)
 226     cw += 3;
 227   else if ((strncmp(cw, ESC88591, 3)) == 0)
 228     cw += 3;
 229   else if ((strncmp(cw, ESCASCII ESC88591, 6)) == 0)
 230     cw += 6;
 231   else if ((strncmp(cw, ESC88591 ESCASCII, 6)) == 0)
 232     cw += 6;
 233   return *cw;
 234 }
 235 

/* [previous][next][first][last][top][bottom][index][help] */