root/check.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mlsepcheck
  2. js_check
  3. jc_check
  4. jp_check
  5. ks_check
  6. kc_check
  7. kp_check

   1 /*
   2  *   Compound Text word statistics and search report
   3  *
   4  *   check.c : Separator check 
   5  *
   6  *                                           Copyright(c) isao yasuda, 1998 
   7  */
   8 static char *rcs_id = "$Id: check.c,v 1.2 2007/09/09 11:19:56 isao Exp $";
   9 
  10 #include <stdio.h>
  11 #include "staslova.h"
  12 #include "table.h"
  13 
  14 unsigned char *js_check(unsigned char *);
  15 unsigned char *jc_check(unsigned char *);
  16 unsigned char *jp_check(unsigned char *);
  17 unsigned char *ks_check(unsigned char *, unsigned char *);
  18 unsigned char *kc_check(unsigned char *, unsigned char *);
  19 unsigned char *kp_check(unsigned char *, unsigned char *, unsigned char);
  20 
  21 int
  22 mlsepcheck(unsigned char **wd, unsigned char l)
  23 {
  24   unsigned char *k1, *k2, *k3;
  25   unsigned char *res;
  26 
  27   if (**wd <= ' ')
  28     return SEP;
  29 
  30   if (l == JIS) { /* JIS kanji */
  31     /* separate check 1 : independent separator */
  32     if (*(res = js_check(*wd)) == ' ' || *res == '\0') {
  33       *wd = res;
  34       return SEP;
  35     /* separate check 2 : sequential separator */
  36     } else if (*(res = jc_check(*wd)) == ' ' || *res == '\0') {
  37       *wd = res;
  38       return SEP;
  39     /* separate check 3 : coupling separator */
  40     } else if (*(res = jp_check(*wd)) == ' ' || *res == '\0') {
  41       *wd = res;
  42       return SEP;
  43     }
  44   } else { /* ASCII, ISO-8859-X */
  45     switch (l) {
  46     case ASCII  : k1 = ks0; k2 = kc0; k3 = kp0; break;
  47     case LATIN1 : k1 = ks1; k2 = kc1; k3 = kp1; break;
  48     case CYRIL  : k1 = ks5; k2 = kc5; k3 = kp5; break;
  49     case LATIN2 : k1 = ks2; k2 = kc2; k3 = kp2; break;
  50     default     : return NOSEP;
  51     }
  52     /* separate check 1 : independent separator */
  53     if (*(res = ks_check(*wd, k1)) == ' ' || *res == '\0') {
  54       *wd = res;
  55       return SEP;
  56     /* separate check 2 : sequential separator */
  57     } else if (*(res = kc_check(*wd, k2)) == ' ' || *res == '\0') {
  58       *wd = res;
  59       return SEP;
  60     /* separate check 3 : coupling separator */
  61     } else if (*(res = kp_check(*wd, k3, l)) == ' ' || *res == '\0') {
  62       *wd = res;
  63       return SEP;
  64     }
  65   }
  66   return NOSEP;
  67 }
  68     
  69 unsigned char *  /* when it's a separator return address of ' ' or '\0' */
  70 js_check(unsigned char *s)
  71 {
  72   unsigned char fc;
  73   unsigned char rc;
  74   unsigned char *sp1;
  75 
  76   for (sp1 = js, fc = *s, rc = *(s+1); \
  77        fc != '\0' && *sp1 != '\0'; sp1++) {
  78     if (*(sp1+2) != ' ') { /* range sign check */
  79       if ((fc == *sp1++) && (rc == *sp1)) { 
  80         *s++ = ' '; *s = ' ';
  81         return s;
  82       }
  83     } else { /* range check */
  84       if (((fc > *sp1) && (fc < *(sp1+3))) || \
  85           ((fc > *sp1) && (fc == *(sp1+3)) && (rc <= *(sp1+4))) || \
  86           ((fc == *sp1) && (rc >= *(sp1+1)) && (fc < *(sp1+3))) || \
  87           ((fc == *sp1) && (rc >= *(sp1+1)) && \
  88            (fc == *(sp1+3)) && (rc <= *(sp1+4)))) {
  89         *s++ = ' '; *s = ' ';
  90         return s;
  91       }
  92       sp1 += 4;
  93     }
  94   }
  95   return s;
  96 }
  97 
  98 unsigned char *  /* when it's a separator return address of ' ' or '\0' */
  99 jc_check(unsigned char *s)
 100 {
 101   unsigned char fc;
 102   unsigned char rc;
 103   unsigned char *sp1;
 104   
 105   for (sp1 = jc, fc = *s; \
 106        *s != '\0' && *sp1 != '\0'; sp1++) {
 107     if (*sp1 == ' ') {
 108       fprintf(stderr, "Sorry jc not support range `:\' yet.(mlsepcheck)\n");
 109       sp1++;
 110     }
 111     while (!strncmp(sp1, s, 2)) {
 112       *s = ' '; *(s+1) = ' ';
 113       s +=2;
 114       fc = '\0';
 115     }
 116     if (fc == '\0') {
 117       s--;
 118       return s;
 119     }
 120     sp1++;
 121   }
 122   return s;
 123 }      
 124 
 125 unsigned char *  /* when it's a separator return address of ' ' or '\0' */
 126 jp_check(unsigned char *s)
 127 {
 128   unsigned char fc;
 129   unsigned char rc;
 130   unsigned char *sp1;
 131   unsigned char *sa;
 132 
 133   for (sp1 = jp, fc = *s, rc = *(s+1); \
 134        fc != '\0' && *sp1 != '\0'; sp1++) {
 135     if (*sp1 == ' ') {
 136       fprintf(stderr, "Sorry jp not support range `:\' yet.(mlsepcheck)\n");
 137       sp1++;
 138     }
 139     if ((fc == *sp1++) && (rc == *sp1)) {
 140       if (wc == 0) {
 141         *s++ = ' '; *s = ' ';
 142         return s;
 143       } else {
 144         sa = s; 
 145         s += 2;
 146         if ((mlsepcheck(&s, JIS)) == SEP) { /* minaosi ga hituyou ! */
 147           *sa++ = ' '; *sa = ' ';
 148         }
 149         s = sa;
 150         return s;
 151       }
 152     }
 153   }
 154   return s;
 155 }
 156 
 157 unsigned char *  /* when it's a separator return address of ' ' or '\0' */
 158 ks_check(unsigned char *s, unsigned char *k1)
 159 {
 160   unsigned char fc;
 161   unsigned char *sp1;
 162 
 163   for (sp1 = k1, fc = *s; *sp1 != fc && *sp1 != '\0'; sp1++);
 164   if (*sp1 != '\0')
 165     *s = ' ';
 166   return s;
 167 }
 168 
 169 unsigned char *  /* when it's a separator return address of ' ' or '\0' */
 170 kc_check(unsigned char *s, unsigned char *k2)
 171 {
 172   unsigned char fc;
 173   unsigned char *sp1;
 174   int i;
 175 
 176   for (sp1 = k2, fc = *s; *sp1 != fc && *sp1 != '\0'; sp1++);
 177   if (*sp1 != '\0') {
 178     for (i = 0; *sp1 == *s; i++)
 179       *s++ = ' ';
 180     if (i == 1)
 181       *--s = *sp1;
 182   }
 183   return s;
 184 }
 185 
 186 unsigned char *  /* when it's a separator return address of ' ' or '\0' */
 187 kp_check(unsigned char *s, unsigned char *k3, unsigned char l)
 188 {
 189   unsigned char fc;
 190   unsigned char *sp1;
 191   unsigned char *sa;
 192 
 193   for (sp1 = k3, fc = *s; *sp1 != fc && *sp1 != '\0'; sp1++);
 194   if (*sp1 != '\0')
 195     if (wc == 0)
 196       *s = ' ';
 197     else {
 198       sa = s++;
 199       if ((mlsepcheck(&s, l)) == SEP) 
 200         *sa = ' ';
 201       s = sa;
 202     }
 203   return s;
 204 }
 205 
 206 

/* [previous][next][first][last][top][bottom][index][help] */