root/divide.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. getwdarray
  2. getcdarray
  3. divideword
  4. wordgen
  5. wordptgen

   1 /*
   2  *   Compound Text word statistics and search report
   3  *
    4  *   divide.c : multilingual word division & word array construction
   5  *
   6  *                                           Copyright(c) isao yasuda, 1998 
   7  */
   8 static char *rcs_id = "$Id: divide.c,v 1.3 2008/05/04 13:07:34 isao Exp $";
   9 
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <string.h>
  13 #include "staslova.h"
  14 #include "table.h"
  15 
  16 unsigned char **
  17 getwdarray(unsigned char *ctext)
  18 {
  19   unsigned char *wp[MAXWORD+1];
  20   unsigned char c;
  21   int no = 0;
  22   
  23   while (*ctext) { /* kind = 0 : operator check off */
  24     if ((c = divideword(&ctext, 0)) != '\0') {
  25       if (no > MAXWORD) {
  26         fprintf(stderr, "Word number limit over.(getwdarray)\n");
  27         exit(1);
  28       }
  29       if ((wp[no++] = wordgen(&word[0])) == NULL) 
  30         exit(1);
  31     } 
  32   }
  33   if (no == 0) {
  34     return NULL;
  35     erc = NOWORDS;
  36   }
  37   return wordptgen(&wp[0], no);
  38 }
  39 
  40 unsigned char **
  41 getcdarray(unsigned char **cdtext)
  42 {
  43   unsigned char *wp[MAXWORD+1];
  44   unsigned char *ctext = *cdtext;
  45   unsigned char c;
  46   int no = 0;
  47   int expf = 0;
  48   
  49   if (stock != NULL) {
  50     wp[no++] = stock;
  51     if (*stock != '(')
  52       expf = 1;
  53     stock = NULL;
  54   }
  55   while (*ctext != '\0' && expf !=2) {
  56     switch (c = divideword(&ctext, 1)) { /* kind = 1 : operator check on */
  57     case (unsigned char)0 : break;
  58     case EXP :
  59       if (casemode == '0')
  60         (unsigned char *)mlchrlower(&word[2]);
  61       if (expf == 1) { 
  62         expf = 2;
  63         if ((stock = wordgen(&word[0])) == NULL)
  64           exit(1);
  65       } else {
  66         expf = 1;
  67         if ((wp[no++] = wordgen(&word[0])) == NULL)
  68           exit(1);
  69       }
  70       break;
  71     case '(' :
  72       if (expf == 1) {
  73         expf = 2;
  74         if ((stock = wordgen(&word[0])) == NULL)
  75           exit(1);
  76       } else {
  77         if ((wp[no++] = wordgen(&word[0])) == NULL)
  78           exit(1);
  79       }
  80       break;
  81     case '*' :
  82     case '+' :
  83     case '#' :
  84       expf = 0;
  85     default :
  86       if ((wp[no++] = wordgen(&word[0])) == NULL)
  87         exit(1);
  88       break;
  89     }
  90   }
  91   *cdtext = ctext;
  92   if (no == 0) {
  93     erc = NOWORDS;
  94     return NULL;
  95   }
  96   return wordptgen(&wp[0], no);
  97 }      
  98 
/*
 * divideword : extract the next token from *string into the global `word`.
 *
 *   string  in/out; *string is the scan position and is updated to the
 *           position where scanning stopped
 *   kind    0 = plain word division, non-zero = also recognise the
 *           operators '*', '+', '#' and the parentheses '(' and ')'
 *
 * Return value / contents of `word`:
 *   '\0'            no significant character was collected
 *   EXP             a word: word[0] = EXP, word[1] = the accumulated
 *                   `lang` bits, text starts at word[2], NUL-terminated
 *   '*' '+' '#'     (kind != 0 only) operator that is preceded by a space
 *                   or tab and followed by a space, tab or '('; `word`
 *                   holds the single character as a string
 *   '('             (kind != 0 only) `word` holds "("
 *   ')'             (kind != 0 only, and only when no word character
 *                   has been collected yet) `word` holds ")"
 *
 * The function works on the globals text, wordp, lang, wc, type, escfg0
 * and escfg1; escfg0/escfg1 are reset to 0 on the normal exit path.
 * mlesccpy(), mlsepcheck() and mlescinsert() are defined elsewhere and
 * their exact effect on text/wordp/wc is not visible here.
 *
 * NOTE(review): nothing in this function bounds the writes through
 * wordp, so the capacity of `word` has to be guaranteed by the caller or
 * by the input - confirm.
 */
unsigned char
divideword(unsigned char **string, int kind)
{
  unsigned char langp;

  text = *string;
  wordp = word + 2;   /* word[0] and word[1] are reserved for type and lang */
  lang = BASE;
  wc = 0;

  /* Skip leading characters <= ' ' (but stop at ESC or end of string). */
  for ( ; *text <= ' ' && *text != ESC && *text != '\0'; text++);
  /* Collect until a character <= ' ' that is not ESC (this includes NUL). */
  while (*text > ' ' || *text == ESC) {
    if (*text == ESC)
      mlesccpy();
    else if (*text >= ' ' && *text < (unsigned char) '\xA0') {
      if (G0 == JIS) {
        if (mlsepcheck(&text, JIS) != SEP) {
          /* first character of this kind in the word: mlescinsert(IN) once */
          if (escfg0 == 0) {
            mlescinsert(IN);
            escfg0 = 1;
          }
          lang |= JIS;
          /* JIS characters are copied two bytes at a time */
          *wordp++ = *text++; *wordp++ = *text++; wc += 2;
        }
      } else {
        if (kind) { /* if kind is not 0 operator check on */
          switch (type = *text) {
          case '*' :
          case '+' :
          case '#' :
            /*
             * NOTE(review): *(text-1) reads the byte before the current
             * position; if text is the very first byte of the buffer
             * this is an out-of-bounds read - confirm against callers.
             */
            if (*(text-1) == ' ' || *(text-1) == '\t') {
              text++;
              if ((*text == ' ') || (*text == '\t') || (*text == '(')) {
                /* sprintf terminates the string itself; the "\0" in the
                   format is redundant */
                sprintf(&word[0], "%c\0", type);
                *string = text;
                return type;
              } else {
                /* not a stand-alone operator: treat it as a word character */
                text--;
              }
            }
            break;
          case '(' :
            /* '(' is returned as a token regardless of wc */
            *string = ++text;
            sprintf(&word[0], "%c\0", type);
            return type;
          case ')' :
            if (wc == 0) {
              *string = ++text;
              sprintf(&word[0], "%c\0", type);
              return type;
            } else
              break;
          case '\\':
            /* copy the backslash itself; the character after it is not
               run through this switch in the current iteration */
            *wordp++ = *text++; wc++;
            break;
          default :
            break;
          }
          /* runs of '*' and '.' are copied into the word as they are */
          while (*text == '*' || *text == '.') {
            *wordp++ = *text++; wc++;
          }
          if (*text == ')')
            break;      /* leaves the while loop: ')' ends the word */
          if (*text == '\\')
            continue;   /* let the next iteration handle the backslash */
        }
        if (*text < (unsigned char) '\xA0')
          if (mlsepcheck(&text, ASCII) != SEP) {
            lang |= ASCII;
            *wordp++ = *text++; wc ++;
          }
      }
    } else { /* *text >= '\xA0' */
      /* choose the language bit from G1; anything else counts as OTHER */
      switch (G1) {
      case LATIN1 : langp = LATIN1; break;
      case LATIN2 : langp = LATIN2; break;
      case CYRIL  : langp = CYRIL ; break;
      default     : langp = BASE  ; break;
      }
      if (langp != BASE) {
        if (mlsepcheck(&text, langp) != SEP) {
          if (escfg1 == 0) {
            mlescinsert(IN);
            escfg1 = 1;
          }
          lang |= langp;
          *wordp++ = *text++; wc ++;
        }
      } else {
        /* no separator check on this path */
        if (escfg1 == 0) {
          mlescinsert(IN);
          escfg1 = 1;
        }
        lang |= OTHER;
        *wordp++ = *text++; wc++;
      }
    }
  }
  if (wc != 0) { /* if no significant characters return null */
    mlescinsert(OUT);
    *wordp = '\0';
    type = EXP;
    word[0] = type;
    word[1] = lang;
  } else
    type = '\0';
  escfg0 = escfg1 = 0;
  *string = text;
  return type;
}
 209 
 210 unsigned char *
 211 wordgen(unsigned char *word)
 212 {
 213   unsigned char *p;
 214 
 215   if ((p = (unsigned char *) malloc(strlen(word)+1)) != NULL) 
 216     return strcpy(p, word);
 217   else {
 218     fprintf(stderr, "Word memory allocation failed.(wordgen)\n");
 219     erc = MEMERR;
 220     return NULL;
 221   }
 222 }
 223 
 224 unsigned char **
 225 wordptgen(unsigned char **wp, int no)
 226 {
 227   unsigned char **wk;
 228   unsigned char **p = wk;
 229   if ((wk = (unsigned char **) \
 230        malloc((no + 1) * sizeof(unsigned char *))) != NULL) {
 231     p = wk;
 232     while (no--)
 233       *wk++ = *wp++;
 234     *wk = NULL;
 235     return p;
 236   } else {
 237     fprintf(stderr, "Array table memory allocation failed.(wordptgen)\n");
 238     erc = MEMERR;
 239     return NULL;
 240   }
 241 }
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 

/* [previous][next][first][last][top][bottom][index][help] */