root/separate.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. sepextend

   1 /*
   2  *   Compound Text word statistics and search report
   3  *
   4  *   separate.c : separator definition parameter extension
   5  *
   6  *                                           Copyright(c) isao yasuda, 1998 
   7  */
   8 static char *rcs_id = "$Id: separate.c,v 1.3 2008/05/04 13:05:00 isao Exp $"; /* RCS/CVS $Id$ keyword string recording this file's revision */
   9 
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "staslova.h"
#include "table.h"
  14 
  15 int
  16 sepextend(unsigned char *text)
  17 {
  18   unsigned char buf[MAXJS+1];
  19   unsigned char *k = buf;
  20   unsigned char *x;
  21   int c, r, i, max;
  22   int mode = 0;
  23   unsigned char hex[5]= " ";
  24 
  25   if ((c = *text++) == 'k') { /* Latin */
  26     if (*text == 's') {
  27       switch (*++text) {
  28       case '0' : x = &ks0[0]; c = ' '   ; max = MAXKS; break; 
  29       case '1' : x = &ks1[0]; c = (unsigned char) '\xA0'; max = MAXKS; break;
  30       case '2' : x = &ks2[0]; c = (unsigned char) '\xA0'; max = MAXKS; break;
  31       case '5' : x = &ks5[0]; c = (unsigned char) '\xA0'; max = MAXKS; break;
  32       default  :
  33         fprintf(stderr, "Unknown ks%c\n", *text);
  34         return 1;
  35       }
  36     } else if (*text == 'c') {
  37       switch (*++text) {
  38       case '0' : x = &kc0[0]; c = ' '   ; max = MAXKC; break; 
  39       case '1' : x = &kc1[0]; c = (unsigned char) '\xA0'; max = MAXKC; break;
  40       case '2' : x = &kc2[0]; c = (unsigned char) '\xA0'; max = MAXKC; break;
  41       case '5' : x = &kc5[0]; c = (unsigned char) '\xA0'; max = MAXKC; break;
  42       default  :
  43         fprintf(stderr, "Unknown kc%c\n", *text);
  44         return 1;
  45       }
  46     } else if (*text == 'p') {
  47       switch (*++text) {
  48       case '0' : x = &kp0[0]; c = ' '   ; max = MAXKP; break; 
  49       case '1' : x = &kp1[0]; c = (unsigned char) '\xA0'; max = MAXKP; break;
  50       case '2' : x = &kp2[0]; c = (unsigned char) '\xA0'; max = MAXKP; break;
  51       case '5' : x = &kp5[0]; c = (unsigned char) '\xA0'; max = MAXKP; break;
  52       default  :
  53         fprintf(stderr, "Unknown kp%c\n", *text);
  54         return 1;
  55       }
  56     } else {
  57       fprintf(stderr, "Unknown command k%c\n", *text);
  58       return 1;
  59     }
  60     while (*++text)
  61       if (*text > c)
  62         *k++ = *text;
  63     *k = '\0';
  64     if ((strlen(x) + strlen(&buf[0])) > max) {
  65       fprintf(stderr, "Separator definition size limit over.\n");
  66       return 1;
  67     }
  68     strcat(x, buf);
  69     
  70   } else { /* JIS kanji */ 
  71     if (c == 'j') {
  72       switch (c = *text++) {
  73       case 's' : x = &js[0]; c = ' '   ; max = MAXKP; break; /* js */
  74       case 'c' : x = &jc[0]; c = ' '   ; max = MAXKP; break; /* jc */
  75       case 'p' : x = &jp[0]; c = ' '   ; max = MAXKP; break; /* jp */
  76       default  : 
  77         fprintf(stderr, "Unknown command j%c\n", c);
  78         return 1;
  79       }
  80       text++;
  81       while (*text != '\0' && *text != '\n') {
  82         switch (*text) {
  83         case ESC :
  84           if (strncmp(text, ESCKANJI, 3) == 0) {
  85             text += 4;
  86             mode = 1;
  87           } else if (strncmp(text, ESCASCII, 3) == 0) {
  88             text += 3;
  89             mode = 0;
  90           } else {
  91             fprintf(stderr, "Unexpected ESC sequence.(separate)\n");
  92             return 1;
  93           }
  94           break;
  95         case ':' :
  96           if (mode == 0) { /* range sign rewrite by space*/
  97             *k++ = ' ';
  98             text++;
  99           } else { /* part of kanji */
 100             *k++ = *text++; *k++ = *text++;
 101           }
 102           break;
 103         case ' ' :
 104         case '\t' :
 105           text++;
 106           break;
 107         default :
 108           if (mode ==1) { /* kanji */
 109             *k++ = *text++; *k++ = *text++;
 110             break;
 111           } else { /* hexadecimal conversion of kanji */
 112             i = 0;
 113             if (isxdigit(r = *text)) {
 114               hex[i++] = *text++;
 115             } else {
 116               fprintf(stderr, "Invalid digit %c.(separate)\n", r);
 117               return 1;
 118             } 
 119             if (isxdigit(r = *text)) {
 120               hex[i++] = *text++; hex[i] = '\0';
 121             } else {
 122               fprintf(stderr, "Invalid digit %c.(separate)\n", r);
 123               return 1;
 124             }
 125             sscanf(&hex[0], "%x", &r);
 126             sprintf(k++, "%c", r);
 127           }         
 128         }
 129       }
 130       *k = '\0';
 131       if ((strlen(x) + strlen(&buf[0])) > max) {
 132         fprintf(stderr, "Separator definition size limit over.\n");
 133         return 1;
 134       }
 135       strcat(x, buf);
 136     } else {
 137       fprintf(stderr, "Unsupport function.(separate)\n");
 138       return 1;
 139     }
 140   }
 141   return 0;
 142 }
 143 
 144 
 145 
 146 
 147 
 148 
 149 
 150 
 151 
 152 

/* [previous][next][first][last][top][bottom][index][help] */