/* * bgrep.c * $Id: bgrep.c,v 1.5 2006/06/22 11:45:24 candy Exp candy $ */ #include #include #include #include #include #include #include static char *myname; static int verbose; static int use_bm = 1; #define RING_SIZE 1024 static int fring[RING_SIZE]; static int fr_rp; static int fr_wp; /* RING(-1) = 最後に読んだ文字, RING(-2) = その前に読んだ文字 */ #define RING(i) (fring[(fr_rp + (i) + RING_SIZE) % RING_SIZE]) static int ring_seek_back(int n) { fr_rp = (fr_rp + RING_SIZE - n) % RING_SIZE; return 0; }/* ring_seek_back */ static int ring_getc(FILE *fp) { int ch = EOF; if (fr_rp != fr_wp) { ch = fring[fr_rp]; fr_rp = (fr_rp + 1) % RING_SIZE; } else { ch = getc(fp); fring[fr_wp] = ch; fr_wp = (fr_wp + 1) % RING_SIZE; fr_rp = fr_wp; } return ch; }/* ring_getc */ #define CHARSET (UCHAR_MAX + 1) static int bm_tbl[CHARSET]; static void bm_make_tbl(const unsigned char *key, size_t klen) { int i; for (i = 0; i < CHARSET; i++) bm_tbl[i] = klen; for (i = 0; i < klen - 1; i++) bm_tbl[key[i]] = klen - 1 - i; }/* bm_make_tbl */ static void output_match(FILE *fp, fpos_t pos, size_t klen) { printf("%#llx", pos); if (verbose) { int i; ring_seek_back(klen); for (i = 0; i < klen; i++) { printf(" %02x", ring_getc(fp)); }/* for */ } printf("\n"); }/* output_match */ static int fread_n(FILE *fp, int n) { int i = 0; int ch = 0; while (i++ < n && (ch = ring_getc(fp)) != EOF) ; return ch; }/* fread_n */ static int bm_file(FILE *fp, const unsigned char *key, size_t klen) { int nmatch = 0; fpos_t pos = 0; int ch; bm_make_tbl(key, klen); ch = fread_n(fp, klen); while (ch != EOF) { int match = 0; if (ch == key[klen - 1]) { int k = klen - 2; int r = -2; match = 1; while (match && k >= 0) { match = RING(r) == key[k]; r--; k--; } if (match) { output_match(fp, pos, klen); pos += klen; nmatch++; ch = fread_n(fp, klen); } } if (!match) { int n = bm_tbl[(unsigned char)ch]; ch = fread_n(fp, n); pos += n; } } return nmatch; }/* bm_file */ static int search_file(FILE *fp, const unsigned char *key, size_t klen) { int nmatch = 0; fpos_t pos = 0; int ch; while ((ch = ring_getc(fp)) != EOF) { int match = 0; size_t k = 0; if (ch == key[0]) { k++; while (k < klen && (ch = ring_getc(fp)) != EOF && ch == key[k]) { k++; } if (k == klen) match = 1; } if (match) { output_match(fp, pos, klen); pos += klen; nmatch++; } else { pos++; ring_seek_back(k); } } return nmatch; }/* search_file */ static int fnain(FILE *fp, const unsigned char *key, size_t klen) { int ret; if (use_bm) ret = bm_file(fp, key, klen); else ret = search_file(fp, key, klen); return ret; }/* fnain */ static int nain(const char *name, const unsigned char *key, size_t klen) { int ret = -1; FILE *fp = fopen(name, "r"); if (fp != NULL) { printf("%s\n", name); ret = fnain(fp, key, klen); fclose(fp); } else { fprintf(stderr, "%s: %s: %s\n", myname, name, strerror(errno)); } return ret; }/* nain */ static int atox(int c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'A' && c <= 'F') return c - 'A' + 10; if (c >= 'a' && c <= 'f') return c - 'a' + 10; return 0; }/* atox */ #define isoctal(c) ((c) >= '0' && (c) <= '7') static size_t lex(const char *str, unsigned char *buf, size_t size) { const unsigned char *s = (const unsigned char *)str; unsigned char *d = buf; size_t i = 0; while (i < size && *s != '\0') { int x = 0; switch (*s++) { case '\\': if (*s == 'x') { if (isxdigit(s[1])) { if (isxdigit(s[2])) { x = (atox(s[1]) << 4) | atox(s[2]); s += 3; } else { x = atox(s[1]); s += 2; } } else { x = *s++; } } else if (isoctal(s[0])) { if (isoctal(s[1])) { if (isoctal(s[2])) { x = (((atox(s[0]) << 3) | atox(s[1])) << 3) | atox(s[2]); s += 3; } else { x = (atox(s[0]) << 3) | atox(s[1]); s += 2; } } else { x = atox(s[0]); s += 1; } } else { x = *s++; } break; default: x = s[-1]; break; }/* switch */ *d++ = x; i++; }/* while */ if (verbose & 2) { int j; printf("key:"); for (j = 0; j < i; j++) printf(" %02x", buf[j]); printf("\n"); } return i; }/* lex */ static char *usage_msg = "usage: %s [-bsVv] PATTERN file ...\n" "Search PATTERN from file. Version 0.8086\n" "\t-b\tBoyer-Moore (default)\n" "\t-s\tsimple algorism\n" "\t-v -vv\tverbose\n" "PATTERN:\n" "\tABCabc\tliteral\n" "\t\\ooo\toctal\n" "\t\\xhh\thex\n" ; int main(int argc, char *argv[]) { int ex = 1, ch, show_usage = 0; myname = argv[0]; while ((ch = getopt(argc, argv, "bsVv")) != EOF) { switch (ch) { default: case 'V': show_usage++; break; case 'b': use_bm = 1; break; case 's': use_bm = 0; break; case 'v': verbose++; break; }/* switch */ }/* while */ if (argc - optind == 0) { show_usage++; } if (show_usage) { fprintf(stderr, usage_msg, myname); } else { char *pat = argv[optind]; unsigned char *key = malloc(strlen(pat) + 1); if (key != NULL) { int klen = lex(pat, key, strlen(pat)); if (argc - optind - 1 == 0) { ex = fnain(stdin, key, klen) == 0; } else { int i; for (i = optind + 1; i < argc; i++) { if (nain(argv[i], key, klen) >= 0) { ex = 0; } }/* for */ } } else { fprintf(stderr, "%s: %s\n", myname, strerror(errno)); } } return ex; }/* main */