/* KWIC (Keyword in context generator) $Id: kwic.c,v 1.4 2002/12/23 22:38:12 harding Exp $ - If a stoplist is supplied we only show non-stopped items. - If a formstable is supplied we map forms to root items where we can. - But in the generated table, the *context* we show for the selected items is non-stopped and not mapped to roots: for display purposes, it is closer to the text content. Output looks like (line #'s down the left): 5 easy under the appleboughs About the lilting house and 15 carefree famous among barns About the happy yard and 6 was green The night above the dingle starry Time 36 all Shining it was Adam and maiden The sky 39 it must have been after the birth of the 37 maiden The sky gathered again And the sun grew 25 the chimneys it was air And playing lovely and 49 blue trades that time allows In all his tuneful 56 the moon that is always rising Nor that riding 9 his eyes and honoured among wagons I was prince */ #include #include #include #include #include #include #include #define KWIC_CONTEXT 4 /* how many words on either side? */ void print_kwic(lxLexlist *list); void print_kwicitem(lxContext *cntx, lxLexitem *item, char *root); void usage(void) { fprintf(stderr, "usage: kwic [-s stoplist] [-f formstable] textfile\n"); exit(1); } int main(int argc, char **argv) { int ch; char *stoplist; char *formstable; lxLexlist *list; stoplist = NULL; formstable = NULL; while ((ch = getopt(argc, argv, "s:f:")) != -1) { switch(ch) { case 's': stoplist = optarg; break; case 'f': formstable = optarg; break; default: usage(); break; } } argc -= optind; argv += optind; if (argc < 1) usage(); /* * parse markers; this could be an option except that * the plain parser doesn't break out newlines. But * the marker parser will become the default parser * soon. */ lx_readtoken = lx_marker_readtoken; list = lx_newlexlist("temp"); if (!list) { perror("malloc"); exit(1); } if (lx_loadlist(list, argv[0]) == -1) { fprintf(stderr, "error loading \"%s\": %s\n", argv[0], lx_strerr(lxerrnum)); exit(1); } if (stoplist && lx_loadstops(list, stoplist) == -1) { perror(stoplist); exit(1); } if (formstable && lxf_loadforms(list, formstable) == -1) { perror(formstable); exit(1); } print_kwic(list); return 0; } void print_kwic(lxLexlist *list) { char *root; lxTrav *lxtv; lxIndexnode *inode; lxLexitem *item; lxContext *cntx; /* * set up context for lexical items (both stopped and non-stopped) */ cntx = lx_newcontext(list, LX_SELLEX, KWIC_CONTEXT, KWIC_CONTEXT); if (!cntx) { perror("malloc"); /* XX wrong */ exit(1); } lxtv = lx_travinit(list, LX_SELNSLEX); if (!lxtv) { perror("malloc"); exit(1); } for (inode = lx_travfirstindex(lxtv); inode; inode = lx_travnextindex(lxtv)) { root = lx_derefindextoken(inode, LX_DEREFROOT); for (item = lx_firstnsoccur(inode); item; item = lx_nextnsoccur(item)) { print_kwicitem(cntx, item, root); } } return; } void print_kwicitem(lxContext *cntx, lxLexitem *item, char *root) { int i, len, lead; (void)lx_getcontext(cntx, item); i = lxp_getlineno(item); printf("%5d ", (i) ? i : -1); /* length of tokens + 1 per token for spaces */ len = lx_llenfromcontext(cntx) + lx_lnumfromcontext(cntx); if ((lead = (30 - len)) > 0) { for (i = 0; i < lead; i++) putchar(' '); } for (item = lx_lfirstfromcontext(cntx); item; item = lx_lnextfromcontext(cntx)) printf("%s ", lx_token(item)); printf(" %-14.14s ", root); for (item = lx_rfirstfromcontext(cntx); item; item = lx_rnextfromcontext(cntx)) printf("%s ", lx_token(item)); printf("\n"); return; } /* END */