/* Make an index file from raw index entries. * * $Id: mkindex.c,v 1.6 2003/03/28 05:10:22 grog Exp $ * */ #include #include #include #include #include #include #define NRECS 16384 /* number of records we can handle in core */ #define BIGBUF 1024 #define ENTLEN 128 /* maxiumum entry length */ #define MIN_KW 9 /* minimum length of keyword */ #define SUBS 8 /* this many substrings (see below) */ int romannumber (char *text); /* these are in roman.c */ char *toroman (int number); char outbuf [BIGBUF]; int outptr; /* pointer in outbuf */ char inbuf [NRECS] [BIGBUF]; char keyword [BIGBUF]; int keywordlen; #define debug 0 /* for testing */ #if debug #define dprintf(a) printf (a) #else #define dprintf(a) #endif struct ref; struct ref { struct ref *next; int firstpage; int lastpage; int isroman; /* this is a roman number (ugh) */ } reference; struct ref *refptr; struct ref *lastref = &reference; /* * To ensure correct indentation, we maintain a list of substrings * representing the text up to a certain level of commas. For * example, * * file system, ufs, creating * * would consist of the substrings * * file system * file system, ufs * file system, ufs, creating * * Store them here. */ char substring [SUBS] [ENTLEN]; /* substrings of current */ int substrings; /* and number currently active */ char letter; /* letter for heading */ void put_line () { char *s; /* pointer in substring */ char *rest; /* differing rest from last entry */ int i; /* level of substring */ int common; /* levels in common with the previous */ int len; /* length of possible substring */ if (*keyword) { /* * Find out if there are commas in the keyword. If so, it may be * time to indent. */ s = keyword; rest = s; common = 0; /* nothing in common yet */ for (i = 0; i < SUBS; i++) { while (*s && *s != ',') /* look for the next comma */ s++; len = s - keyword; /* length of possible substring */ if (*s) /* not at end yet, */ { s++; /* point past the comma */ while (*s == ' ') s++; /* and skip any blanks */ } if ((i >= substrings) || memcmp (substring [i], keyword, len)) /* not the same */ { memcpy (substring [i], keyword, len); /* put in this keyword */ substring [i] [len] = '\0'; /* and delimit string */ substrings = i + 1; /* we currently have this many */ } else { common = i + 1; /* this many levels in common */ rest = s; /* and this is the uncommon text */ } if (! *s) /* got to the end, */ break; } if (isalpha (*keyword) && (toupper (*keyword) != letter)) /* changed initial letter */ { letter = toupper (*keyword); printf (".Indexletter %c\n", letter); } /* XXX */ switch (common) { case 0: printf (".Indexent0 \"%s\" \"", rest); break; case 1: printf (".Indexent1 \"%s\" \"%s\" \"", substring [0], rest); break; case 2: printf (".Indexent2 \"%s\" \"%s\" \"", substring [1], rest); break; default: printf (".Indexent%d \"%s\" \"", common, rest); } refptr = reference.next; /* go through the references */ /* This will break if we have no references. Do we care? */ while (refptr) { char *oldref = (char *) refptr; if (refptr->isroman) /* roman number */ { if (refptr->lastpage != refptr->firstpage) /* range */ printf ("%s\\(en%s", toroman (refptr->firstpage), toroman (refptr->lastpage)); else printf ("%s", toroman (refptr->firstpage)); /* only one */ } else { if (refptr->lastpage != refptr->firstpage) /* range */ printf ("%d\\(en%d", refptr->firstpage, refptr->lastpage ); else printf ("%d", refptr->firstpage); /* only one */ } if (refptr->next) /* more to come after this one, */ printf (", "); else printf ("\"\n"); refptr = refptr->next; /* get off the branch */ free (oldref); /* and chop it off */ } reference.next = NULL; /* no refs any more */ } } void insert_ref (char *start) { int pagenumber; int isroman = 0; /* until proven otherwise */ struct ref *lastref = &reference; /* last reference */ struct ref *thisref = reference.next; /* pointer in list of references */ if (isdigit (*start)) /* normal number */ { pagenumber = atoi (start); /* convert it */ while (thisref /* look for a place to lay our egg */ && (thisref->isroman || (thisref->lastpage < pagenumber) ) ) /* move on */ { if (thisref->lastpage == pagenumber - 1) /* we can coalesce */ { thisref->lastpage = pagenumber; /* update last page number */ return; /* and out */ } lastref = thisref; thisref = thisref->next; } } else { isroman = 1; pagenumber = romannumber (start); while (thisref && thisref->isroman /* look for a place to lay our egg */ && (thisref->lastpage < pagenumber) ) /* move on */ { if (thisref->lastpage == pagenumber - 1) /* we can coalesce */ { thisref->lastpage = pagenumber; /* update last page number */ return; /* and out */ } lastref = thisref; thisref = thisref->next; } } lastref->next = (struct ref *) malloc (sizeof (struct ref)); /* allocate a reference */ lastref = lastref->next; /* and point to it */ lastref->next = thisref; lastref->isroman = isroman; lastref->firstpage = pagenumber; lastref->lastpage = pagenumber; } /* * Auxiliary function for collate(). * This is a mess. Our input lines contain a single page number at * the end. That's not part of the sorting order. Fix this by noting * the last character for each that we want to be part of the sort. */ void trimlast (char *c) { char *a = &c [strlen (c) - 1]; /* last char */ while (*a == ' ') /* skip any trailing blanks */ a--; while (*a != ' ') /* skip line number */ a--; while (*a == ' ') /* skip intervening blanks */ a--; a [1] = '\0'; /* chop off at first char after string */ } /* * sort function for mergesort. This does a word-by-word sort which * ignores case and special characters. Case will still prevail if * it's the only difference, since the input stream has been sorted in * the normal ASCII collating sequence, and mergesort maintains order * unless told differently. */ #define BIGINDEX 1024 /* largest index entry */ int collate (const void *a, const void *b) { char ac [BIGINDEX]; char bc [BIGINDEX]; char *first = ac; char *second = bc; char f; /* pointer in first string */ char s; /* and in second */ int casediff = 0; /* set if we have only a case difference */ strcpy (ac, a); strcpy (bc, b); trimlast (ac); /* limit length to what we want to to examine */ trimlast (bc); /* limit length to what we want to to examine */ #if debug printf ("collate '%s' '%s': ", ac, bc); #endif while (1) { while (*first && ! isalpha (*first) && ! isdigit (*first) && (*first != ',') && (*first != '-') && (*first != ' ') ) first++; f = *first; if (islower (f)) f = toupper (f); while (*second && ! isalpha (*second) && ! isdigit (*second) && (*second != ',') && (*second != '-') && (*second != ' ') ) /* skip to letter, digit, space or comma */ second++; s = *second; if (islower (s)) s = toupper (s); if ((f == s) /* same except maybe for case */ && (! casediff) ) casediff = *first - *second; if (f == '\0') /* ran out of first one */ { dprintf ("a shorter\n"); if (s) return -1; /* s wins */ else return casediff; } if (s == '\0') /* ran out of second */ { dprintf ("b shorter\n"); return 1; /* first is larger */ } if (f < s) { if (s == ',') /* comma is special, */ { dprintf ("comma, a > b\n"); return 1; /* pretend to be smaller */ } else { dprintf ("a < b\n"); return -1; } } else if (f > s) { if (f == ',') /* comma is special, */ { dprintf ("comma, a < b\n"); return -1; /* pretend to be smaller */ } else { dprintf ("a > b\n"); return 1; } } /* Same so far. Move on and try again. */ first++; second++; } } int main (int argc, char *argv []) { int kwend; /* copy indices */ int pnstart; int pnend; int recs = 0; /* number of entries */ int i; if (argc > 1) { printf ("%s: this program is a filter, no arguments please\n", argv [0]); exit (1); } while (fgets (inbuf [recs], BIGBUF, stdin)) if (++recs > NRECS) { fprintf (stderr, "Too many records: %d\n", NRECS); exit (1); } /* * The records we read in are already in some sort of order, but we * want to ignore case and leading / and things. */ if (mergesort (inbuf, recs, BIGBUF, &collate)) { fprintf (stderr, "Can't sort entries: %s (%d)\n", strerror (errno), errno); exit (1); } #if debug for (i = 0; i < recs; i++) puts (inbuf [i]); #endif puts (".Index-head"); for (i = 0; i < recs; i++) { if (inbuf [i] [0] != ',') /* don't take broken entries (from man pages) */ { pnend = strlen (inbuf [i]) - 1; /* point to end of entry */ for (pnstart = pnend - 1; /* find blank after keyword */ (inbuf [i] [pnstart] == '\n') || isalnum (inbuf [i] [pnstart]); pnstart--); for (kwend = pnstart; inbuf [i] [kwend] == ' '; kwend--); pnstart++; /* point to page number */ kwend++; /* point past keyword */ if ((kwend != keywordlen) /* keywords are different length */ || memcmp (inbuf [i], keyword, kwend) ) /* or they're different */ { put_line (); /* print previous line */ memcpy (keyword, inbuf [i], kwend); /* save new keyword */ keyword [keywordlen = kwend] = '\0'; /* and delimit it */ } if (kwend > 0) /* more than just the tag? */ insert_ref (&inbuf [i] [pnstart]); /* insert a reference */ } } put_line (); /* show it */ puts (".Indexend"); return 0; }