/*
 * V7/usr/src/cmd/refer/what4.c
 *
 * (Source-browser navigation text, not part of the program:
 * "Find at most related files, including files from this version of Unix.")
 */

# include "what..c"
/* One table entry: a word (tx points into tbuf) and how often it was seen. */
struct wst { char *tx; int ct; } ;
/* NW*ZIPF is the most distinct words freqwd will store. */
# define NW 5
# define ZIPF 10
/* The table has HASHF times as many slots as the word limit. */
# define HASHF 3
/* Bytes of tbuf budgeted per stored word. */
# define WLEN 10
/* strcmp() result for equal strings. */
# define SAME 0
/* Parenthesized so that TSIZE expands safely inside larger expressions. */
# define TSIZE (HASHF*ZIPF*NW)
/* Hash modulus; freqwd sets it to the largest prime not exceeding TSIZE. */
int HSIZE;
static struct wst word[TSIZE];
/* tbuf holds the text of the stored words; tp is the next free byte in it. */
static char tbuf[NW*ZIPF*WLEN], *tp = tbuf;
/* Most input files freqwd will open. */
# define NF 10

freqwd ( fn, wd, nin )
	char *fn[], *wd[];
{
	FILE *fi[NF];
	int nw 0, i, any, nf, j, wexch(), wcomp();
	char tw[20];
for(HSIZE=TSIZE; !prime(HSIZE); HSIZE--);
for(nf=0; fn[nf] && nf<NF; nf++)
	fi[nf] = fn[nf][0] ? fopen(fn[nf], "r") : NULL;
do {
	any=0;
	for(i=0; i<nf; i++)
		{
		if (fi[i]==NULL) continue;
		if (gw(fi[i], tw)==0)
			{
			fclose(fi[i]);
			fi[i]==NULL;
			continue;
			}
		any=1;
		if (common(tw)) continue;
		if (strlen(tw)<3) continue;
		j = lookup (tw);
		if (j<0 && nw < ZIPF*NW)
			{
			j = -j;
			strcpy (tp, tw);
			word[j].tx = tp;
			while (*tp++);
			_assert (tp < tbuf+NW*ZIPF*WLEN);
			word[j].ct = 1;
			nw++;
			}
		else if (j>0)
			word[j].ct++;
		}
	} while (any>0);
shell ( TSIZE, wcomp, wexch );
for(nw=0; word[nw].ct >0 && nw<TSIZE; nw++)
	if (nw>=nin*2 && word[nw].ct != word[0].ct)
		break;
for(i=0; i<nw; i++)
	wd[i] = word[i].tx;
return(nw);
}

/*
 * lookup: find wt in the word[] hash table by linear probing.
 *
 * Returns the (positive) slot index when wt is already present, or the
 * negated index of the first free slot on its probe path when it is not.
 * Slot 0 is never used, because -0 could not be told apart from "found
 * nothing"; the probe steps over it, so the result is never 0.
 *
 * HSIZE must already be set (freqwd does this) and the table must contain
 * at least one free slot other than slot 0, or the probe will not end.
 */
int lookup (wt)
	char *wt;
{
	int h;

	for (h = hash(wt) % HSIZE; ; h = (h + 1) % HSIZE)
		{
		if (h == 0)
			continue;	/* step over the reserved slot */
		if (!word[h].tx)
			return ( -h );
		if (strcmp(wt, word[h].tx) == SAME)
			return (h);
		}
}

/*
 * hash: fold the characters of s into a non-negative integer.
 *
 * Each character is shifted left by its position modulo 5 and XORed into
 * the result.  Characters are taken as unsigned char so that bytes with the
 * high bit set never cause a negative value to be shifted; for ASCII input
 * the result is the same as it always was.
 */
int hash (s)
	char *s;
{
	unsigned k = 0;
	int c, i = 0;

	while ((c = (unsigned char)*s++) != 0)
		k ^= (unsigned)c << (i++ % 5);
	/* at most 8 bits shifted by at most 4, so k always fits in an int */
	return ((int)k);
}

/* Bytes the caller must provide in t; freqwd passes a 20-byte buffer. */
# define GWMAX 20

/*
 * gw: read the next word from f into t, folding upper case to lower.
 *
 * Characters are skipped until one that alphanum() accepts with a blank as
 * its predecessor; the word then runs until the first character alphanum()
 * rejects, which is consumed.  Returns 1 with a NUL-terminated word in t,
 * or 0 when f is NULL or no further word could be read.
 *
 * At most GWMAX-1 characters are stored; the rest of an over-long word is
 * read and discarded, so t is never overrun.  A word cut short by end of
 * file is returned rather than lost.
 */
int gw (f, t)
	char *t;
	FILE *f;
{
	int inword = 0, oldc = ' ', c;
	char *lim = t + GWMAX - 1;

	if (f == NULL) return (0);
	while ((c = getc(f)) != EOF)
		{
		if (isupper(c)) c = tolower(c);
		if (!inword)
			{
			/* oldc is still ' ' while skipping, so '-' or '\'' cannot start a word */
			if (!alphanum(c, oldc))
				continue;
			inword = 1;
			}
		if (!alphanum(c, oldc))
			{
			*t = 0;
			return (1);
			}
		if (t < lim)
			*t++ = c;
		oldc = c;
		}
	if (inword)
		{
		/* end of file ended the word */
		*t = 0;
		return (1);
		}
	return (0);
}

/*
 * alphanum: decide whether c may be part of a word.
 *
 * Letters and digits always qualify.  An apostrophe or a hyphen qualifies
 * only when oldc, the character before it, is a letter.
 * Returns 1 if c qualifies, 0 otherwise.
 */
int alphanum( c, oldc )
	int c, oldc;
{
	if (isalpha(c))
		return(1);
	if (isdigit(c))
		return(1);
	if (!isalpha(oldc))
		return(0);
	switch (c)
		{
	case '\'':
	case '-':
		return(1);
		}
	return(0);
}

/*
 * wcomp: comparison routine that freqwd hands to shell().
 * Returns 1 when word[n1] has been seen at least as often as word[n2],
 * and 0 otherwise.
 */
int wcomp (n1, n2)
	int n1, n2;
{
	if (word[n1].ct < word[n2].ct)
		return (0);
	return (1);
}

wexch (n1, n2)
{
struct wst tt;
tt.tx = word[n1].tx; tt.ct = word[n1].ct;
word[n1].tx = word[n2].tx; word[n1].ct = word[n2].ct;
word[n2].tx = tt.tx; word[n2].ct = tt.ct;
}

/*
 * prime: return 1 if n is a prime number, 0 otherwise.
 *
 * Values below 2 are not prime and 2 is.  Trial division by odd numbers
 * is slow, but freqwd only needs it once, to pick the hash modulus.  A
 * caller that counts down looking for a prime must start at 2 or above.
 */
int prime(n)
	int n;
{
	int i;

	if (n < 2) return(0);
	if (n%2 == 0) return(n == 2);
	/* i <= n/i is i*i <= n without the risk of overflowing i*i */
	for (i = 3; i <= n/i; i += 2)
		if (n%i == 0) return(0);
	return(1);
}
/*
 * trimnl: remove one trailing newline from the string s, if there is one.
 * An empty string is left alone; without that check the byte in front of
 * the buffer would be examined and possibly overwritten.
 * Always returns 0.
 */
int trimnl(s)
	char *s;
{
	if (*s == 0)
		return(0);
	while (*s) s++;
	if (*--s == '\n') *s = 0;
	return(0);
}


/* this is the test for what4.c as a standalone prog ...
main (argc, argv)
	char *argv[];
{
char *ff[10], *wd[20], **ffp ff;
int n, i;
while (--argc)
	*ffp++ = *++argv;
*ffp=0;
n=freqwd(ff,wd);
for(i=0; i<n; i++)
 printf("%s\n",wd[i]);
printf("total of %d items\n",n);
}
 /* .... */