V7/usr/src/cmd/spell/spell.c

Find at most related files.
including files from this version of Unix.

#include "spell.h"
#define DLEV 2

/* Declared explicitly because it does not return int. */
char	*strcat();
/*
 * Forward declarations.  Apart from skipv(), which is a helper used by
 * ncy(), every routine named here appears as a handler (p1 or p2) in
 * suftab below and is called through that table by suffix().
 */
int	strip();
char	*skipv();
int	an();
int	s();
int	es();
int	ily();
int	ncy();
int	CCe();
int	VCe();
int	bility();
int	tion();
int	ize();
int	y_to_e();
int	i_to_y();
int	nop();
int	metry();

/*
 * Suffix rules, scanned in order by suffix(); the first entry whose
 * suffix matches the end of the word decides the outcome, so longer or
 * more specific endings must precede the shorter ones they contain.
 *
 *	suf	the suffix spelled backwards (suffix() compares it against
 *		the word from the last character toward the first)
 *	p1	handler tried first
 *	n1	number of characters to back up from the end of the word
 *		to find the end of the candidate stem handed to p1
 *	d1,a1	derivation strings handed to p1 as its d and a arguments;
 *		they are only used to build the -v output
 *	p2,n2,d2,a2
 *		optional second rule, tried by suffix() when p1 fails
 *
 * The table ends with an entry whose suf is a null pointer.
 */
struct suftab {
	char *suf;
	int (*p1)();
	int n1;
	char *d1;
	char *a1;
	int (*p2)();
	int n2;
	char *d2;
	char *a2;
} suftab[] = {
	{"ssen",ily,4,"-y+iness","+ness" },
	{"ssel",ily,4,"-y+i+less","+less" },
	{"se",s,1,"","+s",		es,2,"-y+ies","+es" },
	{"s'",s,2,"","+'s"},
	{"s",s,1,"","+s"},
	{"ecn",ncy,1,"","-t+ce"},
	/* was "-cy+t": the stem ends in t and the word in cy, as for "ecn" */
	{"ycn",ncy,1,"","-t+cy"},
	{"ytilb",nop,0,"",""},
	{"ytilib",bility,5,"-le+ility",""},
	{"elbaif",i_to_y,4,"-y+iable",""},
	{"elba",CCe,4,"-e+able","+able"},
	{"yti",CCe,3,"-e+ity","+ity"},
	{"ylb",y_to_e,1,"-e+y",""},
	{"yl",ily,2,"-y+ily","+ly"},
	{"laci",strip,2,"","+al"},
	{"latnem",strip,2,"","+al"},
	{"lanoi",strip,2,"","+al"},
	{"tnem",strip,4,"","+ment"},
	{"gni",CCe,3,"-e+ing","+ing"},
	{"reta",nop,0,"",""},
	{"re",strip,1,"","+r",		i_to_y,2,"-y+ier","+er"},
	{"de",strip,1,"","+d",		i_to_y,2,"-y+ied","+ed"},
	{"citsi",strip,2,"","+ic"},
	{"cihparg",i_to_y,1,"-y+ic",""},
	{"tse",strip,2,"","+st",	i_to_y,3,"-y+iest","+est"},
	{"cirtem",i_to_y,1,"-y+ic",""},
	{"yrtem",metry,0,"-ry+er",""},
	{"cigol",i_to_y,1,"-y+ic",""},
	{"tsigol",i_to_y,2,"-y+ist",""},
	{"tsi",VCe,3,"-e+ist","+ist"},
	/* was "+ist": the suffix removed by this rule is -ism */
	{"msi",VCe,3,"-e+ism","+ism"},
	{"noitacif",i_to_y,6,"-y+ication",""},
	{"noitazi",ize,5,"-e+ation",""},
	{"rota",tion,2,"-e+or",""},
	{"noit",tion,3,"-e+ion","+ion"},
	{"naino",an,3,"","+ian"},
	{"na",an,1,"","+n"},
	{"evit",tion,3,"-e+ive","+ive"},
	{"ezi",CCe,3,"-e+ize","+ize"},
	{"pihs",strip,4,"","+ship"},
	{"dooh",ily,4,"-y+ihood","+hood"},
	{"luf",ily,3,"-y+iful","+ful"},
	{"ekil",strip,4,"","+like"},
	0
};

/*
 * Prefixes that lookuppref() may remove from the front of a word.
 * They are tried in the order listed and the list is closed by a
 * null pointer.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",
	"electro",	"en",		"fore",
	"hyper",	"intra",	"inter",
	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",
	"mis",		"mono",		"multi",
	"non",		"out",		"over",
	"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",
	"stereo",	"sub",		"super",
	"thermo",	"ultra",
	/* "under" has to be tried before "un" */
	"under",	"un",
	0
};

int vflag;		/* -v given: putw() records how an accepted word was derived */
int xflag;		/* -x given: dict() prints every string it looks up */
char word[100];		/* word under test; the suffix handlers rewrite it in place */
char original[100];	/* the input line as read, used for output and to restore word */
char *deriv[40];	/* derivation fragments, indexed by recursion level */
char affix[40];		/* derivation text printed in front of the word (filled by putw) */

/*
 * Read one word per line from standard input and test each one.
 *
 * prime(argc,argv), defined elsewhere, prepares the hash table.  argv[2]
 * names a file that is created and receives every accepted word, preceded
 * by the contents of affix (which putw() fills in only under -v).  Words
 * that are not accepted are written to standard output.  Arguments from
 * argv[3] on that start with '-' are options:
 *	-b	run ise() over the suffix table
 *	-v	set vflag
 *	-x	set xflag
 *
 * For each word the following spellings are tried in turn:
 *   1. a word with no lower-case letters is first looked up exactly as
 *      typed; failing that, everything after its first character is
 *      folded to lower case;
 *   2. the word is looked up and run through suffix();
 *   3. if that fails and the first letter is upper case, the word is
 *      restored from original[] (fully folded if step 1 folded it), its
 *      first letter is lowered, and step 2 is repeated.
 *
 * Input lines longer than word[] can hold are truncated to fit; the
 * excess characters up to the newline are discarded.
 */
main(argc,argv)
char **argv;
{
	register char *ep, *cp;
	register char *dp;
	int fold;
	int j;
	FILE *file, *found;
	if(!prime(argc,argv)) {
		fprintf(stderr,
		    "spell: cannot initialize hash table\n");
		exit(1);
	}
	/* argv[2] is about to be used as a file name; make sure it exists */
	if(argc<3) {
		fprintf(stderr,
		    "spell: missing output file argument\n");
		exit(1);
	}
	found = fopen(argv[2],"w");
	if(found==NULL) {
		fprintf(stderr,"spell: cannot create %s\n",argv[2]);
		exit(1);
	}
	for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
		switch(argv[0][1]) {
		case 'b':
			ise();
			break;
		case 'v':
			vflag++;
			break;
		case 'x':
			xflag++;
			break;
		}
	/* the loop increment prints the word just processed to `file' */
	for(;; fprintf(file,"%s%s\n",affix,original)) {
		affix[0] = 0;
		file = found;
		/*
		 * Collect one line.  The terminating newline is stored at
		 * *ep.  ep stops advancing at the last slot of word[], so
		 * an overlong line cannot run off the end of the buffer.
		 */
		for(ep=word;(*ep=j=getchar())!='\n';) {
			if(j == EOF)
				exit(0);
			if(ep<&word[sizeof word-1])
				ep++;
		}
		for(cp=word,dp=original; cp<ep; )
			*dp++ = *cp++;
		*dp = 0;
		fold = 0;
		for(cp=word;cp<ep;cp++)
			if(islower(*cp))
				goto lcase;
		/* no lower-case letters at all: try it exactly as typed */
		if(putsuf(ep,".",0))
			continue;
		++fold;
		for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
			*dp = Tolower(*cp);
lcase:
		if(putsuf(ep,".",0)||suffix(ep,0))
			continue;
		if(isupper(word[0])) {
			/* undo any edits made by the handlers, then retry
			   with the initial capital removed */
			for(cp=original,dp=word; *dp = *cp++; dp++)
				if (fold) *dp = Tolower(*dp);
			word[0] = Tolower(word[0]);
			goto lcase;
		}
		/* not accepted: report it on standard output */
		file = stdout;
	}
}

/*
 * Try to account for the ending of word[0..ep) with one of the rules in
 * suftab.
 *
 *	ep	one past the last character of the candidate word
 *	lev	derivation level of the caller; DLEV is added to it here
 *
 * Only the first rule whose suffix matches is used: its p1 handler is
 * called and, if that fails and the rule has one, its p2 handler.  A
 * match is also abandoned when no vowel precedes the matched suffix.
 * Returns nonzero if a handler accepted the word, 0 otherwise.
 */
suffix(ep,lev)
char *ep;
{
	register struct suftab *t;
	register char *cp, *sp;
	lev += DLEV;
	/*
	 * The handlers called below store into deriv[] as far as lev+3
	 * (strip -> putsuf -> putw); stop before that leaves the array.
	 */
	if(lev+3>=(int)(sizeof deriv/sizeof deriv[0]))
		return(0);
	deriv[lev] = deriv[lev-1] = 0;
	for(t= &suftab[0];sp=t->suf;t++) {
		cp = ep;
		/* t->suf is reversed, so walk the word backwards; a word
		   shorter than the suffix cannot match and must not be
		   read before its first character */
		while(*sp)
			if(cp<=word||*--cp!=*sp++)
				goto next;
		/* require a vowel somewhere in front of the suffix */
		for(sp=cp; --sp>=word&&!vowel(*sp); ) ;
		if(sp<word)
			return(0);
		if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
			return(1);
		if(t->p2!=0) {
			deriv[lev] = deriv[lev+1] = 0;
			return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
		}
		return(0);
next:		;
	}
	return(0);
}

/*
 * Handler that never accepts.  suftab uses it for endings that only look
 * like a suffix, so that suffix() gives up on them.
 */
nop()
{
	return 0;
}

/*
 * Accept word[0..ep) if putsuf() finds it (recording derivation a), or
 * else if suffix() can peel another suffix off it.  d is not used.
 * Returns 1 on success, 0 on failure.
 */
strip(ep,d,a,lev)
char *ep,*d,*a;
{
	if(putsuf(ep,a,lev))
		return(1);
	return(suffix(ep,lev)!=0);
}

/*
 * Handler for a final "s".  It is refused once lev exceeds DLEV+1, and
 * also when both *ep and the character before it are 's'; otherwise the
 * stem is handed to strip().
 */
s(ep,d,a,lev)
char *ep,*d,*a;
{
	if(lev<=DLEV+1 && !(ep[0]=='s'&&ep[-1]=='s'))
		return(strip(ep,d,a,lev));
	return(0);
}

/*
 * Handler for the "-an"/"-ian" endings: only words that begin with an
 * upper-case letter (proper names) qualify, and the stem must be found
 * directly by putsuf(); no further suffix stripping is attempted.
 */
an(ep,d,a,lev)
char *ep,*d,*a;
{
	if(isupper(*word))
		return(putsuf(ep,a,lev));
	return(0);
}

/*
 * Handler for "-ization": put an 'e' at the end of the stem and try the
 * result, recording d as the derivation.  This is exactly what y_to_e()
 * does, so the work is delegated to it.
 */
ize(ep,d,a,lev)
char *ep,*d,*a;
{
	return(y_to_e(ep,d,a,lev));
}

/*
 * Overwrite the character at ep with 'e', so the stem grows by one
 * letter, and try the lengthened stem with strip().  The derivation
 * recorded is d; a is not used.
 */
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}

/*
 * Handler for endings that may follow an i that replaced a y: a stem
 * ending in 'i' goes through i_to_y(), any other stem straight to strip().
 */
ily(ep,d,a,lev)
char *ep,*d,*a;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}

/*
 * Handler for "-nce"/"-ncy".  Two applications of skipv(), starting at
 * the character before ep, must stay inside word[]; if they do, that
 * character is replaced by 't' and the result is tried with strip().
 */
ncy(ep,d,a,lev)
char *ep, *d, *a;
{
	if(skipv(skipv(ep-1))>=word) {
		ep[-1] = 't';
		return(strip(ep,d,a,lev));
	}
	return(0);
}

/*
 * Handler for "-bility": store 'l' at ep and let y_to_e() add the 'e'
 * after it, so that the stem ends in "le" before it is tried.
 */
bility(ep,d,a,lev)
char *ep,*d,*a;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}

/*
 * If the stem ends in 'i', turn that letter into 'y' and record d as the
 * derivation; otherwise leave the stem alone and record a.  Either way
 * the stem is then tried with strip().
 */
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}

/*
 * Handler for "-es", refused when lev exceeds DLEV.  A stem ending in
 * 'i' is passed to i_to_y(); one ending in s, h, z or x is passed to
 * strip(); any other stem is rejected.
 */
es(ep,d,a,lev)
char *ep,*d,*a;
{
	register int last;
	if(lev>DLEV)
		return(0);
	last = ep[-1];
	if(last=='i')
		return(i_to_y(ep,d,a,lev));
	if(last=='s'||last=='h'||last=='z'||last=='x')
		return(strip(ep,d,a,lev));
	return(0);
}

/*
 * Handler for "-metry": overwrite the last two characters before ep
 * with "er" and try the result with strip().
 */
metry(ep,d,a,lev)
char *ep, *d,*a;
{
	register char *tail;
	tail = ep-2;
	*tail++ = 'e';
	*tail = 'r';
	return(strip(ep,d,a,lev));
}

/*
 * Handler for "-tion", "-tive" and "-ator".  The decision rests on the
 * character two places before ep: after 'c' or 'r' the stem is looked up
 * as it stands with putsuf(); after 'a' an 'e' is added through
 * y_to_e(); anything else is rejected.
 */
tion(ep,d,a,lev)
char *ep,*d,*a;
{
	register int c;
	c = ep[-2];
	if(c=='c'||c=='r')
		return(putsuf(ep,a,lev));
	if(c=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}

/*	possible consonant-consonant-e ending*/
CCe(ep,d,a,lev)
char *ep,*d,*a;
{
	switch(ep[-1]) {
	case 'l':
		if(vowel(ep[-2]))
			break;
		switch(ep[-2]) {
		case 'l':
		case 'r':
		case 'w':
			break;
		default:
			return(y_to_e(ep,d,a,lev));
		}
		break;
	case 's':
		if(ep[-2]=='s')
			break;
	case 'c':
	case 'g':
		if(*ep=='a')
			return(0);
	case 'v':
	case 'z':
		if(vowel(ep[-2]))
			break;
	case 'u':
		if(y_to_e(ep,d,a,lev))
			return(1);
		if(!(ep[-2]=='n'&&ep[-1]=='g'))
			return(0);
	}
	return(VCe(ep,d,a,lev));
}

/*
 * Possible consonant-vowel-consonant-e ending.
 *
 * A stem ending in 'e' is rejected.  When the stem ends in a vowel
 * followed by a consonant, it is first tried with an 'e' written at ep
 * (derivation d); if that fails the overwritten character is put back.
 * Finally the unmodified stem is tried with strip() (derivation a).
 */
VCe(ep,d,a,lev)
char *ep,*d,*a;
{
	char saved;
	if(ep[-1]=='e')
		return(0);
	if(!vowel(ep[-1]) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		/* strip() ignores its second argument and records the third */
		if(strip(ep+1,"",d,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}

char *lookuppref(wp,ep)
char **wp;
char *ep;
{
	register char **sp;
	register char *bp,*cp;
	for(sp=preftab;*sp;sp++) {
		bp = *wp;
		for(cp= *sp;*cp;cp++,bp++)
			if(Tolower(*bp)!=*cp)
				goto next;
		for(cp=bp;cp<ep;cp++) 
			if(vowel(*cp)) {
				*wp = bp;
				return(*sp);
			}
next:	;
	}
	return(0);
}

/*
 * Look up word[0..ep), first as it stands and then with prefixes from
 * preftab removed one after another.
 *
 *	ep	end of the candidate word
 *	a	derivation string stored in deriv[lev] for this attempt
 *	lev	derivation level
 *
 * The prefixes removed so far are collected in a local buffer as
 * "+pre1+pre2..." and made visible to putw() through deriv[lev+1].
 * Prefix stripping stops when the next prefix would no longer fit in
 * that buffer.  deriv[lev+1] and deriv[lev+2] are cleared before
 * returning.  Returns 1 if some form was accepted by putw(), else 0.
 */
putsuf(ep,a,lev)
char *ep,*a;
{
	register char *cp;
	char *bp;
	register char *pp;
	register int n;
	int val = 0;
	char space[20];
	deriv[lev] = a;
	if(putw(word,ep,lev))
		return(1);
	bp = word;
	pp = space;
	deriv[lev+1] = pp;
	while(cp=lookuppref(&bp,ep)) {
		/* '+', the prefix and a terminating NUL must all fit */
		for(n=0; cp[n]; n++)
			;
		if(n+2 > &space[sizeof space]-pp)
			break;
		*pp++ = '+';
		while(*pp = *cp++)
			pp++;
		if(putw(bp,ep,lev+1)) {
			val = 1;
			break;
		}
	}
	deriv[lev+1] = deriv[lev+2] = 0;
	return(val);
}

/*
 * Look up the string bp..ep in the dictionary.
 *
 *	bp,ep	start and end of the candidate; *ep is the character that
 *		follows it in word[]
 *	lev	highest deriv[] slot that describes this candidate
 *
 * Candidates of one character or less are rejected.  When the candidate
 * is followed by a vowel:
 *   - it is rejected if monosyl() says it is a monosyllable;
 *   - if it is not in the dictionary but ends in a doubled letter, and
 *     the candidate without its last letter is a monosyllable, the
 *     shortened form is looked up instead and "+<letter>" is recorded
 *     as one more derivation step.
 * Under -v, on success the derivation strings deriv[lev] down to
 * deriv[1] (just deriv[0] when lev is 0) are appended to affix, followed
 * by a tab.  The text is truncated if affix cannot hold all of it; room
 * is always kept for the tab.
 * Returns the result of dict(): nonzero if accepted.
 */
putw(bp,ep,lev)
char *bp,*ep;
{
	register i, j;
	register char *ap, *sp;
	char duple[3];
	if(ep-bp<=1)
		return(0);
	if(vowel(*ep)) {
		if(monosyl(bp,ep))
			return(0);
	}
	i = dict(bp,ep);
	if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
		ep--;
		deriv[++lev] = duple;
		duple[0] = '+';
		duple[1] = *ep;
		duple[2] = 0;
		i = dict(bp,ep);
	}
	if(vflag==0||i==0)
		return(i);
	/* find the current end of affix, then append without overrunning it */
	for(ap=affix; *ap; ap++)
		;
	j = lev;
	do {
		if((sp=deriv[j])!=0)
			while(*sp && ap<&affix[sizeof affix-2])
				*ap++ = *sp++;
	} while(--j>0);
	if(ap<&affix[sizeof affix-1])
		*ap++ = '\t';
	*ap = 0;
	return(i);
}


/*
 * Return 1 if bp..ep is at least two characters long, ends in a vowel
 * followed by a consonant other than 'x' or 'w', and has no vowel in
 * front of that final vowel; return 0 otherwise.
 */
monosyl(bp,ep)
char *bp, *ep;
{
	register char *cp;
	if(ep-bp<2)
		return(0);
	if(vowel(ep[-1]))
		return(0);
	if(!vowel(ep[-2]))
		return(0);
	if(ep[-1]=='x'||ep[-1]=='w')
		return(0);
	for(cp=ep-3; cp>=bp; cp--)
		if(vowel(*cp))
			return(0);
	return(1);
}

/*
 * Scan backwards through word[] from s: step over one vowel if s is on
 * one, then over any run of non-vowels.  Returns the position reached,
 * which lies before word when the start of the buffer was passed.
 */
char *
skipv(s)
char *s;
{
	if(s<word)
		return(s);
	if(vowel(*s))
		s--;
	for(; s>=word; s--)
		if(vowel(*s))
			break;
	return(s);
}

/*
 * Return 1 if c, after folding with Tolower, is one of a e i o u y,
 * and 0 otherwise.
 */
vowel(c)
{
	register char *vp;
	register int lc;
	lc = Tolower(c);
	for(vp="aeiouy"; *vp; vp++)
		if(*vp==lc)
			return(1);
	return(0);
}

/*
 * Crummy way to Britishise: run ztos() over the suf, d1 and a1 strings
 * of every suftab entry, turning each 'z' into 's'.
 * NOTE: the strings are changed in place, so this relies on the table's
 * string constants being stored in writable memory.
 */
ise()
{
	register struct suftab *t;
	t = suftab;
	while(t->suf) {
		ztos(t->suf);
		ztos(t->d1);
		ztos(t->a1);
		t++;
	}
}
/*
 * Replace every 'z' in the NUL-terminated string s by 's', in place.
 */
ztos(s)
char *s;
{
	while(*s) {
		if(*s=='z')
			*s = 's';
		s++;
	}
}

/*
 * Test whether the string bp..ep is in the hashed dictionary.
 *
 * For each of the NP hash functions the characters of the string,
 * followed by a newline, are combined with the weights in pow2[i] and
 * reduced modulo p[i]; the string is accepted only if get() returns
 * nonzero for every one of the resulting values.  NP, pow2, p and get
 * are defined outside this file.
 * With -x every string that is looked up is printed, preceded by '='.
 * Returns 1 if accepted, 0 otherwise.
 */
dict(bp,ep)
char *bp, *ep;
{
	register char *wp;
	long h;
	register long *lp;
	register i;
	if(xflag)
		/* the precision for %.*s has to be an int, not a pointer difference */
		printf("=%.*s\n",(int)(ep-bp),bp);
	for(i=0; i<NP; i++) {
		for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
			h += *wp * *lp;
		h += '\n' * *lp;
		h %= p[i];
		if(get(h)==0)
			return(0);
	}
	return(1);
}