/* V7/usr/src/cmd/spell/spell.c */
#include "spell.h"
/* amount suffix() adds to its derivation level (lev) each time it is entered */
#define DLEV 2
char *strcat();
/*
 * Forward declarations of routines defined later in this file.
 * All but strip's helper skipv() are referenced by the suffix table
 * (suftab) below as rule functions.
 */
int strip();
char *skipv();
int an();
int s();
int es();
int ily();
int ncy();
int CCe();
int VCe();
int bility();
int tion();
int ize();
int y_to_e();
int i_to_y();
int nop();
int metry();
/*
 * The table strings that contain a 'z'.  ise() rewrites every 'z' in
 * the suf, d1 and a1 strings of the table to 's' in place, so these
 * must be writable arrays rather than string literals.
 */
static char suf_ization[] = "noitazi";
static char suf_ize[] = "ezi";
static char del_ize[] = "-e+ize";
static char add_ize[] = "+ize";

/*
 * Suffix table, scanned in order by suffix(); the first entry whose
 * suffix matches the end of the word decides the outcome.
 *
 *	suf	the suffix, spelled backwards
 *	p1	rule function tried first
 *	n1	number of trailing characters dropped before calling p1
 *	d1, a1	derivation strings handed to p1 as its d and a arguments
 *	p2, n2, d2, a2
 *		optional second rule, tried only when p1 fails
 *
 * A null suf ends the table.
 */
struct suftab {
	char *suf;
	int (*p1)();
	int n1;
	char *d1;
	char *a1;
	int (*p2)();
	int n2;
	char *d2;
	char *a2;
} suftab[] = {
	{"ssen",ily,4,"-y+iness","+ness" },
	{"ssel",ily,4,"-y+i+less","+less" },
	{"se",s,1,"","+s", es,2,"-y+ies","+es" },
	{"s'",s,2,"","+'s"},
	{"s",s,1,"","+s"},
	{"ecn",ncy,1,"","-t+ce"},
	{"ycn",ncy,1,"","-cy+t"},
	{"ytilb",nop,0,"",""},
	{"ytilib",bility,5,"-le+ility",""},
	{"elbaif",i_to_y,4,"-y+iable",""},
	{"elba",CCe,4,"-e+able","+able"},
	{"yti",CCe,3,"-e+ity","+ity"},
	{"ylb",y_to_e,1,"-e+y",""},
	{"yl",ily,2,"-y+ily","+ly"},
	{"laci",strip,2,"","+al"},
	{"latnem",strip,2,"","+al"},
	{"lanoi",strip,2,"","+al"},
	{"tnem",strip,4,"","+ment"},
	{"gni",CCe,3,"-e+ing","+ing"},
	{"reta",nop,0,"",""},
	{"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"},
	{"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"},
	{"citsi",strip,2,"","+ic"},
	{"cihparg",i_to_y,1,"-y+ic",""},
	{"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"},
	{"cirtem",i_to_y,1,"-y+ic",""},
	{"yrtem",metry,0,"-ry+er",""},
	{"cigol",i_to_y,1,"-y+ic",""},
	{"tsigol",i_to_y,2,"-y+ist",""},
	{"tsi",VCe,3,"-e+ist","+ist"},
	/* was "+ist": the derivation reported for -ism named the wrong suffix */
	{"msi",VCe,3,"-e+ism","+ism"},
	{"noitacif",i_to_y,6,"-y+ication",""},
	{suf_ization,ize,5,"-e+ation",""},
	{"rota",tion,2,"-e+or",""},
	{"noit",tion,3,"-e+ion","+ion"},
	{"naino",an,3,"","+ian"},
	{"na",an,1,"","+n"},
	{"evit",tion,3,"-e+ive","+ive"},
	{suf_ize,CCe,3,del_ize,add_ize},
	{"pihs",strip,4,"","+ship"},
	{"dooh",ily,4,"-y+ihood","+hood"},
	{"luf",ily,3,"-y+iful","+ful"},
	{"ekil",strip,4,"","+like"},
	0
};
/*
 * Prefixes that lookuppref() will peel off the front of a word.
 * The table is searched in order and the first match is taken, so a
 * longer prefix has to come before any shorter one it begins with.
 * A null pointer ends the table.
 */
char *preftab[] = {
	"anti",		"bio",		"dis",		"electro",
	"en",		"fore",		"hyper",	"intra",
	"inter",	"iso",		"kilo",		"magneto",
	"meta",		"micro",	"milli",	"mis",
	"mono",		"multi",	"non",		"out",
	"over",		"photo",	"poly",		"pre",
	"pseudo",	"re",		"semi",		"stereo",
	"sub",		"super",	"thermo",	"ultra",
	"under",	/* must precede un */
	"un",
	0
};
/* set by -v: putw() then records the derivation of each accepted word in affix */
int vflag;
/* set by -x: dict() then prints every candidate stem it looks up */
int xflag;
/* working copy of the current word; the suffix rules rewrite it in place */
char word[100];
/* the current word as it was read; this is what gets printed */
char original[100];
/* derivation strings indexed by level; putw() concatenates them from high index to low */
char *deriv[40];
/* text printed in front of the word on output (empty unless putw() fills it in) */
char affix[40];
/*
 * main - check one word per input line.
 *
 * argv[1] is passed on to prime() along with the rest of the argument
 * vector; argv[2] names the file that receives the words that were
 * accepted; any further arguments beginning with '-' are options:
 *	-b	call ise() to switch the suffix table to -ise spellings
 *	-v	set vflag
 *	-x	set xflag
 *
 * Each line of standard input is one word.  A word that is accepted is
 * written, preceded by affix, to the argv[2] file; a word that is not
 * is written to standard output.  The program exits with status 0 at
 * end of input (an unterminated last line is discarded).
 *
 * Lines longer than word[] can hold are truncated; the excess
 * characters are read and dropped.
 */
int
main(argc,argv)
int argc;
char **argv;
{
	register char *ep, *cp;
	register char *dp;
	int fold;
	int j;
	FILE *file, *found;

	if(!prime(argc,argv)) {
		fprintf(stderr,
			"spell: cannot initialize hash table\n");
		exit(1);
	}
	/* without argv[2] there is nowhere to write the accepted words */
	if(argc<3) {
		fprintf(stderr,
			"spell: output file argument missing\n");
		exit(1);
	}
	found = fopen(argv[2],"w");
	if(found==NULL) {
		fprintf(stderr,"spell: cannot create %s\n",argv[2]);
		exit(1);
	}
	for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++)
		switch(argv[0][1]) {
		case 'b':
			ise();
			break;
		case 'v':
			vflag++;
			break;
		case 'x':
			xflag++;
			break;
		}
	/* the loop increment prints the verdict for the word just handled */
	for(;; fprintf(file,"%s%s\n",affix,original)) {
		affix[0] = 0;
		file = found;
		/*
		 * Read one line into word[].  The last element is kept
		 * free for the '\n' that marks the end of the word.
		 */
		for(ep=word;;) {
			j = getchar();
			if(j == EOF)
				exit(0);
			if(j == '\n')
				break;
			if(ep < &word[sizeof(word)-1])
				*ep++ = j;
		}
		*ep = '\n';
		for(cp=word,dp=original; cp<ep; )
			*dp++ = *cp++;
		*dp = 0;
		fold = 0;
		for(cp=word;cp<ep;cp++)
			if(islower(*cp))
				goto lcase;
		/* no lower-case letter anywhere: try the word exactly as given */
		if(putsuf(ep,".",0))
			continue;
		/* then fold everything after the first character */
		++fold;
		for(cp=original+1,dp=word+1;dp<ep;dp++,cp++)
			*dp = Tolower(*cp);
lcase:
		if(putsuf(ep,".",0)||suffix(ep,0))
			continue;
		if(isupper(word[0])) {
			/*
			 * The rules may have rewritten word[]; restore it
			 * from original (folded again if need be) and retry
			 * with a lower-case first letter.
			 */
			for(cp=original,dp=word; *dp = *cp++; dp++)
				if (fold) *dp = Tolower(*dp);
			word[0] = Tolower(word[0]);
			goto lcase;
		}
		file = stdout;
	}
}
/*
 * suffix - try to account for the end of word[0..ep) with a known suffix.
 *
 * ep points just past the last character of the (possibly already
 * shortened) word; lev is the caller's derivation level.  The suffix
 * table is scanned in order.  For the first entry whose suffix matches
 * the end of the word, and provided a vowel remains in front of the
 * suffix, the entry's first rule is applied to the word with n1
 * characters dropped; if that fails and a second rule exists, it is
 * applied instead.  Only that first matching entry is ever tried.
 *
 * Returns nonzero if a rule accepted the word, 0 otherwise.
 */
int
suffix(ep,lev)
char *ep;
int lev;
{
	register struct suftab *t;
	register char *cp, *sp;

	lev += DLEV;
	/*
	 * The rules called from here store into deriv[] as far as
	 * index lev+3; give up rather than run off the end of it.
	 */
	if(lev+3 >= (int)(sizeof(deriv)/sizeof(deriv[0])))
		return(0);
	deriv[lev] = deriv[lev-1] = 0;
	for(t= &suftab[0];sp=t->suf;t++) {
		/* suf is stored reversed: compare it against the word back to front */
		cp = ep;
		while(*sp)
			if(cp<=word || *--cp!=*sp++)
				goto next;
		/* what is left in front of the suffix must contain a vowel */
		for(sp=cp; sp>word&&!vowel(sp[-1]); sp--)
			;
		if(sp<=word)
			return(0);
		if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1))
			return(1);
		if(t->p2!=0) {
			deriv[lev] = deriv[lev+1] = 0;
			return((*t->p2)(ep-t->n2,t->d2,t->a2,lev));
		}
		return(0);
	next: ;
	}
	return(0);
}
/*
 * nop - rule that never accepts; used by table entries whose only job
 * is to stop a shorter suffix further down the table from matching.
 */
int
nop()
{
	return 0;
}
/*
 * strip - accept word[0..ep) if it can be looked up as it stands
 * (recording a as the derivation), or if a further suffix can be
 * removed from it.  d is unused.  Returns 1 or 0.
 */
int
strip(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(putsuf(ep,a,lev))
		return(1);
	if(suffix(ep,lev))
		return(1);
	return(0);
}
/*
 * s - rule for a final -s.  Refused when reached from a nested
 * suffix() call (lev above DLEV+1), and when the character at ep and
 * the one before it are both 's'.  Otherwise behaves like strip().
 */
int
s(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(lev>DLEV+1 || (*ep=='s' && ep[-1]=='s'))
		return(0);
	return(strip(ep,d,a,lev));
}
/*
 * an - rule for -an/-ian: only a word that starts with a capital
 * letter (a proper name) qualifies, and the stem must be found
 * directly; no further suffix is removed.
 */
int
an(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(isupper(*word))
		return(putsuf(ep,a,lev));
	return(0);
}
/*
 * ize - rule for -ization: put an 'e' back at ep and continue with the
 * lengthened stem, reporting d as the derivation.  This is exactly
 * what y_to_e() does, so delegate to it.
 */
int
ize(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(y_to_e(ep,d,a,lev));
}
/*
 * y_to_e - store an 'e' at ep, then treat the word as ending just
 * after it; d is reported as the derivation.  a is unused.
 */
int
y_to_e(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'e';
	return(strip(ep+1,"",d,lev));
}
/*
 * ily - a stem ending in 'i' goes through i_to_y(); any other stem is
 * simply stripped.
 */
int
ily(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	return(ep[-1]=='i' ? i_to_y(ep,d,a,lev) : strip(ep,d,a,lev));
}
/*
 * ncy - rule for -nce/-ncy: replace the character before ep by 't'
 * and strip.  Refused when two skipv() steps back from ep-1 run off
 * the front of the word.
 */
int
ncy(ep,d,a,lev)
char *ep, *d, *a;
int lev;
{
	register char *front;

	front = skipv(skipv(ep-1));
	if(front<word)
		return(0);
	ep[-1] = 't';
	return(strip(ep,d,a,lev));
}
/*
 * bility - rule for -bility: store an 'l' at ep and let y_to_e() add
 * the 'e' after it, turning the ending into -ble.
 */
int
bility(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	ep[0] = 'l';
	return(y_to_e(ep+1,d,a,lev));
}
/*
 * i_to_y - if the stem ends in 'i', change that 'i' to 'y' and report
 * d as the derivation; otherwise leave the stem alone and report a.
 * Either way the result is handed to strip().
 */
int
i_to_y(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	if(ep[-1]!='i')
		return(strip(ep,"",a,lev));
	ep[-1] = 'y';
	return(strip(ep,"",d,lev));
}
/*
 * es - rule for a final -es.  Refused when lev is above DLEV.  A stem
 * ending in 'i' goes through i_to_y(); one ending in s, h, z or x is
 * stripped; any other stem is refused.
 */
int
es(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	register int last;

	if(lev>DLEV)
		return(0);
	last = ep[-1];
	if(last=='i')
		return(i_to_y(ep,d,a,lev));
	if(last=='s' || last=='h' || last=='z' || last=='x')
		return(strip(ep,d,a,lev));
	return(0);
}
/*
 * metry - rule for -metry: overwrite the two characters before ep
 * with "er" and strip.
 */
int
metry(ep,d,a,lev)
char *ep, *d,*a;
int lev;
{
	ep[-1] = 'r';
	ep[-2] = 'e';
	return(strip(ep,d,a,lev));
}
/*
 * tion - rule for -tion, -tive and -ator.  The decision rests on the
 * character two before ep: 'c' or 'r' means look the stem up as it
 * is; 'a' means put an 'e' back with y_to_e(); anything else is
 * refused.
 */
int
tion(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	register int c;

	c = ep[-2];
	if(c=='c' || c=='r')
		return(putsuf(ep,a,lev));
	if(c=='a')
		return(y_to_e(ep,d,a,lev));
	return(0);
}
/* possible consonant-consonant-e ending*/
CCe(ep,d,a,lev)
char *ep,*d,*a;
{
switch(ep[-1]) {
case 'l':
if(vowel(ep[-2]))
break;
switch(ep[-2]) {
case 'l':
case 'r':
case 'w':
break;
default:
return(y_to_e(ep,d,a,lev));
}
break;
case 's':
if(ep[-2]=='s')
break;
case 'c':
case 'g':
if(*ep=='a')
return(0);
case 'v':
case 'z':
if(vowel(ep[-2]))
break;
case 'u':
if(y_to_e(ep,d,a,lev))
return(1);
if(!(ep[-2]=='n'&&ep[-1]=='g'))
return(0);
}
return(VCe(ep,d,a,lev));
}
/*
 * VCe - possible consonant-vowel-consonant-e ending.
 * A stem ending in 'e' is refused.  If the stem ends in a vowel
 * followed by a non-vowel, first try it with an 'e' added (reporting
 * d); should that fail, the overwritten character is restored.  The
 * stem is then tried as it stands through strip().
 */
int
VCe(ep,d,a,lev)
char *ep,*d,*a;
int lev;
{
	char last, saved;

	last = ep[-1];
	if(last=='e')
		return(0);
	if(!vowel(last) && vowel(ep[-2])) {
		saved = ep[0];
		ep[0] = 'e';
		if(putsuf(ep+1,d,lev)||suffix(ep+1,lev))
			return(1);
		ep[0] = saved;
	}
	return(strip(ep,d,a,lev));
}
char *lookuppref(wp,ep)
char **wp;
char *ep;
{
register char **sp;
register char *bp,*cp;
for(sp=preftab;*sp;sp++) {
bp = *wp;
for(cp= *sp;*cp;cp++,bp++)
if(Tolower(*bp)!=*cp)
goto next;
for(cp=bp;cp<ep;cp++)
if(vowel(*cp)) {
*wp = bp;
return(*sp);
}
next: ;
}
return(0);
}
/*
 * putsuf - look up word[0..ep), with or without leading prefixes.
 *
 * a is recorded in deriv[lev] as the derivation of this step.  The
 * stem is first looked up whole.  Failing that, prefixes found by
 * lookuppref() are removed from the front one after another, and the
 * remainder is looked up after each removal; the removed prefixes are
 * collected as "+prefix" strings in deriv[lev+1].
 *
 * Returns 1 if some form was found, 0 otherwise.  deriv[lev+1] and
 * deriv[lev+2] are cleared before returning from the prefix search.
 */
int
putsuf(ep,a,lev)
char *ep,*a;
int lev;
{
	register char *cp;
	char *bp;
	register char *pp;
	register int n;
	int val = 0;
	/*
	 * Every prefix takes at least two characters of the word and
	 * costs one extra byte here for its '+', so one and a half times
	 * the word buffer is room enough for a word that fits in word[].
	 */
	char space[3*sizeof(word)/2+2];

	deriv[lev] = a;
	if(putw(word,ep,lev))
		return(1);
	bp = word;
	pp = space;
	deriv[lev+1] = pp;
	while(cp=lookuppref(&bp,ep)) {
		/* '+', the prefix and the final NUL must all fit */
		for(n=0; cp[n]; n++)
			;
		if(pp+n+2 > space+sizeof(space))
			break;
		*pp++ = '+';
		while(*pp = *cp++)
			pp++;
		if(putw(bp,ep,lev+1)) {
			val = 1;
			break;
		}
	}
	/* space[] is about to go out of scope; leave no pointer to it behind */
	deriv[lev+1] = deriv[lev+2] = 0;
	return(val);
}
/*
 * affcat - append s to affix[], keeping `reserve` bytes beyond the
 * terminating NUL free for later use.  Whatever does not fit is
 * dropped, so affix[] is never overrun.
 */
static int
affcat(s,reserve)
char *s;
int reserve;
{
	register char *p, *lim;

	lim = &affix[sizeof(affix)-1-reserve];
	for(p=affix; *p; p++)
		;
	while(*s && p<lim)
		*p++ = *s++;
	*p = 0;
	return(0);
}

/*
 * putw - look up the stem bp..ep in the dictionary.
 *
 * A stem of fewer than two characters is refused.  When the character
 * at ep is a vowel, a stem that monosyl() recognises is refused as it
 * stands.  If the lookup fails, the character at ep is a vowel, the
 * stem ends in a doubled letter and the stem without its last letter
 * satisfies monosyl(), the lookup is repeated without that letter and
 * "+<letter>" is recorded as an extra derivation step.
 *
 * When the stem is found and vflag is set, the derivation strings
 * deriv[lev] down to deriv[1] (deriv[0] when lev is 0) followed by a
 * tab are appended to affix; a derivation too long for affix[] is cut
 * short.
 *
 * Returns the result of dict(): nonzero if found, 0 if not.
 */
int
putw(bp,ep,lev)
char *bp,*ep;
int lev;
{
	register int i, j;
	char duple[3];

	if(ep-bp<=1)
		return(0);
	if(vowel(*ep)) {
		if(monosyl(bp,ep))
			return(0);
	}
	i = dict(bp,ep);
	if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) {
		ep--;
		deriv[++lev] = duple;
		duple[0] = '+';
		duple[1] = *ep;
		duple[2] = 0;
		i = dict(bp,ep);
	}
	if(vflag==0||i==0)
		return(i);
	j = lev;
	do {
		if(deriv[j])
			affcat(deriv[j],1);	/* keep a byte for the tab */
	} while(--j>0);
	affcat("\t",0);
	return(i);
}
/*
 * monosyl - does bp..ep consist of zero or more non-vowels, then one
 * vowel, then one final non-vowel other than 'x' or 'w'?
 * Returns 1 if so, 0 if not (including when it has fewer than two
 * characters).
 */
int
monosyl(bp,ep)
char *bp, *ep;
{
	register char *last, *p;

	if(ep<bp+2)
		return(0);
	last = ep-1;
	if(vowel(last[0]))
		return(0);
	if(!vowel(last[-1]))
		return(0);
	if(last[0]=='x' || last[0]=='w')
		return(0);
	/* nothing in front of that vowel may be a vowel */
	for(p=last-1; p>bp; p--)
		if(vowel(p[-1]))
			return(0);
	return(1);
}
/*
 * skipv - starting at s and moving toward the front of word, step
 * over one vowel if s is on one, then over every non-vowel that
 * follows.  Returns the position reached, which is below word when
 * the front of the word was passed.
 */
char *
skipv(s)
char *s;
{
	register char *p;

	p = s;
	if(p>=word && vowel(*p))
		p--;
	for(; p>=word && !vowel(*p); p--)
		;
	return(p);
}
/*
 * vowel - return 1 if c, after Tolower, is one of a e i o u y;
 * 0 otherwise.
 */
int
vowel(c)
int c;
{
	c = Tolower(c);
	if(c=='a' || c=='e' || c=='i' || c=='o' || c=='u' || c=='y')
		return(1);
	return(0);
}
/*
 * ise - crummy way to Britishise: turn every 'z' in the suf, d1 and
 * a1 strings of the suffix table into 's'.  The second-rule strings
 * (d2, a2) are not touched.
 * NOTE(review): the strings are rewritten in place, so any table
 * string containing a 'z' has to live in writable storage.
 */
int
ise()
{
	register struct suftab *t;

	t = suftab;
	while(t->suf) {
		ztos(t->suf);
		ztos(t->d1);
		ztos(t->a1);
		t++;
	}
}
/*
 * ztos - replace every 'z' in the NUL-terminated string s by 's',
 * in place.  Returns no useful value.
 */
int
ztos(s)
char *s;
{
	register char *p;

	p = s;
	while(*p != 0) {
		if(*p == 'z')
			*p = 's';
		p++;
	}
}
/*
 * dict - test whether the stem bp..ep is in the hashed word list.
 *
 * For each of the NP moduli in p[], a hash is formed from the stem's
 * characters and a closing '\n', each multiplied by the matching
 * entry of pow2[i], reduced modulo p[i] and tested with get().  The
 * stem counts as present only if every one of the NP tests succeeds.
 * With xflag set the stem is first printed, preceded by '='.
 *
 * NOTE(review): lp advances once per character of the stem; confirm
 * that each row of pow2 is at least one longer than the longest stem
 * that can arrive here.
 *
 * Returns 1 if present, 0 if not.
 */
int
dict(bp,ep)
char *bp, *ep;
{
	register char *wp;
	long h;
	register long *lp;
	register int i;

	/* the precision taken by '*' must be an int, not a pointer difference */
	if(xflag)
		printf("=%.*s\n",(int)(ep-bp),bp);
	for(i=0; i<NP; i++) {
		for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp)
			h += *wp * *lp;
		h += '\n' * *lp;
		h %= p[i];
		if(get(h)==0)
			return(0);
	}
	return(1);
}