V7/usr/src/cmd/c/c00.c
#
/* C compiler
*
*
*
* Called from cc:
* c0 source temp1 temp2 [ profileflag ]
* temp1 gets most of the intermediate code;
* strings are put on temp2, which c1 reads after temp1.
*/
#include "c0.h"
int isn 1;
int peeksym -1;
int line 1;
struct tnode funcblk { NAME, 0, NULL, NULL, NULL, NULL };
struct kwtab {
char *kwname;
int kwval;
} kwtab[]
{
"int", INT,
"char", CHAR,
"float", FLOAT,
"double", DOUBLE,
"struct", STRUCT,
"long", LONG,
"unsigned", UNSIGN,
"union", UNION,
"short", INT,
"auto", AUTO,
"extern", EXTERN,
"static", STATIC,
"register", REG,
"goto", GOTO,
"return", RETURN,
"if", IF,
"while", WHILE,
"else", ELSE,
"switch", SWITCH,
"case", CASE,
"break", BREAK,
"continue", CONTIN,
"do", DO,
"default", DEFAULT,
"for", FOR,
"sizeof", SIZEOF,
"typedef", TYPEDEF,
"enum", ENUM,
0, 0,
};
main(argc, argv)
char *argv[];
{
register char *sp;
register i;
register struct kwtab *ip;
if(argc<4) {
error("Arg count");
exit(1);
}
if (freopen(argv[1], "r", stdin)==NULL) {
error("Can't find %s", argv[1]);
exit(1);
}
if (freopen(argv[2], "w", stdout)==NULL || (sbufp=fopen(argv[3],"w"))==NULL) {
error("Can't create temp");
exit(1);
}
setbuf(sbufp, sbuf);
if (argc>4)
proflg++;
/*
* The hash table locations of the keywords
* are marked; if an identifier hashes to one of
* these locations, it is looked up in in the keyword
* table first.
*/
for (ip=kwtab; (sp = ip->kwname); ip++) {
i = 0;
while (*sp)
i =+ *sp++;
hshtab[i%HSHSIZ].hflag = FKEYW;
}
coremax = funcbase = curbase = sbrk(0);
while(!eof)
extdef();
outcode("B", EOFC);
strflg++;
outcode("B", EOFC);
exit(nerror!=0);
}
/*
* Look up the identifier in symbuf in the symbol table.
* If it hashes to the same spot as a keyword, try the keyword table
* first. An initial "." is ignored in the hash.
* Return is a ptr to the symbol table entry.
*/
lookup()
{
int ihash;
register struct hshtab *rp;
register char *sp, *np;
ihash = 0;
sp = symbuf;
while (sp<symbuf+NCPS)
ihash =+ *sp++&0177;
rp = &hshtab[ihash%HSHSIZ];
if (rp->hflag&FKEYW)
if (findkw())
return(KEYW);
while (*(np = rp->name)) {
for (sp=symbuf; sp<symbuf+NCPS;)
if (*np++ != *sp++)
goto no;
if (mossym != (rp->hflag&FMOS))
goto no;
csym = rp;
return(NAME);
no:
if (++rp >= &hshtab[HSHSIZ])
rp = hshtab;
}
if(++hshused >= HSHSIZ) {
error("Symbol table overflow");
exit(1);
}
rp->hclass = 0;
rp->htype = 0;
rp->hoffset = 0;
rp->subsp = NULL;
rp->strp = NULL;
rp->hpdown = NULL;
rp->hblklev = blklev;
rp->hflag =| mossym;
sp = symbuf;
for (np=rp->name; sp<symbuf+NCPS;)
*np++ = *sp++;
csym = rp;
return(NAME);
}
/*
* Search the keyword table.
* Ignore initial "." to avoid member-of-structure
* problems.
*/
findkw()
{
register struct kwtab *kp;
register char *p1, *p2;
char *wp;
int firstc;
wp = symbuf;
firstc = *wp;
for (kp=kwtab; (p2 = kp->kwname); kp++) {
p1 = wp;
while (*p1 == *p2++)
if (*p1++ == '\0') {
cval = kp->kwval;
return(1);
}
}
*wp = firstc;
return(0);
}
/*
* Return the next symbol from the input.
* peeksym is a pushed-back symbol, peekc is a pushed-back
* character (after peeksym).
* mosflg means that the next symbol, if an identifier,
* is a member of structure or a structure tag, and it
* gets a "." prepended to it to distinguish
* it from other identifiers.
*/
symbol() {
register c;
register char *sp;
register tline;
if (peeksym>=0) {
c = peeksym;
peeksym = -1;
if (c==NAME)
mosflg = 0;
return(c);
}
if (peekc) {
c = peekc;
peekc = 0;
} else
if (eof)
return(EOFC);
else
c = getchar();
loop:
if (c==EOF) {
eof++;
return(EOFC);
}
switch(ctab[c]) {
case SHARP:
if ((c=symbol())!=CON) {
error("Illegal #");
return(c);
}
tline = cval;
while (ctab[peekc]==SPACE)
peekc = getchar();
if (peekc=='"') {
sp = filename;
while ((c = mapch('"')) >= 0)
*sp++ = c;
*sp++ = 0;
peekc = getchar();
}
if (peekc != '\n') {
error("Illegal #");
while (getchar()!='\n' && eof==0)
;
}
peekc = 0;
line = tline;
return(symbol());
case INSERT: /* ignore newlines */
inhdr = 1;
c = getchar();
goto loop;
case NEWLN:
if (!inhdr)
line++;
inhdr = 0;
case SPACE:
c = getchar();
goto loop;
case PLUS:
return(subseq(c,PLUS,INCBEF));
case MINUS:
return(subseq(c,subseq('>',MINUS,ARROW),DECBEF));
case ASSIGN:
c = spnextchar();
peekc = 0;
if (c=='=')
return(EQUAL);
if (c==' ')
return(ASSIGN);
if (c=='<' || c=='>') {
if (spnextchar() != c) {
peeksym = ctab[c];
return(ASSIGN);
}
peekc = 0;
return(c=='<'? ASLSH: ASRSH);
}
if (ctab[c]>=PLUS && ctab[c]<=EXOR) {
if (spnextchar() != ' '
&& (c=='-' || c=='&' || c=='*')) {
error("Warning: %c= operator assumed", c);
nerror--;
}
c = ctab[c];
return(c+ASPLUS-PLUS);
}
peekc = c;
return(ASSIGN);
case LESS:
if (subseq(c,0,1)) return(LSHIFT);
return(subseq('=',LESS,LESSEQ));
case GREAT:
if (subseq(c,0,1)) return(RSHIFT);
return(subseq('=',GREAT,GREATEQ));
case EXCLA:
return(subseq('=',EXCLA,NEQUAL));
case BSLASH:
if (subseq('/', 0, 1))
return(MAX);
goto unkn;
case DIVIDE:
if (subseq('\\', 0, 1))
return(MIN);
if (subseq('*',1,0))
return(DIVIDE);
while ((c = spnextchar()) != EOFC) {
peekc = 0;
if (c=='*') {
if (spnextchar() == '/') {
peekc = 0;
c = getchar();
goto loop;
}
}
}
eof++;
error("Nonterminated comment");
return(0);
case PERIOD:
case DIGIT:
peekc = c;
return(getnum());
case DQUOTE:
cval = isn++;
return(STRING);
case SQUOTE:
return(getcc());
case LETTER:
sp = symbuf;
while(ctab[c]==LETTER || ctab[c]==DIGIT) {
if (sp<symbuf+NCPS)
*sp++ = c;
c = getchar();
}
while(sp<symbuf+NCPS)
*sp++ = '\0';
mossym = 0;
if (mosflg) {
mossym = FMOS;
mosflg = 0;
}
peekc = c;
if ((c=lookup())==KEYW && cval==SIZEOF)
c = SIZEOF;
return(c);
case AND:
return(subseq('&', AND, LOGAND));
case OR:
return(subseq('|', OR, LOGOR));
case UNKN:
unkn:
error("Unknown character");
c = getchar();
goto loop;
}
return(ctab[c]);
}
/*
* Read a number. Return kind.
*/
getnum()
{
register char *np;
register c, base;
int expseen, sym, ndigit;
char *nsyn;
int maxdigit;
nsyn = "Number syntax";
lcval = 0;
base = 10;
maxdigit = 0;
np = numbuf;
ndigit = 0;
sym = CON;
expseen = 0;
if ((c=spnextchar()) == '0')
base = 8;
for (;; c = getchar()) {
*np++ = c;
if (ctab[c]==DIGIT || (base==16) && ('a'<=c&&c<='f'||'A'<=c&&c<='F')) {
if (base==8)
lcval =<< 3;
else if (base==10)
lcval = ((lcval<<2) + lcval)<<1;
else
lcval =<< 4;
if (ctab[c]==DIGIT)
c =- '0';
else if (c>='a')
c =- 'a'-10;
else
c =- 'A'-10;
lcval =+ c;
ndigit++;
if (c>maxdigit)
maxdigit = c;
continue;
}
if (c=='.') {
if (base==16 || sym==FCON)
error(nsyn);
sym = FCON;
base = 10;
continue;
}
if (ndigit==0) {
sym = DOT;
break;
}
if ((c=='e'||c=='E') && expseen==0) {
expseen++;
sym = FCON;
if (base==16 || maxdigit>=10)
error(nsyn);
base = 10;
*np++ = c = getchar();
if (c!='+' && c!='-' && ctab[c]!=DIGIT)
break;
} else if (c=='x' || c=='X') {
if (base!=8 || lcval!=0 || sym!=CON)
error(nsyn);
base = 16;
} else if ((c=='l' || c=='L') && sym==CON) {
c = getchar();
sym = LCON;
break;
} else
break;
}
peekc = c;
if (maxdigit >= base)
error(nsyn);
if (sym==FCON) {
np[-1] = 0;
cval = np-numbuf;
return(FCON);
}
if (sym==CON && (lcval<0 || lcval>MAXINT&&base==10 || (lcval>>1)>MAXINT)) {
sym = LCON;
}
cval = lcval;
return(sym);
}
/*
* If the next input character is c, return b and advance.
* Otherwise push back the character and return a.
*/
subseq(c,a,b)
{
if (spnextchar() != c)
return(a);
peekc = 0;
return(b);
}
/*
* Write out a string, either in-line
* or in the string temp file labelled by
* lab.
*/
putstr(lab, amax)
{
register int c, max;
nchstr = 0;
max = amax;
if (lab) {
strflg++;
outcode("BNB", LABEL, lab, BDATA);
max = 10000;
} else
outcode("B", BDATA);
while ((c = mapch('"')) >= 0) {
if (nchstr < max) {
nchstr++;
if (nchstr%15 == 0)
outcode("0B", BDATA);
outcode("1N", c & 0377);
}
}
if (nchstr < max) {
nchstr++;
outcode("10");
}
outcode("0");
strflg = 0;
}
/*
* read a single-quoted character constant.
* The routine is sensitive to the layout of
* characters in a word.
*/
getcc()
{
register int c, cc;
register char *ccp;
char realc;
cval = 0;
ccp = &cval;
cc = 0;
while((c=mapch('\'')) >= 0)
if(cc++ < LNCPW)
*ccp++ = c;
if (cc>LNCPW)
error("Long character constant");
if (cc==1) {
realc = cval;
cval = realc;
}
return(CON);
}
/*
* Read a character in a string or character constant,
* detecting the end of the string.
* It implements the escape sequences.
*/
mapch(ac)
{
register int a, c, n;
static mpeek;
c = ac;
if (a = mpeek)
mpeek = 0;
else
a = getchar();
loop:
if (a==c)
return(-1);
switch(a) {
case '\n':
case '\0':
error("Nonterminated string");
peekc = a;
return(-1);
case '\\':
switch (a=getchar()) {
case 't':
return('\t');
case 'n':
return('\n');
case 'b':
return('\b');
case 'f':
return('\014');
case 'v':
return('\013');
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
n = 0;
c = 0;
while (++c<=3 && '0'<=a && a<='7') {
n =<< 3;
n =+ a-'0';
a = getchar();
}
mpeek = a;
return(n);
case 'r':
return('\r');
case '\n':
if (!inhdr)
line++;
inhdr = 0;
a = getchar();
goto loop;
}
}
return(a);
}
/*
* Read an expression and return a pointer to its tree.
* It's the classical bottom-up, priority-driven scheme.
* The initflg prevents the parse from going past
* "," or ":" because those delimiters are special
* in initializer (and some other) expressions.
*/
struct tnode *
tree()
{
int *op, opst[SSIZE], *pp, prst[SSIZE];
register int andflg, o;
register struct hshtab *cs;
int p, ps, os;
struct tnode *cmst[CMSIZ];
struct lnode *lcp;
curbase = funcbase;
op = opst;
pp = prst;
cp = cmst;
*op = SEOF;
*pp = 06;
andflg = 0;
advanc:
switch (o=symbol()) {
case NAME:
cs = csym;
if (cs->hclass==TYPEDEF)
goto atype;
if (cs->hclass==ENUMCON) {
*cp++ = cblock(cs->hoffset);
goto tand;
}
if (cs->hclass==0 && cs->htype==0)
if(nextchar()=='(') {
/* set function */
cs->hclass = EXTERN;
cs->htype = FUNC;
} else {
cs->hclass = STATIC;
error("%.8s undefined; func. %.8s", cs->name, funcsym->name);
if (initflg) {
cs->hclass = EXTERN;
error("(Warning only)");
nerror =- 2;
}
}
*cp++ = nblock(cs);
goto tand;
case FCON:
*cp++ = fblock(DOUBLE, copnum(cval));
goto tand;
case LCON:
cs = gblock(sizeof(*lcp));
cs->op = LCON;
cs->type = LONG;
cs->lvalue = lcval;
*cp++ = cs;
goto tand;
case CON:
*cp++ = cblock(cval);
goto tand;
/* fake a static char array */
case STRING:
putstr(cval, 0);
cs = gblock(sizeof(*cs));
cs->hclass = STATIC;
cs->hoffset = cval;
*cp++ = block(NAME, ARRAY+CHAR, &nchstr, NULL, cs);
tand:
if(cp>=cmst+CMSIZ) {
error("Expression overflow");
exit(1);
}
if (andflg)
goto syntax;
andflg = 1;
goto advanc;
case KEYW:
atype:
if (*op != LPARN || andflg)
goto syntax;
peeksym = o;
*cp++ = xprtype(gblock(sizeof(*xprtype())));
if ((o=symbol()) != RPARN)
goto syntax;
o = CAST;
--op;
--pp;
if (*op == SIZEOF) {
andflg = 1;
*pp = 100;
goto advanc;
}
goto oponst;
case INCBEF:
case DECBEF:
if (andflg)
o =+ 2;
goto oponst;
case COMPL:
case EXCLA:
case SIZEOF:
if (andflg)
goto syntax;
goto oponst;
case MINUS:
if (!andflg)
o = NEG;
andflg = 0;
goto oponst;
case AND:
case TIMES:
if (andflg)
andflg = 0;
else if (o==AND)
o = AMPER;
else
o = STAR;
goto oponst;
case LPARN:
if (andflg) {
o = symbol();
if (o==RPARN)
o = MCALL;
else {
peeksym = o;
o = CALL;
andflg = 0;
}
}
goto oponst;
case RBRACK:
case RPARN:
if (!andflg)
goto syntax;
goto oponst;
case DOT:
case ARROW:
mosflg++;
break;
case ASSIGN:
if (andflg==0 && PLUS<=*op && *op<=EXOR) {
o = *op-- + ASPLUS - PLUS;
pp--;
goto oponst;
}
break;
}
/* binaries */
if (andflg==0)
goto syntax;
andflg = 0;
oponst:
p = (opdope[o]>>9) & 077;
opon1:
ps = *pp;
if (p>ps || p==ps && (opdope[o]&RASSOC)!=0) {
switch (o) {
case INCAFT:
case DECAFT:
p = 37;
break;
case LPARN:
case LBRACK:
case CALL:
p = 04;
}
if (initflg) {
if ((o==COMMA && *op!=LPARN && *op!=CALL)
|| (o==COLON && *op!=QUEST)) {
p = 00;
goto opon1;
}
}
if (op >= &opst[SSIZE-1]) {
error("expression overflow");
exit(1);
}
*++op = o;
*++pp = p;
goto advanc;
}
--pp;
switch (os = *op--) {
case SEOF:
peeksym = o;
build(0); /* flush conversions */
return(*--cp);
case COMMA:
if (*op != CALL)
os = SEQNC;
break;
case CALL:
if (o!=RPARN)
goto syntax;
build(os);
goto advanc;
case MCALL:
*cp++ = NULL; /* empty arglist */
os = CALL;
break;
case INCBEF:
case INCAFT:
case DECBEF:
case DECAFT:
*cp++ = cblock(1);
break;
case LPARN:
if (o!=RPARN)
goto syntax;
goto advanc;
case LBRACK:
if (o!=RBRACK)
goto syntax;
build(LBRACK);
goto advanc;
}
build(os);
goto opon1;
syntax:
error("Expression syntax");
errflush(o);
return(0);
}
struct hshtab *
xprtype(atyb)
struct hshtab *atyb;
{
register struct hshtab *tyb;
struct hshtab typer;
int sc;
register char *md, *fb;
struct tnode *scp;
tyb = atyb;
fb = funcbase;
md = maxdecl;
scp = cp;
funcbase = curbase;
sc = DEFXTRN; /* will cause error if class mentioned */
getkeywords(&sc, &typer);
tyb->hclass = 0;
tyb->hblklev = 0;
decl1(&sc, &typer, 0, tyb);
funcbase = fb;
maxdecl = md;
cp = scp;
tyb->op = ETYPE;
return(tyb);
}
char *
copnum(len)
{
register char *s1, *s2, *s3;
s1 = s2 = gblock((len+LNCPW-1) & ~(LNCPW-1));
s3 = numbuf;
while (*s2++ = *s3++);
return(s1);
}