Trailing-Edge
-
PDP-10 Archives
-
SRI_NIC_PERM_FS_1_19910112
-
c/kcc/cclex.c
There are 8 other files named cclex.c in the archive. Click here to see a list.
/* CCLEX.C - New KCC Lexer - Token input
**
** (c) Copyright Ken Harrenstien 1989
** All changes after v.150, 8-Apr-1988
** (c) Copyright Ken Harrenstien, SRI International 1985, 1986
** All changes after v.43, 8-Aug-1985
**
** Original version (C) 1981 K. Chen
*/
#include "cc.h"
#include "ccchar.h"
#include "cclex.h" /* Get stuff shared with CCINP */
/* Imported functions */
extern int tgmapch(); /* CC */
extern SYMBOL *symfind(); /* CCSYM */
extern int nextpp(); /* CCPP */
extern void pushpp(); /* CCPP */
/* Exported functions defined in CCLEX: */
void lexinit(); /* Initializes the lexer (CC) */
int nextoken(); /* Reads and parses next token (CCDECL,CCERR,CCSTMT) */
void tokpush(); /* Pushes back a token (like ungetc) (",",") */
/* Globals used */
extern int savelits; /* Set 0 by CC main parsing loop for each toplevel
** declaration parse, to indicate that string literal
** space is free and can be re-used again.
*/
/* See also stuff in "cclex.h" */
/* Globals set:
* int token Current token code.
* If token==T_ICONST, T_CCONST, T_FCONST, T_SCONST
* struct {} constant contains type+value of constant (CCINP,CCSTMT)
* If token==Q_IDENT or a reserved-word token,
* SYMBOL *csymbol contains pointer to SYMBOL for this identifier.
** If it hasn't yet been defined, it will be a
** global symbol with class SC_UNDEF.
* (CCDECL,CCERR,CCSTMT)
*
* Note: the "constant" structure is not correct after nextoken() returns
* a token which was pushed back by tokpush().
*
* Note that most routines operate, or begin to operate, on the current
* token in "token", rather than immediately reading the next token. When
* a token is completely processed and is not needed any more, nextoken()
* must be called in order to get rid of it and set up a new token for
* whatever will be next looking at the input. Occasionally "token" is
* set directly for proper "priming".
*/
/* Internal functions */
static int trident(), trintcon(), trfltcon(),
trstrcon(), trchrcon();
static int spcident(), cchar();
static int zerotok(), dzerotok(), szerotok();
/* Token stack - entries added by tokpush(), removed by nextok() */
static int tokstack;
static struct {
int ttoken;
SYMBOL *tsym;
} tstack[MAXTSTACK];
/* String literal char pool */
static char *slcptr; /* Pointer into slcpool */
static int slcleft; /* Countdown of # free chars left */
static int slcocnt; /* Saved slcleft for deriving string len */
static char slcpool[CPOOLSIZE]; /* String literal character pool */
/* Macros to handle deposit of chars into string literal char pool (slcpool) */
#define slcreset() (slcleft=CPOOLSIZE-1, slcptr=slcpool)
#define slcbeg() (slcocnt=slcleft, slcptr+1)
#define slcput(c) (--slcleft > 0 ? *++slcptr = (c) : (c))
#define slclen() (slcocnt - slcleft)
#define slcend() (--slcleft > 0 ? (*++slcptr = 0, slclen()) : -1)
/* LEXINIT() - Initialize the lexer
** The symbol table must have already been set up (by initsym)
** and the preprocessor initialized (by initinp)
** otherwise the initial nextoken() will not work properly.
*/
void
lexinit()
{
tokstack = 0;
savelits = 0; /* OK to reset string literal char pool */
if (!prepf) nextoken(); /* Prime with 1st token */
}
/* TOKPUSH(tok, sym) - Push a token
** Note that the "constant" structure is not pushed or changed.
** It is OK for the current token to be a constant, if the token pushed
** (arg to tokpush) is not a constant. In fact, no constants can be
** pushed. The code for unary() in CCSTMT is the only place where this
** sort of thing has to be taken into account.
*/
void
tokpush(t, s)
SYMBOL *s;
{
if(++tokstack >= MAXTSTACK) /* Token stack depth exceeded? */
--tokstack, int_error("tokpush: tokstack overflow");
else {
tstack[tokstack].ttoken = token;
tstack[tokstack].tsym = csymbol;
token = t;
csymbol = s;
}
}
/* NEXTOKEN() - Get next C language token, by transforming one or more
** PP-tokens from CCPP.
*/
int
nextoken()
{
if (tokstack) { /* Pop token from push-back stack */
csymbol = tstack[tokstack].tsym;
return token = tstack[tokstack--].ttoken;
}
csymbol = NULL; /* Clear sym associated with token */
for (;;) switch (token = nextpp()) { /* Get next preproc token */
case T_WSP: /* Just skip whitespace */
case T_EOL:
continue;
default: /* Most tokens returned directly! */
return token;
/* Transform things that need transforming */
case T_IDENT: return trident(); /* Transform the identifier */
case T_ICONST: return trintcon(); /* Integer constant */
case T_FCONST: return trfltcon(); /* Floating pt constant */
case T_CCONST: return trchrcon(); /* Char constant */
case T_SCONST: return trstrcon(); /* String constant */
/* Do debug checking to catch PP-only stuff. This would be
** caught later on by higher levels, but most responsible to screen
** them here.
*/
case T_MACRO:
case T_MACARG:
case T_MACINS:
case T_MACSTR:
case T_MACCAT:
int_error("nextoken: PP-only token %Q", token);
continue; /* Try again */
case T_SHARP:
case T_SHARP2:
error("# or ## can only appear in PP directives or macros");
continue; /* Try again with next token. */
case T_UNKNWN:
error("Unknown token: \"%s\"", curval.cp);
continue; /* Try again with next token. */
}
}
/* TRIDENT() - Transform identifer token
**
** Sets "csymbol" to point to the resulting symbol, and then returns the token
** corresponding to the given identifier (i.e. reserved word or Q_IDENT).
*/
static int
trident()
{
char ident[IDENTSIZE+4]; /* Identifier big enuf to trigger trunc */
char *cp;
if (!(cp = curval.cp)) {
int_error("trident: no string"); /* No string for T_IDENT */
return zerotok();
}
if (csymbol = cursym)
switch (csymbol->Sclass) {
case SC_RW: /* Reserved word, use its token */
return token = csymbol->Stoken;
case SC_MACRO: /* Paranoia check on CCPP */
int_error("trident: Escaped macro %S", csymbol);
default: /* Normal symbol, just return identifier */
return token = Q_IDENT;
}
if (*cp == SPC_IDQUOT && clevkcc) {
if (!spcident(ident, cp, sizeof(ident)-1))
return zerotok();
cp = ident;
} else int_error("trident: cursym 0 for \"%s\"", cp);
/* If no symbol already exists for identifier, find or get one.
** This will only happen when creating a symbol for a quoted identifier
** (which cannot be a macro), or recovering from an internal error.
** If a symbol is made, it will have class SC_UNDEF.
** symfind() will complain if the identifier was truncated.
*/
csymbol = symfind(cp, 1); /* Find sym or make one */
return token = Q_IDENT;
}
/* SPCIDENT(to, frm, cnt) - Get quoted identifier; special KCC extension.
** First char of "frm" string is '`'.
*/
static int
spcident(to, frm, cnt)
char *to, *frm;
int cnt;
{
register int c;
register char *s = to;
*s = SPC_IDQUOT; /* Start sym with special char */
for(;;) {
switch (c = *++frm) { /* Loop over input chars */
case '`': /* Terminator? */
if (!*++frm) /* Yes, string must stop now! */
break; /* Won! */
/* Drop thru to flag as error */
case 0:
int_error("spcident: Bad string for %s %Q", to, token);
return 0; /* Leave loop */
case '\\':
c = cchar(&frm); /* Get escaped char */
--frm; /* Back up so ++ gets next */
/* and drop thru to default */
default:
if (c == '.') c = '_'; /* Check symbol chars */
if (!iscsym(c) && (c != '$') && (c != '%'))
warn("Bad PDP10 symbol char: '%c'", c);
if (--cnt > 0)
*++s = c; /* add to ident. */
continue; /* and continue loop */
}
break; /* Leave loop */
}
*++s = '\0'; /* null terminate */
if (!to[1]) {
error("Quoted identifier is null");
return 0; /* Say no token */
}
return 1;
}
static int
zerotok()
{
constant.ctype = inttype;
constant.cvalue = 0;
return token = T_ICONST;
}
static int
dzerotok()
{
constant.ctype = dbltype;
constant.Cdouble = 0.0;
return token = T_FCONST;
}
/* TRINTCON() - Transform PP-number integer constant
*/
#define SIGN ((unsigned long)1<<(TGSIZ_LONG-1))
#define MAXPOSLONG ((long)((~(unsigned long)0)>>1))
static int
trintcon()
{
register char *cp;
register int c;
register long v = 0;
int ovfl = 0;
if (!(cp = curval.cp)) {
int_error("trintcon: no str");
return zerotok();
}
if ((c = *cp) == '0') { /* Octal/Hex prefix? */
c = *++cp;
if (c == 'x' || c == 'X') { /* Hex (base 16) */
while (isxdigit(c = *++cp)) {
if (v & (017 << (TGSIZ_LONG-4))) ovfl++;
v = ((unsigned long)v << 4) + toint(c);
}
} else { /* Octal (base 8) */
while (isodigit(c)) {
if (v & (07 << (TGSIZ_LONG-3))) ovfl++;
v = ((unsigned long)v << 3) + c - '0';
c = *++cp;
}
if (isdigit(c)) { /* Helpful msg for common error */
error("Octal constant cannot have '8' or '9'");
return zerotok();
}
}
constant.ctype = (v&SIGN) ? uinttype : inttype; /* Set right type */
} else { /* Decimal (base 10) */
v = c - '0';
while (isdigit(c = *++cp)) {
if (v < ((MAXPOSLONG-9)/10))
v = v*10 + c - '0'; /* Can't overflow, do it fast */
else { /* Slow unsigned multiply loop */
unsigned long pv, uv = v;
do {
pv = uv; /* Remember prev value */
uv = uv*10 + c - '0';
if (uv/10 != pv) ++ovfl; /* If cannot recover, ovflw */
} while (isdigit(c = *++cp));
v = uv;
break;
}
}
constant.ctype = (v&SIGN) ? ulongtype:inttype; /* Set right type */
}
/* Fix up result by checking suffixes and deciding type to use.
** Must use first of the types that can represent the value:
** Decimal: int, long, ulong
** Oct/Hex: int, uint, long, ulong
** U : uint, ulong
** L : long, ulong
** UL : ulong
**
** Since for the PDP-10 int and long are the same size, this basically
** just amounts to deciding whether signed or unsigned is appropriate.
** If sign bit set, unsigned type can hold value.
** If overflow is set, no type can hold value, use largest.
*/
if (ovfl) {
error("Integer constant overflow");
constant.ctype = ulongtype; /* Set to biggest type */
}
if (c) {
if ((c = toupper(c)) == 'L') {
if (!*++cp) constant.ctype = (ovfl||(v&SIGN)) ? ulongtype:longtype;
else if (toupper(*cp++) == 'U') constant.ctype = ulongtype;
else c = -1; /* Bad */
} else if (c == 'U') {
if (!*++cp) constant.ctype = (ovfl) ? ulongtype : uinttype;
else if (toupper(*cp++) == 'L') constant.ctype = ulongtype;
else c = -1; /* Bad */
} else c = -1; /* Bad */
if (c < 0 || *cp) /* Bad if flag set or anything left */
error("Bad integer constant suffix");
}
constant.cvalue = v; /* Now set value */
return token = T_ICONST;
}
/* TRFLTCON() - Transform floating-point PP-number constant
*/
long maxdbl[2] = {MAXPOSLONG, MAXPOSLONG};
#define MAXPOSDOUBLE (*(double *)maxdbl) /* Gross hack for now */
static int
trfltcon()
{
register char *cp;
register int c;
int expsign, exponent;
double divisor, value = 0; /* accumulated value */
int ovfl = 0;
/* Internal checks to verify token is correct */
if (!(cp = curval.cp) || (!isdigit(c = *cp) && c != '.')) {
int_error("trfltcon: bad str");
return dzerotok();
}
/* First do whole-number part. We use floating arithmetic to avoid
** the real possibility of integer overflow. Slower, but safer.
*/
for (; isdigit(c); c = *++cp) {
value = (value*10.0) + (c-'0');
if (value && value < 1.0) /* If exponent wrapped around, */
ovfl++; /* we overflowed. */
}
/* Now do fractional part if one was specified */
if (c == '.') {
divisor = 1.0; /* Place-value for post-. digits */
while (isdigit(*++cp))
value += (*cp - '0') / (divisor *= 10.0);
c = *cp;
}
/* Now exponent, if any */
if (c == 'E' || c == 'e') {
expsign = (c = *++cp); /* Get possible exponent sign */
if (c == '-' || c == '+')
c = *++cp;
if (!isdigit(c)) {
error("Bad floating constant exponent");
return dzerotok();
}
exponent = c - '0';
while (isdigit(c = *++cp)) {
exponent = exponent*10 + (c-'0');
if (exponent >= ((MAXPOSLONG-9)/10))
ovfl++, value = (expsign=='-' ? 0.0 : 1.0);
}
/* EXTREMELY dumb method of scaling value by exponent */
/* Fix this up later!! */
if (!ovfl) {
double pv;
if (expsign == '-')
while (--exponent >= 0) {
pv = value; /* Remember val so can */
if ((value /= 10.0) > pv) { /* check for underflow */
ovfl++;
value = 0;
break;
}
}
else
while (--exponent >= 0) {
pv = value;
if ((value *= 10.0) < pv) { /* Check for overflow */
ovfl++;
value = 1.0;
break;
}
}
}
}
/* See whether we overflowed or not, and fix up. */
if (ovfl) {
if (value) value = MAXPOSDOUBLE;
error("Floating constant %sflow", value ? "over" : "under");
}
/* Now check for suffix type specifier */
if (c) {
switch (toupper(c)) {
case 'F':
constant.ctype = flttype;
constant.Cfloat = value;
c = *++cp;
break;
case 'L':
constant.ctype = lngdbltype;
constant.Clngdbl = value;
c = *++cp;
break;
}
if (c) {
error("Bad floating constant suffix");
return dzerotok();
}
} else {
constant.ctype = dbltype; /* Set constant type to double */
constant.Cdouble = value; /* and set constant value */
}
return token = T_FCONST;
}
/* TRSTRCON() - Transform string constant
** The only chars not allowed in a string constant are
** newline, double-quote, and backslash. They must be
** entered as a character escape code.
**
** If using ANSI parsing, two successive string constants are
** merged into one!
*/
static int
trstrcon()
{
char *cp;
int wideflg, escval;
if (savelits++ == 0) { /* Can char pool be reset? */
slcreset(); /* Yes, do so */
}
constant.ctype = strcontype; /* Set constant type to string const */
constant.csptr = slcbeg(); /* Set constant string ptr */
/* Internal checks to verify token is correct */
if (!(cp = curval.cp)) {
int_error("trstrcon: no str");
return szerotok();
}
if ((wideflg = *cp) == 'L') ++cp; /* Get wchar_t indicator if any */
if (*cp != '"') {
int_error("trstrcon: no \"");
return szerotok();
}
for(;;) {
switch(*++cp) {
default:
if (tgmachuse.mapch)
slcput(tgmapch(*cp)); /* Map char if must, add to string */
else (void)slcput(*cp);
continue;
case '\\': /* Escape char */
slcput(escval = cchar(&cp)); /* Handle and map it */
--cp; /* Need to ensure ++ gets next */
if (escval & ~tgcmask) /* Did we truncate any bits? */
warn("Escape-seq value too large for char");
continue;
case '"': /* End of string? */
if (*++cp)
int_error("trstrcon: trailing junk");
if (clevel < CLEV_ANSI) /* Check for string concatenation? */
break; /* Nope, just return what we got */
/* Hairy stuff... must look at next token! */
for (;;) { /* Dumb loop to flush wsp */
switch (nextpp()) {
case T_WSP: case T_EOL: continue;
case T_SCONST:
if ((cp = curval.cp) /* Paranoia token check */
&& (*cp == wideflg) /* Wideness must match */
&& (*cp == '"' /* Paranoia format check */
|| (*cp == 'L' && *++cp == '"')))
break; /* Hurray, resume main loop! */
/* Everything's been set up... */
/* Can't concatenate next literal, drop thru */
default:
pushpp(); /* Push current token back */
cp = NULL; /* Say we're done */
break; /* Quit loop and return */
}
break; /* Stop inner loop */
}
if (cp) continue; /* Resume outer loop if concating */
break; /* else just leave switch */
case '\0':
int_error("trstrcon: no delim");
break;
}
break; /* Break from main switch is break from loop */
}
/* OK, now finalize string literal in pool. */
if ((constant.cslen = slcend()) < 0) {
error("Too many string literal chars, internal overflow");
return szerotok();
}
return token = T_SCONST;
}
static int
szerotok()
{
constant.csptr = ""; /* Set constant string ptr */
constant.cslen = 1;
return token = T_SCONST;
}
/* TRCHRCON() - Transform character constant.
*/
static int
trchrcon()
{
char *cp;
int wideflg;
unsigned long val;
/* Internal checks to verify token is correct */
if (!(cp = curval.cp)) {
int_error("trchrcon: No str");
return zerotok();
}
if ((wideflg = *cp) == 'L') ++cp; /* Get wchar_t indicator if any */
if (*cp != '\'' || !*++cp || *cp == '\'') {
int_error("trchrcon: Bad fmt");
return zerotok();
}
val = 0;
for (;;) {
if (val & (-1<<(TGSIZ_INT-TGSIZ_CHAR)))
error("Character constant overflow");
val <<= TGSIZ_CHAR;
val |= cchar(&cp) & ((1<<TGSIZ_CHAR)-1); /* Put into word */
if (*cp == '\'') /* Most common case, just one char */
break;
if (!*cp) {
int_error("trchrcon: Bad fmt");
return zerotok();
}
}
constant.ctype = (wideflg == 'L')
? chartype /* Wide char const is special type */
: inttype; /* Normal char const is type int */
constant.cvalue = val;
return T_CCONST;
}
/* CCHAR(&cp) - parse a character from a string literal, char constant,
** or quoted identifier.
** Handles escape sequences, and converts values into target char set.
** Input starts at 1st char of pointer, leaves pointer at first char
** not translated into resulting value.
*/
static int
cchar(acp)
char **acp;
{
register char *cp = *acp;
register int c = *cp;
if (c == '\\') switch (*++cp) { /* If escape char, handle it */
case 'a': c = 07; break; /* ANSI alert - map into BEL */
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
case '\'': c = '\''; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
case '?': c = '?'; break; /* To avoid trigraphs */
case 'x': /* Hexadecimal escape sequence */
if (isxdigit(*++cp)) {
int ovfl = 0;
c = toint(*cp);
while (isxdigit(*++cp)) {
if (c & (017 << (TGSIZ_INT-4))) ovfl++;
c = ((unsigned)c<<4) + toint(*cp);
}
if (ovfl) warn("Hex constant overflow");
} else error("Need hex digit after \\x");
*acp = cp;
return c; /* Specific hex value */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
c = *cp - '0';
if (isodigit(*++cp)) {
c = ((unsigned)c<<3) + *cp - '0';
if (isodigit(*++cp)) {
c = ((unsigned)c<<3) + *cp - '0';
++cp;
}
}
*acp = cp;
return c; /* Specific octal value */
case '`':
if (clevkcc) { c = '`'; break; }
/* Else not doing KCC extensions, drop through to complain. */
default:
error("Unknown escape char (ignoring backslash): '\\%c'",*cp);
}
*acp = ++cp;
return (tgmachuse.mapch ? tgmapch(c) : c); /* Map char if must */
}