/* $Header: toke.c,v 3.0.1.12 91/01/11 18:31:45 lwall Locked $
 *
 *    Copyright (c) 1989, Larry Wall
 *
 *    You may distribute under the terms of the GNU General Public License
 *    as specified in the README file that comes with the perl 3.0 kit.
 *
 * $Log:	toke.c,v $
 * Revision 3.0.1.12  91/01/11  18:31:45  lwall
 * patch42: eval'ed formats without proper termination blew up
 * patch42: whitespace now allowed after terminating . of format
 * 
 * Revision 3.0.1.11  90/11/10  02:13:44  lwall
 * patch38: added alarm function
 * patch38: tr was busted in metacharacters on signed char machines
 * 
 * Revision 3.0.1.10  90/10/16  11:20:46  lwall
 * patch29: the length of a search pattern was limited
 * patch29: added DATA filehandle to read stuff after __END__
 * patch29: added -M, -A and -C
 * patch29: added cmp and <=>
 * patch29: added caller
 * patch29: added scalar
 * patch29: added sysread and syswrite
 * patch29: added SysV IPC
 * patch29: added waitpid
 * patch29: tr/// now understands c, d and s options, and handles nulls right
 * patch29: 0x80000000 now makes unsigned value
 * patch29: Null could not be used as a delimiter
 * patch29: added @###.## fields to format
 * 
 * Revision 3.0.1.9  90/08/13  22:37:25  lwall
 * patch28: defined(@array) and defined(%array) didn't work right
 * 
 * Revision 3.0.1.8  90/08/09  05:39:58  lwall
 * patch19: added require operator
 * patch19: added -x switch to extract script from input trash
 * patch19: bare @name didn't add array to symbol table
 * patch19: Added __LINE__ and __FILE__ tokens
 * patch19: Added __END__ token
 * patch19: Numeric literals are now stored only in floating point
 * patch19: some support for FPS compiler misfunction
 * patch19: "\\$foo" not handled right
 * patch19: program and data can now both come from STDIN
 * patch19: "here" strings caused warnings about uninitialized variables
 * 
 * Revision 3.0.1.7  90/03/27  16:32:37  lwall
 * patch16: MSDOS support
 * patch16: formats didn't work inside eval
 * patch16: final semicolon in program wasn't optional with -p or -n
 * 
 * Revision 3.0.1.6  90/03/12  17:06:36  lwall
 * patch13: last semicolon of program is now optional, just for Randal
 * patch13: added splice operator: @oldelems = splice(@array,$offset,$len,LIST)
 * 
 * Revision 3.0.1.5  90/02/28  18:47:06  lwall
 * patch9: return grandfathered to never be function call
 * patch9: non-existent perldb.pl now gives reasonable error message
 * patch9: perl can now start up other interpreters scripts
 * patch9: line numbers were bogus during certain portions of foreach evaluation
 * patch9: null hereis core dumped
 * 
 * Revision 3.0.1.4  89/12/21  20:26:56  lwall
 * patch7: -d switch incompatible with -p or -n
 * patch7: " ''$foo'' " didn't parse right
 * patch7: grandfathered m'pat' and s'pat'repl' to not be package qualifiers
 * 
 * Revision 3.0.1.3  89/11/17  15:43:15  lwall
 * patch5: IBM PC/RT compiler can't deal with UNI() and LOP() macros
 * patch5: } misadjusted expection of subsequent term or operator
 * patch5: y/abcde// didn't work
 * 
 * Revision 3.0.1.2  89/11/11  05:04:42  lwall
 * patch2: fixed a CLINE macro conflict
 * 
 * Revision 3.0.1.1  89/10/26  23:26:21  lwall
 * patch1: disambiguated word after "sort" better
 * 
 * Revision 3.0  89/10/18  15:32:33  lwall
 * 3.0 baseline
 * 
 */

#include "EXTERN.h"
#include "perl.h"
#include "perly.h"

/* which backslash sequences to keep in m// or s// */

static char *patleave = "\\.^$@dDwWsSbB+*?|()-nrtf0123456789[{]}";

char *reparse;		/* if non-null, scanreg found ${foo[$bar]} */

#ifdef CLINE
#undef CLINE
#endif
#define CLINE (cmdline = (curcmd->c_line < cmdline ? curcmd->c_line : cmdline))

#define META(c) ((c) | 128)

#define RETURN(retval) return (bufptr = s,(int)retval)
#define OPERATOR(retval) return (expectterm = TRUE,bufptr = s,(int)retval)
#define TERM(retval) return (CLINE, expectterm = FALSE,bufptr = s,(int)retval)
#define LOOPX(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)LOOPEX)
#define FTST(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)FILETEST)
#define FUN0(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC0)
#define FUN1(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC1)
#define FUN2(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2)
#define FUN2x(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC2x)
#define FUN3(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC3)
#define FUN4(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC4)
#define FUN5(f) return(yylval.ival = f,expectterm = FALSE,bufptr = s,(int)FUNC5)
#define FL(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST)
#define FL2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FLIST2)
#define HFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)HSHFUN)
#define HFUN3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)HSHFUN3)
#define LFUN(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LVALFUN)
#define AOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)ADDOP)
#define MOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)MULOP)
#define EOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)EQOP)
#define ROP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)RELOP)
#define FOP(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP)
#define FOP2(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP2)
#define FOP3(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP3)
#define FOP4(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP4)
#define FOP22(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP22)
#define FOP25(f) return(yylval.ival=f,expectterm = FALSE,bufptr = s,(int)FILOP25)

/* This bit of chicanery makes a unary function followed by
 * a parenthesis into a function with one argument, highest precedence.
 */
#define UNI(f) return(yylval.ival = f,expectterm = TRUE,bufptr = s, \
	(*s == '(' || (s = skipspace(s), *s == '(') ? (int)FUNC1 : (int)UNIOP) )

/* This does similarly for list operators, merely by pretending that the
 * paren came before the listop rather than after.
 */
#define LOP(f) return(*s == '(' || (s = skipspace(s), *s == '(') ? \
	(*s = META('('), bufptr = oldbufptr, '(') : \
	(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP))
/* grandfather return to old style */
#define OLDLOP(f) return(yylval.ival=f,expectterm = TRUE,bufptr = s,(int)LISTOP)

char *skipspace (register char *s)
{
    while (s < bufend && isascii(*s) && isspace(*s))
	s++;
    return s;
}

#ifdef CRIPPLED_CC

#undef UNI
#undef LOP
#define UNI(f) return uni(f,s)
#define LOP(f) return lop(f,s)

int uni (int f, char *s)
{
    yylval.ival = f;
    expectterm = TRUE;
    bufptr = s;
    if (*s == '(')
	return FUNC1;
    s = skipspace(s);
    if (*s == '(')
	return FUNC1;
    else
	return UNIOP;
}

int lop (int f, char *s)
{
    if (*s != '(')
	s = skipspace(s);
    if (*s == '(') {
	*s = META('(');
	bufptr = oldbufptr;
	return '(';
    }
    else {
	yylval.ival=f;
	expectterm = TRUE;
	bufptr = s;
	return LISTOP;
    }
}

#endif /* CRIPPLED_CC */

int yylex (void)
{
    register char *s = bufptr;
    register char *d;
    register int tmp;
    static bool in_format = FALSE;
    static bool firstline = TRUE;

    oldoldbufptr = oldbufptr;
    oldbufptr = s;

  retry:
#ifdef YYDEBUG
    if (debug & 1)
    {
	if (index(s,'\n'))
	    fprintf(stderr,"Tokener at %s",s);
	else
	    fprintf(stderr,"Tokener at %s\n",s);
    }
#endif
#ifdef BADSWITCH
    if (*s & 128) {
	if ((*s & 127) == '(')
	    *s++ = '(';
	else
	    warn("Unrecognized character \\%03o ignored", *s++);
	goto retry;
    }
#endif
    switch (*s) {
    default:
	if ((*s & 127) == '(')
	    *s++ = '(';
	else
	    warn("Unrecognized character \\%03o ignored", *s++);
	goto retry;
    case 4:
    case 26:
	goto fake_eof;			/* emulate EOF on ^D or ^Z */
    case 0:
	if (!rsfp)
	    RETURN(0);
	if (s++ < bufend)
	    goto retry;			/* ignore stray nulls */
	if (firstline) {
	    firstline = FALSE;
	    if (minus_n || minus_p || perldb) {
		str_set(linestr,"");
		if (perldb) {
		    char *pdb = getenv("Perl$DB");

		    str_cat(linestr, pdb ? pdb : "require 'Perldb'");
		    str_cat(linestr, ";");
		}
		if (minus_n || minus_p) {
		    str_cat(linestr,"line: while (<>) {");
		    if (minus_a)
			str_cat(linestr,"@F=split(' ');");
		}
		oldoldbufptr = oldbufptr = s = str_get(linestr);
		bufend = linestr->str_ptr + linestr->str_cur;
		goto retry;
	    }
	}
	if (in_format) {
	    bufptr = bufend;
	    yylval.formval = load_format();
	    in_format = FALSE;
	    oldoldbufptr = oldbufptr = s = str_get(linestr) + 1;
	    bufend = linestr->str_ptr + linestr->str_cur;
	    OPERATOR(FORMLIST);
	}
	curcmd->c_line++;
#ifdef CRYPTSCRIPT
	cryptswitch();
#endif /* CRYPTSCRIPT */
	do {
	    if ((s = str_gets(linestr, rsfp, 0)) == Nullch) {
	      fake_eof:
		if (rsfp) {
		    if (preprocess)
			(void)mypclose(rsfp);
		    else if (rsfp == stdin)
			clearerr(stdin);
		    else
			(void)fclose(rsfp);
		    rsfp = Nullfp;
		}
		if (minus_n || minus_p) {
		    str_set(linestr,minus_p ? ";}continue{print" : "");
		    str_cat(linestr,";}");
		    oldoldbufptr = oldbufptr = s = str_get(linestr);
		    bufend = linestr->str_ptr + linestr->str_cur;
		    minus_n = minus_p = 0;
		    goto retry;
		}
		oldoldbufptr = oldbufptr = s = str_get(linestr);
		str_set(linestr,"");
		RETURN(';');	/* not infinite loop because rsfp is NULL now */
	    }
	    if (doextract && *linestr->str_ptr == '#')
		doextract = FALSE;
	} while (doextract);
	oldoldbufptr = oldbufptr = bufptr = s;
	if (perldb) {
	    STR *str = Str_new(85,0);

	    str_sset(str,linestr);
	    astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
	}
#ifdef DEBUG
	if (firstline) {
	    char *showinput();
	    s = showinput();
	}
#endif
	bufend = linestr->str_ptr + linestr->str_cur;
	if (curcmd->c_line == 1) {
	    if (*s == '#' && s[1] == '!') {
		if (!in_eval && !instr(s,"perl") && instr(origargv[0],"perl")) {
		    char **newargv;
		    char *cmd;

		    s += 2;
		    if (*s == ' ')
			s++;
		    cmd = s;
		    while (s < bufend && !isspace(*s))
			s++;
		    *s++ = '\0';
		    while (s < bufend && isspace(*s))
			s++;
		    if (s < bufend) {
			Newz(899,newargv,origargc+3,char*);
			newargv[1] = s;
			while (s < bufend && !isspace(*s))
			    s++;
			*s = '\0';
			Copy(origargv+1, newargv+2, origargc+1, char*);
		    }
		    else
			newargv = origargv;
		    newargv[0] = cmd;
		    execv(cmd,newargv);
		    fatal("Can't exec %s", cmd);
		}
	    }
	    else {
		while (s < bufend && isspace(*s))
		    s++;
		if (*s == ':')	/* for csh's that have to exec sh scripts */
		    s++;
	    }
	}
	goto retry;
    case ' ': case '\t': case '\f':
	s++;
	goto retry;
    case '#':
	if (preprocess && s == str_get(linestr) &&
	       s[1] == ' ' && isdigit(s[2])) {
	    curcmd->c_line = atoi(s+2)-1;
	    for (s += 2; isdigit(*s); s++) ;
	    d = bufend;
	    while (s < d && isspace(*s)) s++;
	    s[strlen(s)-1] = '\0';	/* wipe out newline */
	    if (*s == '"') {
		s++;
		s[strlen(s)-1] = '\0';	/* wipe out trailing quote */
	    }
	    if (*s)
		curcmd->c_filestab = fstab(s);
	    else
		curcmd->c_filestab = fstab(origfilename);
	    oldoldbufptr = oldbufptr = s = str_get(linestr);
	}
	/* FALL THROUGH */
    case '\n':
	if (in_eval && !rsfp) {
	    d = bufend;
	    while (s < d && *s != '\n')
		s++;
	    if (s < d)
		s++;
	    if (perldb) {
		STR *str = Str_new(85,0);

		str_nset(str,linestr->str_ptr, s - linestr->str_ptr);
		astore(stab_xarray(curcmd->c_filestab),(int)curcmd->c_line,str);
		str_chop(linestr, s);
	    }
	    if (in_format) {
		bufptr = s;
		yylval.formval = load_format();
		in_format = FALSE;
		oldoldbufptr = oldbufptr = s = bufptr + 1;
		TERM(FORMLIST);
	    }
	    curcmd->c_line++;
	}
	else {
	    *s = '\0';
	    bufend = s;
	}
	goto retry;
    case '-':
	if (s[1] && isalpha(s[1]) && !isalpha(s[2])) {
	    s++;
	    switch (*s++) {
	    case 'r': FTST(O_FTREAD);
	    case 'w': FTST(O_FTWRITE);
	    case 'L': FTST(O_FTLOCK);
	    case 'R': FTST(O_FTPREAD);
	    case 'W': FTST(O_FTPWRITE);
	    case 'e': FTST(O_FTIS);
	    case 'z': FTST(O_FTZERO);
	    case 's': FTST(O_FTSIZE);
	    case 'f': FTST(O_FTFILE);
	    case 'd': FTST(O_FTDIR);
	    case 't': FTST(O_FTTTY);
	    case 'T': FTST(O_FTTEXT);
	    case 'B': FTST(O_FTBINARY);
	    case 'A': stabent("\024",TRUE); FTST(O_FTTIME);
	    default:
		s -= 2;
		break;
	    }
	}
	tmp = *s++;
	if (*s == tmp) {
	    s++;
	    RETURN(DEC);
	}
	if (expectterm)
	    OPERATOR('-');
	else
	    AOP(O_SUBTRACT);
    case '+':
	tmp = *s++;
	if (*s == tmp) {
	    s++;
	    RETURN(INC);
	}
	if (expectterm)
	    OPERATOR('+');
	else
	    AOP(O_ADD);

    case '*':
	if (expectterm) {
	    s = scanreg(s,bufend,tokenbuf);
	    yylval.stabval = stabent(tokenbuf,TRUE);
	    TERM(STAR);
	}
	tmp = *s++;
	if (*s == tmp) {
	    s++;
	    OPERATOR(POW);
	}
	MOP(O_MULTIPLY);
    case '%':
	if (expectterm) {
	    s = scanreg(s,bufend,tokenbuf);
	    yylval.stabval = hadd(stabent(tokenbuf,TRUE));
	    TERM(HSH);
	}
	s++;
	MOP(O_MODULO);

    case '^':
    case '~':
    case '(':
    case ',':
    case ':':
    case '[':
	tmp = *s++;
	OPERATOR(tmp);
    case '{':
	tmp = *s++;
	if (isspace(*s) || *s == '#')
	    cmdline = NOLINE;   /* invalidate current command line number */
	OPERATOR(tmp);
    case ';':
	if (curcmd->c_line < cmdline)
	    cmdline = curcmd->c_line;
	tmp = *s++;
	OPERATOR(tmp);
    case ')':
    case ']':
	tmp = *s++;
	TERM(tmp);
    case '}':
	tmp = *s++;
	RETURN(tmp);
    case '&':
	s++;
	tmp = *s++;
	if (tmp == '&')
	    OPERATOR(ANDAND);
	s--;
	if (expectterm) {
	    d = bufend;
	    while (s < d && isspace(*s))
		s++;
	    if (isalpha(*s) || *s == '_' || *s == '\'')
		*(--s) = '\\';	/* force next ident to WORD */
	    OPERATOR(AMPER);
	}
	OPERATOR('&');
    case '|':
	s++;
	tmp = *s++;
	if (tmp == '|')
	    OPERATOR(OROR);
	s--;
	OPERATOR('|');
    case '=':
	s++;
	tmp = *s++;
	if (tmp == '=')
	    EOP(O_EQ);
	if (tmp == '~')
	    OPERATOR(MATCH);
	s--;
	OPERATOR('=');
    case '!':
	s++;
	tmp = *s++;
	if (tmp == '=')
	    EOP(O_NE);
	if (tmp == '~')
	    OPERATOR(NMATCH);
	s--;
	OPERATOR('!');
    case '<':
	if (expectterm) {
	    s = scanstr(s);
	    TERM(RSTRING);
	}
	s++;
	tmp = *s++;
	if (tmp == '<')
	    OPERATOR(LS);
	if (tmp == '=') {
	    tmp = *s++;
	    if (tmp == '>')
		EOP(O_NCMP);
	    s--;
	    ROP(O_LE);
	}
	s--;
	ROP(O_LT);
    case '>':
	s++;
	tmp = *s++;
	if (tmp == '>')
	    OPERATOR(RS);
	if (tmp == '=')
	    ROP(O_GE);
	s--;
	ROP(O_GT);

#define SNARFWORD \
	d = tokenbuf; \
	while (isascii(*s) && \
	  (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')) \
	    *d++ = *s++; \
	while (d[-1] == '\'') \
	    d--,s--; \
	*d = '\0'; \
	d = tokenbuf;

    case '$':
	if (s[1] == '#' && (isalpha(s[2]) || s[2] == '_')) {
	    s++;
	    s = scanreg(s,bufend,tokenbuf);
	    yylval.stabval = aadd(stabent(tokenbuf,TRUE));
	    TERM(ARYLEN);
	}
	d = s;
	s = scanreg(s,bufend,tokenbuf);
	if (reparse) {		/* turn ${foo[bar]} into ($foo[bar]) */
	  do_reparse:
	    s[-1] = ')';
	    s = d;
	    s[1] = s[0];
	    s[0] = '(';
	    goto retry;
	}
	yylval.stabval = stabent(tokenbuf,TRUE);
	TERM(REG);

    case '@':
	d = s;
	s = scanreg(s,bufend,tokenbuf);
	if (reparse)
	    goto do_reparse;
	yylval.stabval = aadd(stabent(tokenbuf,TRUE));
	TERM(ARY);

    case '/':			/* may either be division or pattern */
    case '?':			/* may either be conditional or pattern */
	if (expectterm) {
	    s = scanpat(s);
	    TERM(PATTERN);
	}
	tmp = *s++;
	if (tmp == '/')
	    MOP(O_DIVIDE);
	OPERATOR(tmp);

    case '.':
	if (!expectterm || !isdigit(s[1])) {
	    tmp = *s++;
	    if (*s == tmp) {
		s++;
		OPERATOR(DOTDOT);
	    }
	    AOP(O_CONCAT);
	}
	/* FALL THROUGH */
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '\'': case '"': case '`':
	s = scanstr(s);
	TERM(RSTRING);

    case '\\':	/* some magic to force next word to be a WORD */
	s++;	/* used by do and sub to force a separate namespace */
	/* FALL THROUGH */
    case '_':
	SNARFWORD;
	if (d[1] == '_') {
	    if (strEQ(d,"__LINE__") || strEQ(d,"__FILE__")) {
		ARG *arg = op_new(1);

		yylval.arg = arg;
		arg->arg_type = O_ITEM;
		if (d[2] == 'L')
		    (void)sprintf(tokenbuf,"%ld",(long)curcmd->c_line);
		else
		    strcpy(tokenbuf, stab_val(curcmd->c_filestab)->str_ptr);
		arg[1].arg_type = A_SINGLE;
		arg[1].arg_ptr.arg_str = str_make(tokenbuf,strlen(tokenbuf));
		TERM(RSTRING);
	    }
	    else if (strEQ(d,"__END__")) {
#ifndef TAINT
		STAB *stab;

		if ((stab = stabent("DATA",FALSE)) != Nullstab) {
		    stab->str_pok |= SP_MULTI;
		    stab_io(stab) = stio_new();
		    stab_io(stab)->ifp = rsfp;
		    if (preprocess)
			stab_io(stab)->type = '|';
		    else if (rsfp == stdin)
			stab_io(stab)->type = '-';
		    else
			stab_io(stab)->type = '<';
		    rsfp = Nullfp;
		}
#endif
		goto fake_eof;
	    }
	}
	break;
    case 'a': case 'A':
	SNARFWORD;
	if (strEQ(d,"atan2"))
	    FUN2(O_ATAN2);
	break;
    case 'b': case 'B':
	SNARFWORD;
	break;
    case 'c': case 'C':
	SNARFWORD;
	if (strEQ(d,"chop"))
	    LFUN(O_CHOP);
	if (strEQ(d,"continue"))
	    OPERATOR(CONTINUE);
	if (strEQ(d,"chdir"))
	    UNI(O_CHDIR);
	if (strEQ(d,"close"))
	    FOP(O_CLOSE);
	if (strEQ(d,"closedir"))
	    FOP(O_CLOSEDIR);
	if (strEQ(d,"cmp"))
	    EOP(O_SCMP);
	if (strEQ(d,"caller"))
	    UNI(O_CALLER);
	if (strEQ(d,"crypt")) {
#ifdef FCRYPT
	    init_des();
#endif
	    FUN2(O_CRYPT);
	}
	if (strEQ(d,"cos"))
	    UNI(O_COS);
	break;
    case 'd': case 'D':
	SNARFWORD;
	if (strEQ(d,"do")) {
	    d = bufend;
	    while (s < d && isspace(*s))
		s++;
	    if (isalpha(*s) || *s == '_')
		*(--s) = '\\';	/* force next ident to WORD */
	    OPERATOR(DO);
	}
	if (strEQ(d,"die"))
	    LOP(O_DIE);
	if (strEQ(d,"defined"))
	    LFUN(O_DEFINED);
	if (strEQ(d,"delete"))
	    OPERATOR(DELETE);
	if (strEQ(d,"dbmopen"))
	    HFUN3(O_DBMOPEN);
	if (strEQ(d,"dbmclose"))
	    HFUN(O_DBMCLOSE);
	break;
    case 'e': case 'E':
	SNARFWORD;
	if (strEQ(d,"else"))
	    OPERATOR(ELSE);
	if (strEQ(d,"elsif")) {
	    yylval.ival = curcmd->c_line;
	    OPERATOR(ELSIF);
	}
	if (strEQ(d,"eq") || strEQ(d,"EQ"))
	    EOP(O_SEQ);
	if (strEQ(d,"exit"))
	    UNI(O_EXIT);
	if (strEQ(d,"eval")) {
	    allstabs = TRUE;		/* must initialize everything since */
	    UNI(O_EVAL);		/* we don't know what will be used */
	}
	if (strEQ(d,"eof"))
	    FOP(O_EOF);
	if (strEQ(d,"exp"))
	    UNI(O_EXP);
	if (strEQ(d,"each"))
	    HFUN(O_EACH);
	if (strEQ(d,"exec")) {
	    LOP(O_EXEC_OP);
	}
	break;
    case 'f': case 'F':
	SNARFWORD;
	if (strEQ(d,"for") || strEQ(d,"foreach")) {
	    yylval.ival = curcmd->c_line;
	    OPERATOR(FOR);
	}
	if (strEQ(d,"format")) {
	    d = bufend;
	    while (s < d && isspace(*s))
		s++;
	    if (isalpha(*s) || *s == '_')
		*(--s) = '\\';	/* force next ident to WORD */
	    in_format = TRUE;
	    allstabs = TRUE;		/* must initialize everything since */
	    OPERATOR(FORMAT);		/* we don't know what will be used */
	}
	break;
    case 'g': case 'G':
	SNARFWORD;
	if (strEQ(d,"gt") || strEQ(d,"GT"))
	    ROP(O_SGT);
	if (strEQ(d,"ge") || strEQ(d,"GE"))
	    ROP(O_SGE);
	if (strEQ(d,"grep"))
	    FL2(O_GREP);
	if (strEQ(d,"goto"))
	    LOOPX(O_GOTO);
	if (strEQ(d,"gmtime"))
	    UNI(O_GMTIME);
	if (strEQ(d,"getc"))
	    FOP(O_GETC);
	break;
    case 'h': case 'H':
	SNARFWORD;
	if (strEQ(d,"hex"))
	    UNI(O_HEX);
	break;
    case 'i': case 'I':
	SNARFWORD;
	if (strEQ(d,"if")) {
	    yylval.ival = curcmd->c_line;
	    OPERATOR(IF);
	}
	if (strEQ(d,"index"))
	    FUN2x(O_INDEX);
	if (strEQ(d,"int"))
	    UNI(O_INT);
	break;
    case 'j': case 'J':
	SNARFWORD;
	if (strEQ(d,"join"))
	    FL2(O_JOIN);
	break;
    case 'k': case 'K':
	SNARFWORD;
	if (strEQ(d,"keys"))
	    HFUN(O_KEYS);
	if (strEQ(d,"kill"))
	    UNI(O_KILL);
	break;
    case 'l': case 'L':
	SNARFWORD;
	if (strEQ(d,"last"))
	    LOOPX(O_LAST);
	if (strEQ(d,"local"))
	    OPERATOR(LOCAL);
	if (strEQ(d,"length"))
	    UNI(O_LENGTH);
	if (strEQ(d,"lt") || strEQ(d,"LT"))
	    ROP(O_SLT);
	if (strEQ(d,"le") || strEQ(d,"LE"))
	    ROP(O_SLE);
	if (strEQ(d,"localtime"))
	    UNI(O_LOCALTIME);
	if (strEQ(d,"log"))
	    UNI(O_LOG);
	break;
    case 'm': case 'M':
	if (s[1] == '\'') {
	    d = "m";
	    s++;
	}
	else {
	    SNARFWORD;
	}
	if (strEQ(d,"m")) {
	    s = scanpat(s-1);
	    if (yylval.arg)
		TERM(PATTERN);
	    else
		RETURN(1);	/* force error */
	}
	if (strEQ(d,"mkdir"))
	    UNI(O_MKDIR);
	break;
    case 'n': case 'N':
	SNARFWORD;
	if (strEQ(d,"next"))
	    LOOPX(O_NEXT);
	if (strEQ(d,"ne") || strEQ(d,"NE"))
	    EOP(O_SNE);
	break;
    case 'o': case 'O':
	SNARFWORD;
	if (strEQ(d,"open"))
	    OPERATOR(OPEN);
	if (strEQ(d,"ord"))
	    UNI(O_ORD);
	if (strEQ(d,"oct"))
	    UNI(O_OCT);
	if (strEQ(d,"opendir"))
	    FOP2(O_OPENDIR);
	break;
    case 'p': case 'P':
	SNARFWORD;
	if (strEQ(d,"print")) {
	    checkcomma(s,"filehandle");
	    LOP(O_PRINT);
	}
	if (strEQ(d,"printf")) {
	    checkcomma(s,"filehandle");
	    LOP(O_PRTF);
	}
	if (strEQ(d,"push")) {
	    yylval.ival = O_PUSH;
	    OPERATOR(PUSH);
	}
	if (strEQ(d,"pop"))
	    OPERATOR(POP);
	if (strEQ(d,"pack"))
	    FL2(O_PACK);
	if (strEQ(d,"package"))
	    OPERATOR(PACKAGE);
	break;
    case 'q': case 'Q':
	SNARFWORD;
	if (strEQ(d,"q")) {
	    s = scanstr(s-1);
	    TERM(RSTRING);
	}
	if (strEQ(d,"qq")) {
	    s = scanstr(s-2);
	    TERM(RSTRING);
	}
	break;
    case 'r': case 'R':
	SNARFWORD;
	if (strEQ(d,"return"))
	    OLDLOP(O_RETURN);
	if (strEQ(d,"require")) {
	    allstabs = TRUE;		/* must initialize everything since */
	    UNI(O_REQUIRE);		/* we don't know what will be used */
	}
	if (strEQ(d,"reset"))
	    UNI(O_RESET);
	if (strEQ(d,"redo"))
	    LOOPX(O_REDO);
	if (strEQ(d,"rename"))
	    FUN2(O_RENAME);
	if (strEQ(d,"rand"))
	    UNI(O_RAND);
	if (strEQ(d,"rmdir"))
	    UNI(O_RMDIR);
	if (strEQ(d,"rindex"))
	    FUN2x(O_RINDEX);
	if (strEQ(d,"read"))
	    FOP3(O_READ);
	if (strEQ(d,"readdir"))
	    FOP(O_READDIR);
	if (strEQ(d,"rewinddir"))
	    FOP(O_REWINDDIR);
	if (strEQ(d,"reverse"))
	    LOP(O_REVERSE);
	break;
    case 's': case 'S':
	if (s[1] == '\'') {
	    d = "s";
	    s++;
	}
	else {
	    SNARFWORD;
	}
	if (strEQ(d,"s")) {
	    s = scansubst(s);
	    if (yylval.arg)
		TERM(SUBST);
	    else
		RETURN(1);	/* force error */
	}
	switch (d[1]) {
	case 'a':
	case 'b':
	    break;
	case 'c':
	    if (strEQ(d,"scalar"))
		UNI(O_SCALAR);
	    break;
	case 'd':
	    break;
	case 'e':
	    if (strEQ(d,"select"))
		OPERATOR(SSELECT);
	    if (strEQ(d,"seek"))
		FOP3(O_SEEK);
	    if (strEQ(d,"seekdir"))
		FOP2(O_SEEKDIR);
	    break;
	case 'f':
	case 'g':
	    break;
	case 'h':
	    if (strEQ(d,"shift"))
		TERM(SHIFT);
	    break;
	case 'i':
	    if (strEQ(d,"sin"))
		UNI(O_SIN);
	    break;
	case 'j':
	case 'k':
	    break;
	case 'l':
	    if (strEQ(d,"sleep"))
		UNI(O_SLEEP);
	    break;
	case 'm':
	case 'n':
	    break;
	case 'o':
	    if (strEQ(d,"sort")) {
		checkcomma(s,"subroutine name");
		d = bufend;
		while (s < d && isascii(*s) && isspace(*s)) s++;
		if (*s == ';' || *s == ')')		/* probably a close */
		    fatal("sort is now a reserved word");
		if (isascii(*s) && (isalpha(*s) || *s == '_')) {
		    for (d = s; isalpha(*d) || isdigit(*d) || *d == '_'; d++) ;
		    strncpy(tokenbuf,s,d-s);
		    if (strNE(tokenbuf,"keys") &&
			strNE(tokenbuf,"values") &&
			strNE(tokenbuf,"split") &&
			strNE(tokenbuf,"grep") &&
			strNE(tokenbuf,"readdir") &&
			strNE(tokenbuf,"unpack") &&
			strNE(tokenbuf,"do") &&
			(d >= bufend || isspace(*d)) )
			*(--s) = '\\';	/* force next ident to WORD */
		}
		LOP(O_SORT);
	    }
	    break;
	case 'p':
	    if (strEQ(d,"split"))
		TERM(SPLIT);
	    if (strEQ(d,"sprintf"))
		FL(O_SPRINTF);
	    if (strEQ(d,"splice")) {
		yylval.ival = O_SPLICE;
		OPERATOR(PUSH);
	    }
	    break;
	case 'q':
	    if (strEQ(d,"sqrt"))
		UNI(O_SQRT);
	    break;
	case 'r':
	    if (strEQ(d,"srand"))
		UNI(O_SRAND);
	    break;
	case 's':
	    break;
	case 't':
	    if (strEQ(d,"stat"))
		FOP(O_STAT);
	    if (strEQ(d,"study")) {
		sawstudy++;
		LFUN(O_STUDY);
	    }
	    break;
	case 'u':
	    if (strEQ(d,"substr"))
		FUN2x(O_SUBSTR);
	    if (strEQ(d,"sub")) {
		subline = curcmd->c_line;
		d = bufend;
		while (s < d && isspace(*s))
		    s++;
		if (isalpha(*s) || *s == '_' || *s == '\'') {
		    if (perldb) {
			str_sset(subname,curstname);
			str_ncat(subname,"'",1);
			for (d = s+1;
			  isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\'';
			  d++);
			if (d[-1] == '\'')
			    d--;
			str_ncat(subname,s,d-s);
		    }
		    *(--s) = '\\';	/* force next ident to WORD */
		}
		else if (perldb)
		    str_set(subname,"?");
		OPERATOR(SUB);
	    }
	    break;
	case 'v':
	case 'w':
	case 'x':
	    break;
	case 'y':
	    if (strEQ(d,"system"))
		LOP(O_SYSTEM);
	    if (strEQ(d,"syscall"))
		LOP(O_SYSCALL);
	    break;
	case 'z':
	    break;
	}
	break;
    case 't': case 'T':
	SNARFWORD;
	if (strEQ(d,"tr")) {
	    s = scantrans(s);
	    if (yylval.arg)
		TERM(TRANS);
	    else
		RETURN(1);	/* force error */
	}
	if (strEQ(d,"tell"))
	    FOP(O_TELL);
	if (strEQ(d,"telldir"))
	    FOP(O_TELLDIR);
	if (strEQ(d,"time"))
	    FUN0(O_TIME);
	if (strEQ(d,"truncate"))
	    FOP2(O_TRUNCATE);
	break;
    case 'u': case 'U':
	SNARFWORD;
	if (strEQ(d,"using"))
	    OPERATOR(USING);
	if (strEQ(d,"until")) {
	    yylval.ival = curcmd->c_line;
	    OPERATOR(UNTIL);
	}
	if (strEQ(d,"unless")) {
	    yylval.ival = curcmd->c_line;
	    OPERATOR(UNLESS);
	}
	if (strEQ(d,"unlink"))
	    LOP(O_UNLINK);
	if (strEQ(d,"undef"))
	    LFUN(O_UNDEF);
	if (strEQ(d,"unpack"))
	    FUN2(O_UNPACK);
	if (strEQ(d,"unshift")) {
	    yylval.ival = O_UNSHIFT;
	    OPERATOR(PUSH);
	}
	break;
    case 'v': case 'V':
	SNARFWORD;
	if (strEQ(d,"values"))
	    HFUN(O_VALUES);
	if (strEQ(d,"vec")) {
	    sawvec = TRUE;
	    FUN3(O_VEC);
	}
	break;
    case 'w': case 'W':
	SNARFWORD;
	if (strEQ(d,"while")) {
	    yylval.ival = curcmd->c_line;
	    OPERATOR(WHILE);
	}
	if (strEQ(d,"warn"))
	    LOP(O_WARN);
	if (strEQ(d,"wantarray")) {
	    yylval.arg = op_new(1);
	    yylval.arg->arg_type = O_ITEM;
	    yylval.arg[1].arg_type = A_WANTARRAY;
	    TERM(RSTRING);
	}
	if (strEQ(d,"write"))
	    FOP(O_WRITE);
	break;
    case 'x': case 'X':
	SNARFWORD;
	if (!expectterm && strEQ(d,"x"))
	    MOP(O_REPEAT);
	break;
    case 'y': case 'Y':
	if (s[1] == '\'') {
	    d = "y";
	    s++;
	}
	else {
	    SNARFWORD;
	}
	if (strEQ(d,"y")) {
	    s = scantrans(s);
	    TERM(TRANS);
	}
	break;
    case 'z': case 'Z':
	SNARFWORD;
	break;
    }
    yylval.cval = savestr(d);
    expectterm = FALSE;
    if (oldoldbufptr && oldoldbufptr < bufptr) {
	while (isspace(*oldoldbufptr))
	    oldoldbufptr++;
	if (*oldoldbufptr == 'p' && strnEQ(oldoldbufptr,"print",5))
	    expectterm = TRUE;
	else if (*oldoldbufptr == 's' && strnEQ(oldoldbufptr,"sort",4))
	    expectterm = TRUE;
    }
    return (CLINE, bufptr = s, (int)WORD);
}

void checkcomma (register char *s, char *what)
{
    char *word;

    if (*s == '(')
	s++;
    while (s < bufend && isascii(*s) && isspace(*s))
	s++;
    if (isascii(*s) && (isalpha(*s) || *s == '_')) {
	word = s++;
	while (isalpha(*s) || isdigit(*s) || *s == '_')
	    s++;
	while (s < bufend && isspace(*s))
	    s++;
	if (*s == ',') {
	    *s = '\0';
	    word = instr(
	"tell eof length shift cos exp int log rand sin sqrt ord wantarray",
	      word);
	    *s = ',';
	    if (word)
		return;
	    fatal("No comma allowed after %s", what);
	}
    }
}

char *scanreg (register char *s, register char *send, char *dest)
{
    register char *d;
    int brackets = 0;

    reparse = Nullch;
    s++;
    d = dest;
    if (isdigit(*s)) {
	while (isdigit(*s))
	    *d++ = *s++;
    }
    else {
	while (isalpha(*s) || isdigit(*s) || *s == '_' || *s == '\'')
	    *d++ = *s++;
    }
    while (d > dest+1 && d[-1] == '\'')
	d--,s--;
    *d = '\0';
    d = dest;
    if (!*d) {
	*d = *s++;
	if (*d == '{' /* } */ ) {
	    d = dest;
	    brackets++;
	    while (s < send && brackets) {
		if (!reparse && (d == dest || (*s && isascii(*s) &&
		  (isalpha(*s) || isdigit(*s) || *s == '_') ))) {
		    *d++ = *s++;
		    continue;
		}
		else if (!reparse)
		    reparse = s;
		switch (*s++) {
		/* { */
		case '}':
		    brackets--;
		    if (reparse && reparse == s - 1)
			reparse = Nullch;
		    break;
		case '{':   /* } */
		    brackets++;
		    break;
		}
	    }
	    *d = '\0';
	    d = dest;
	}
	else
	    d[1] = '\0';
    }
    if (*d == '^' && !isspace(*s))
	*d = *s++ & 31;
    return s;
}

STR *scanconst (char *string, int len)
{
    register STR *retstr;
    register char *t;
    register char *d;
    register char *e;

    if (index(string,'|')) {
	return Nullstr;
    }
    retstr = Str_new(86,len);
    str_nset(retstr,string,len);
    t = str_get(retstr);
    e = t + len;
    retstr->str_u.str_useful = 100;
    for (d=t; d < e; ) {
	switch (*d) {
	case '{':
	    if (isdigit(d[1]))
		e = d;
	    else
		goto defchar;
	    break;
	case '.': case '[': case '$': case '(': case ')': case '|': case '+':
	    e = d;
	    break;
	case '\\':
	    if (d[1] && index("wWbB0123456789sSdD",d[1])) {
		e = d;
		break;
	    }
	    (void)bcopy(d+1,d,e-d);
	    e--;
	    switch(*d) {
	    case 'n':
		*d = '\n';
		break;
	    case 't':
		*d = '\t';
		break;
	    case 'f':
		*d = '\f';
		break;
	    case 'r':
		*d = '\r';
		break;
	    }
	    /* FALL THROUGH */
	default:
	  defchar:
	    if (d[1] == '*' || (d[1] == '{' && d[2] == '0') || d[1] == '?') {
		e = d;
		break;
	    }
	    d++;
	}
    }
    if (d == t) {
	str_free(retstr);
	return Nullstr;
    }
    *d = '\0';
    retstr->str_cur = d - t;
    return retstr;
}

char *scanpat (register char *s)
{
    register SPAT *spat;
    register char *d;
    register char *e;
    int len;
    SPAT savespat;
    STR *str = Str_new(93,0);

    Newz(801,spat,1,SPAT);
    spat->spat_next = curstash->tbl_spatroot;	/* link into spat list */
    curstash->tbl_spatroot = spat;

    switch (*s++) {
    case 'm':
	s++;
	break;
    case '/':
	break;
    case '?':
	spat->spat_flags |= SPAT_ONCE;
	break;
    default:
	fatal("panic: scanpat");
    }
    s = str_append_till(str,s,bufend,s[-1],patleave);
    if (s >= bufend) {
	str_free(str);
	yyerror("Search pattern not terminated");
	yylval.arg = Nullarg;
	return s;
    }
    s++;
    while (*s == 'i' || *s == 'o') {
	if (*s == 'i') {
	    s++;
	    sawi = TRUE;
	    spat->spat_flags |= SPAT_FOLD;
	}
	if (*s == 'o') {
	    s++;
	    spat->spat_flags |= SPAT_KEEP;
	}
    }
    len = str->str_cur;
    e = str->str_ptr + len;
    for (d = str->str_ptr; d < e; d++) {
	if (*d == '\\')
	    d++;
	else if ((*d == '$' && d[1] && d[1] != '|' && d[1] != ')') ||
		 (*d == '@')) {
	    register ARG *arg;

	    spat->spat_runtime = arg = op_new(1);
	    arg->arg_type = O_ITEM;
	    arg[1].arg_type = A_DOUBLE;
	    arg[1].arg_ptr.arg_str = str_smake(str);
	    d = scanreg(d,bufend,buf);
	    (void)stabent(buf,TRUE);		/* make sure it's created */
	    for (; d < e; d++) {
		if (*d == '\\')
		    d++;
		else if (*d == '$' && d[1] && d[1] != '|' && d[1] != ')') {
		    d = scanreg(d,bufend,buf);
		    (void)stabent(buf,TRUE);
		}
		else if (*d == '@') {
		    d = scanreg(d,bufend,buf);
		    if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
		      strEQ(buf,"SIG") || strEQ(buf,"INC"))
			(void)stabent(buf,TRUE);
		}
	    }
	    goto got_pat;		/* skip compiling for now */
	}
    }
    if (spat->spat_flags & SPAT_FOLD)
#ifdef STRUCTCOPY
	savespat = *spat;
#else
	(void)bcopy((char *)spat, (char *)&savespat, sizeof(SPAT));
#endif
    if (*str->str_ptr == '^') {
	spat->spat_short = scanconst(str->str_ptr+1,len-1);
	if (spat->spat_short) {
	    spat->spat_slen = spat->spat_short->str_cur;
	    if (spat->spat_slen == len - 1)
		spat->spat_flags |= SPAT_ALL;
	}
    }
    else {
	spat->spat_flags |= SPAT_SCANFIRST;
	spat->spat_short = scanconst(str->str_ptr,len);
	if (spat->spat_short) {
	    spat->spat_slen = spat->spat_short->str_cur;
	    if (spat->spat_slen == len)
		spat->spat_flags |= SPAT_ALL;
	}
    }	
    if ((spat->spat_flags & SPAT_ALL) && (spat->spat_flags & SPAT_SCANFIRST)) {
	fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
	spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
	    spat->spat_flags & SPAT_FOLD);
		/* Note that this regexp can still be used if someone says
		 * something like /a/ && s//b/;  so we can't delete it.
		 */
    }
    else {
	if (spat->spat_flags & SPAT_FOLD)
#ifdef STRUCTCOPY
	    *spat = savespat;
#else
	    (void)bcopy((char *)&savespat, (char *)spat, sizeof(SPAT));
#endif
	if (spat->spat_short)
	    fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
	spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
	    spat->spat_flags & SPAT_FOLD);
	hoistmust(spat);
    }
  got_pat:
    str_free(str);
    yylval.arg = make_match(O_MATCH,stab2arg(A_STAB,defstab),spat);
    return s;
}

char *scansubst (register char *s)
{
    register SPAT *spat;
    register char *d;
    register char *e;
    int len;
    STR *str = Str_new(93,0);

    Newz(802,spat,1,SPAT);
    spat->spat_next = curstash->tbl_spatroot;	/* link into spat list */
    curstash->tbl_spatroot = spat;

    s = str_append_till(str,s+1,bufend,*s,patleave);
    if (s >= bufend) {
	yyerror("Substitution pattern not terminated");
	yylval.arg = Nullarg;
	return s;
    }
    len = str->str_cur;
    e = str->str_ptr + len;
    for (d = str->str_ptr; d < e; d++) {
	if (*d == '\\')
	    d++;
	else if ((*d == '$' && d[1] && d[1] != '|' && /*(*/ d[1] != ')') ||
	    *d == '@' ) {
	    register ARG *arg;

	    spat->spat_runtime = arg = op_new(1);
	    arg->arg_type = O_ITEM;
	    arg[1].arg_type = A_DOUBLE;
	    arg[1].arg_ptr.arg_str = str_smake(str);
	    d = scanreg(d,bufend,buf);
	    (void)stabent(buf,TRUE);		/* make sure it's created */
	    for (; *d; d++) {
		if (*d == '$' && d[1] && d[-1] != '\\' && d[1] != '|') {
		    d = scanreg(d,bufend,buf);
		    (void)stabent(buf,TRUE);
		}
		else if (*d == '@' && d[-1] != '\\') {
		    d = scanreg(d,bufend,buf);
		    if (strEQ(buf,"ARGV") || strEQ(buf,"ENV") ||
		      strEQ(buf,"SIG") || strEQ(buf,"INC"))
			(void)stabent(buf,TRUE);
		}
	    }
	    goto get_repl;		/* skip compiling for now */
	}
    }
    if (*str->str_ptr == '^') {
	spat->spat_short = scanconst(str->str_ptr+1,len-1);
	if (spat->spat_short)
	    spat->spat_slen = spat->spat_short->str_cur;
    }
    else {
	spat->spat_flags |= SPAT_SCANFIRST;
	spat->spat_short = scanconst(str->str_ptr,len);
	if (spat->spat_short)
	    spat->spat_slen = spat->spat_short->str_cur;
    }
get_repl:
    s = scanstr(s);
    if (s >= bufend) {
	str_free(str);
	yyerror("Substitution replacement not terminated");
	yylval.arg = Nullarg;
	return s;
    }
    spat->spat_repl = yylval.arg;
    spat->spat_flags |= SPAT_ONCE;
    if ((spat->spat_repl[1].arg_type & A_MASK) == A_SINGLE)
	spat->spat_flags |= SPAT_CONST;
    else if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE) {
	STR *tmpstr;
	register char *t;

	spat->spat_flags |= SPAT_CONST;
	tmpstr = spat->spat_repl[1].arg_ptr.arg_str;
	e = tmpstr->str_ptr + tmpstr->str_cur;
	for (t = tmpstr->str_ptr; t < e; t++) {
	    if (*t == '$' && t[1] && (index("`'&+0123456789",t[1]) ||
	      (t[1] == '{' /*}*/ && isdigit(t[2])) ))
		spat->spat_flags &= ~SPAT_CONST;
	}
    }
    while (*s == 'g' || *s == 'i' || *s == 'e' || *s == 'o') {
	if (*s == 'e') {
	    s++;
	    if ((spat->spat_repl[1].arg_type & A_MASK) == A_DOUBLE)
		spat->spat_repl[1].arg_type = A_SINGLE;
	    spat->spat_repl = make_op(O_EVAL,2,
		spat->spat_repl,
		Nullarg,
		Nullarg);
	    spat->spat_flags &= ~SPAT_CONST;
	}
	if (*s == 'g') {
	    s++;
	    spat->spat_flags &= ~SPAT_ONCE;
	}
	if (*s == 'i') {
	    s++;
	    sawi = TRUE;
	    spat->spat_flags |= SPAT_FOLD;
	    if (!(spat->spat_flags & SPAT_SCANFIRST)) {
		str_free(spat->spat_short);	/* anchored opt doesn't do */
		spat->spat_short = Nullstr;	/* case insensitive match */
		spat->spat_slen = 0;
	    }
	}
	if (*s == 'o') {
	    s++;
	    spat->spat_flags |= SPAT_KEEP;
	}
    }
    if (spat->spat_short && (spat->spat_flags & SPAT_SCANFIRST))
	fbmcompile(spat->spat_short, spat->spat_flags & SPAT_FOLD);
    if (!spat->spat_runtime) {
	spat->spat_regexp = regcomp(str->str_ptr,str->str_ptr+len,
	  spat->spat_flags & SPAT_FOLD);
	hoistmust(spat);
    }
    yylval.arg = make_match(O_SUBST,stab2arg(A_STAB,defstab),spat);
    str_free(str);
    return s;
}

void hoistmust (register SPAT *spat)
{
    if (spat->spat_regexp->regmust) {	/* is there a better short-circuit? */
	if (spat->spat_short &&
	  str_eq(spat->spat_short,spat->spat_regexp->regmust))
	{
	    if (spat->spat_flags & SPAT_SCANFIRST) {
		str_free(spat->spat_short);
		spat->spat_short = Nullstr;
	    }
	    else {
		str_free(spat->spat_regexp->regmust);
		spat->spat_regexp->regmust = Nullstr;
		return;
	    }
	}
	if (!spat->spat_short ||	/* promote the better string */
	  ((spat->spat_flags & SPAT_SCANFIRST) &&
	   (spat->spat_short->str_cur < spat->spat_regexp->regmust->str_cur) )){
	    str_free(spat->spat_short);		/* ok if null */
	    spat->spat_short = spat->spat_regexp->regmust;
	    spat->spat_regexp->regmust = Nullstr;
	    spat->spat_flags |= SPAT_SCANFIRST;
	}
    }
}

char *expand_charset (register char *s, int len, int *retlen)
{
    char t[520];
    register char *d = t;
    register int i;
    register char *send = s + len;

    while (s < send && d - t <= 256) {
	if (s[1] == '-' && s+2 < send) {
	    for (i = s[0]; i <= s[2]; i++)
		*d++ = i;
	    s += 3;
	}
	else
	    *d++ = *s++;
    }
    *d = '\0';
    *retlen = d - t;
    return nsavestr(t,d-t);
}

char *scantrans (register char *s)
{
    ARG *arg =
	l(make_op(O_TRANS,2,stab2arg(A_STAB,defstab),Nullarg,Nullarg));
    register char *t;
    register char *r;
    register short *tbl;
    register int i;
    register int j;
    int tlen, rlen;
    int squash;
    int delete;
    int complement;

    Newz(803,tbl,256,short);
    arg[2].arg_type = A_NULL;
    arg[2].arg_ptr.arg_cval = (char*) tbl;
    s = scanstr(s);
    if (s >= bufend) {
	yyerror("Translation pattern not terminated");
	yylval.arg = Nullarg;
	return s;
    }
    t = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
	yylval.arg[1].arg_ptr.arg_str->str_cur,&tlen);
    free_arg(yylval.arg);
    s = scanstr(s-1);
    if (s >= bufend) {
	yyerror("Translation replacement not terminated");
	yylval.arg = Nullarg;
	return s;
    }
    complement = delete = squash = 0;
    while (*s == 'c' || *s == 'd' || *s == 's') {
	if (*s == 'c')
	    complement = 1;
	else if (*s == 'd')
	    delete = 2;
	else
	    squash = 1;
	s++;
    }
    r = expand_charset(yylval.arg[1].arg_ptr.arg_str->str_ptr,
	yylval.arg[1].arg_ptr.arg_str->str_cur,&rlen);
    free_arg(yylval.arg);
    arg[2].arg_len = delete|squash;
    yylval.arg = arg;
    if (!rlen && !delete) {
	Safefree(r);
	r = t; rlen = tlen;
    }
    if (complement) {
	Zero(tbl, 256, short);
	for (i = 0; i < tlen; i++)
	    tbl[t[i] & 0377] = -1;
	for (i = 0, j = 0; i < 256; i++,j++) {
	    if (!tbl[i]) {
		if (j >= rlen) {
		    if (delete) {
			tbl[i] = -2;
			continue;
		    }
		    --j;
		}
		tbl[i] = r[j];
	    }
	}
    }
    else {
	for (i = 0; i < 256; i++)
	    tbl[i] = -1;
	for (i = 0, j = 0; i < tlen; i++,j++) {
	    if (j >= rlen) {
		if (delete) {
		    if (tbl[t[i] & 0377] == -1)
			tbl[t[i] & 0377] = -2;
		    continue;
		}
		--j;
	    }
	    if (tbl[t[i] & 0377] == -1)
		tbl[t[i] & 0377] = r[j] & 0377;
	}
    }
    if (r != t)
	Safefree(r);
    Safefree(t);
    return s;
}

char *scanstr (register char *s)
{
    register char term;
    register char *d;
    register ARG *arg;
    register char *send;
    register bool makesingle = FALSE;
    register STAB *stab;
    bool alwaysdollar = FALSE;
    bool hereis = FALSE;
    STR *herewas;
    STR *str;
    char *leave = "\\$@nrtfb0123456789[{]}"; /* which backslash sequences to keep */
    int len;

    arg = op_new(1);
    yylval.arg = arg;
    arg->arg_type = O_ITEM;

    switch (*s) {
    default:			/* a substitution replacement */
	arg[1].arg_type = A_DOUBLE;
	makesingle = TRUE;	/* maybe disable runtime scanning */
	term = *s;
	if (term == '\'')
	    leave = Nullch;
	goto snarf_it;
    case '0':
	{
	    unsigned long i;
	    int shift;

	    arg[1].arg_type = A_SINGLE;
	    if (s[1] == 'x') {
		shift = 4;
		s += 2;
	    }
	    else if (s[1] == '.')
		goto decimal;
	    else
		shift = 3;
	    i = 0;
	    for (;;) {
		switch (*s) {
		default:
		    goto out;
		case '8': case '9':
		    if (shift != 4)
			yyerror("Illegal octal digit");
		    /* FALL THROUGH */
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7':
		    i <<= shift;
		    i += *s++ & 15L;
		    break;
		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
		    if (shift != 4)
			goto out;
		    i <<= 4;
		    i += (*s++ & 7L) + 9L;
		    break;
		}
	    }
	  out:
	    str = Str_new(92,0);
	    str_numset(str,(double)i);
	    if (str->str_ptr) {
		Safefree(str->str_ptr);
		str->str_ptr = Nullch;
		str->str_len = str->str_cur = 0;
	    }
	    arg[1].arg_ptr.arg_str = str;
	}
	break;
    case '1': case '2': case '3': case '4': case '5':
    case '6': case '7': case '8': case '9': case '.':
      decimal:
	arg[1].arg_type = A_SINGLE;
	d = tokenbuf;
	while (isdigit(*s) || *s == '_') {
	    if (*s == '_')
		s++;
	    else
		*d++ = *s++;
	}
	if (*s == '.' && s[1] && index("0123456789eE ;",s[1])) {
	    *d++ = *s++;
	    while (isdigit(*s) || *s == '_') {
		if (*s == '_')
		    s++;
		else
		    *d++ = *s++;
	    }
	}
	if (*s && index("eE",*s) && index("+-0123456789",s[1])) {
	    *d++ = *s++;
	    if (*s == '+' || *s == '-')
		*d++ = *s++;
	    while (isdigit(*s))
		*d++ = *s++;
	}
	*d = '\0';
	str = Str_new(92,0);
	str_numset(str,atof(tokenbuf));
	if (str->str_ptr) {
	    Safefree(str->str_ptr);
	    str->str_ptr = Nullch;
	    str->str_len = str->str_cur = 0;
	}
	arg[1].arg_ptr.arg_str = str;
	break;
    case '<':
	if (*++s == '<') {
	    hereis = TRUE;
	    d = tokenbuf;
	    if (!rsfp)
		*d++ = '\n';
	    if (*++s && index("`'\"",*s)) {
		term = *s++;
		s = cpytill(d,s,bufend,term,&len);
		if (s < bufend)
		    s++;
		d += len;
	    }
	    else {
		if (*s == '\\')
		    s++, term = '\'';
		else
		    term = '"';
		while (isascii(*s) && (isalpha(*s) || isdigit(*s) || *s == '_'))
		    *d++ = *s++;
	    }				/* assuming tokenbuf won't clobber */
	    *d++ = '\n';
	    *d = '\0';
	    len = d - tokenbuf;
	    d = "\n";
	    if (rsfp || (d=ninstr(s,bufend,d,d+1)) == Nullch)
		herewas = str_make(s,bufend-s);
	    else
		s--, herewas = str_make(s,d-s);
	    s += herewas->str_cur;
	    if (term == '\'')
		goto do_single;
	    if (term == '`')
		goto do_back;
	    goto do_double;
	}
	d = tokenbuf;
	s = cpytill(d,s,bufend,'>',&len);
	if (s < bufend)
	    s++;
	if (*d == '$') d++;
	while (*d &&
	  (isalpha(*d) || isdigit(*d) || *d == '_' || *d == '\''))
	    d++;
	if (d - tokenbuf != len) {
	    d = tokenbuf;
	    arg[1].arg_type = A_GLOB;
	    d = nsavestr(d,len);
	    arg[1].arg_ptr.arg_stab = stab = genstab();
	    stab_io(stab) = stio_new();
	    stab_val(stab) = str_make(d,len);
	    Safefree(d);
	}
	else {
	    d = tokenbuf;
	    if (!len)
		(void)strcpy(d,"ARGV");
	    if (*d == '$') {
		arg[1].arg_type = A_INDREAD;
		arg[1].arg_ptr.arg_stab = stabent(d+1,TRUE);
	    }
	    else {
		arg[1].arg_type = A_READ;
		arg[1].arg_ptr.arg_stab = stabent(d,TRUE);
		if (!stab_io(arg[1].arg_ptr.arg_stab))
		    stab_io(arg[1].arg_ptr.arg_stab) = stio_new();
		if (strEQ(d,"ARGV")) {
		    (void)aadd(arg[1].arg_ptr.arg_stab);
		    stab_io(arg[1].arg_ptr.arg_stab)->flags |=
		      IOF_ARGV|IOF_START;
		}
	    }
	}
	break;

    case 'q':
	s++;
	if (*s == 'q') {
	    s++;
	    goto do_double;
	}
	/* FALL THROUGH */
    case '\'':
      do_single:
	term = *s;
	arg[1].arg_type = A_SINGLE;
	leave = Nullch;
	goto snarf_it;

    case '"': 
      do_double:
	term = *s;
	arg[1].arg_type = A_DOUBLE;
	makesingle = TRUE;	/* maybe disable runtime scanning */
	alwaysdollar = TRUE;	/* treat $) and $| as variables */
	goto snarf_it;
    case '`':
      do_back:
	term = *s;
	arg[1].arg_type = A_BACKTICK;
	alwaysdollar = TRUE;	/* treat $) and $| as variables */
      snarf_it:
	{
	    STR *tmpstr;
	    char *tmps;

	    multi_start = curcmd->c_line;
	    if (hereis)
		multi_open = multi_close = '<';
	    else {
		multi_open = term;
		if (term && (tmps = index("([{< )]}> )]}>",term)) != Nullch)
		    term = tmps[5];
		multi_close = term;
	    }
	    tmpstr = Str_new(87,80);
	    if (hereis) {
		term = *tokenbuf;
		if (!rsfp) {
		    d = s;
		    while (s < bufend &&
		      (*s != term || bcmp(s,tokenbuf,len) != 0) ) {
			if (*s++ == '\n')
			    curcmd->c_line++;
		    }
		    if (s >= bufend) {
			curcmd->c_line = multi_start;
			fatal("EOF in string");
		    }
		    str_nset(tmpstr,d+1,s-d);
		    s += len - 1;
		    str_ncat(herewas,s,bufend-s);
		    str_replace(linestr,herewas);
		    oldoldbufptr = oldbufptr = bufptr = s = str_get(linestr);
		    bufend = linestr->str_ptr + linestr->str_cur;
		    hereis = FALSE;
		}
		else
		    str_nset(tmpstr,"",0);   /* avoid "uninitialized" warning */
	    }
	    else
		s = str_append_till(tmpstr,s+1,bufend,term,leave);
	    while (s >= bufend) {	/* multiple line string? */
		if (!rsfp ||
		 (oldoldbufptr = oldbufptr = s = str_gets(linestr, rsfp, 0)) == Nullch) {
		    curcmd->c_line = multi_start;
		    fatal("EOF in string");
		}
		curcmd->c_line++;
		if (perldb) {
		    STR *str = Str_new(88,0);

		    str_sset(str,linestr);
		    astore(stab_xarray(curcmd->c_filestab),
		      (int)curcmd->c_line,str);
		}
		bufend = linestr->str_ptr + linestr->str_cur;
		if (hereis) {
		    if (*s == term && bcmp(s,tokenbuf,len) == 0) {
			s = bufend - 1;
			*s = ' ';
			str_scat(linestr,herewas);
			bufend = linestr->str_ptr + linestr->str_cur;
		    }
		    else {
			s = bufend;
			str_scat(tmpstr,linestr);
		    }
		}
		else
		    s = str_append_till(tmpstr,s,bufend,term,leave);
	    }
	    multi_end = curcmd->c_line;
	    s++;
	    if (tmpstr->str_cur + 5 < tmpstr->str_len) {
		tmpstr->str_len = tmpstr->str_cur + 1;
		Renew(tmpstr->str_ptr, tmpstr->str_len, char);
	    }
	    if ((arg[1].arg_type & A_MASK) == A_SINGLE) {
		arg[1].arg_ptr.arg_str = tmpstr;
		break;
	    }
	    tmps = s;
	    s = tmpstr->str_ptr;
	    send = s + tmpstr->str_cur;
	    while (s < send) {		/* see if we can make SINGLE */
		if (*s == '\\' && s[1] && isdigit(s[1]) && !isdigit(s[2]) &&
		  !alwaysdollar && s[1] != '0')
		    *s = '$';		/* grandfather \digit in subst */
		if ((*s == '$' || *s == '@') && s+1 < send &&
		  (alwaysdollar || (s[1] != ')' && s[1] != '|'))) {
		    makesingle = FALSE;	/* force interpretation */
		}
		else if (*s == '\\' && s+1 < send) {
		    s++;
		}
		s++;
	    }
	    s = d = tmpstr->str_ptr;	/* assuming shrinkage only */
	    while (s < send) {
		if ((*s == '$' && s+1 < send &&
		    (alwaysdollar || /*(*/ (s[1] != ')' && s[1] != '|')) ) ||
		    (*s == '@' && s+1 < send) ) {
		    len = scanreg(s,send,tokenbuf) - s;
		    if (*s == '$' || strEQ(tokenbuf,"ARGV")
		      || strEQ(tokenbuf,"ENV")
		      || strEQ(tokenbuf,"SIG")
		      || strEQ(tokenbuf,"INC") )
			(void)stabent(tokenbuf,TRUE); /* make sure it exists */
		    while (len--)
			*d++ = *s++;
		    continue;
		}
		else if (*s == '\\' && s+1 < send) {
		    s++;
		    switch (*s) {
		    default:
			if (!makesingle && (!leave || (*s && index(leave,*s))))
			    *d++ = '\\';
			*d++ = *s++;
			continue;
		    case '0': case '1': case '2': case '3':
		    case '4': case '5': case '6': case '7':
			*d = *s++ - '0';
			if (s < send && *s && index("01234567",*s)) {
			    *d <<= 3;
			    *d += *s++ - '0';
			}
			if (s < send && *s && index("01234567",*s)) {
			    *d <<= 3;
			    *d += *s++ - '0';
			}
			d++;
			continue;
		    case 'b':
			*d++ = '\b';
			break;
		    case 'n':
			*d++ = '\n';
			break;
		    case 'r':
			*d++ = '\r';
			break;
		    case 'f':
			*d++ = '\f';
			break;
		    case 't':
			*d++ = '\t';
			break;
		    }
		    s++;
		    continue;
		}
		*d++ = *s++;
	    }
	    *d = '\0';

	    if ((arg[1].arg_type & A_MASK) == A_DOUBLE && makesingle)
		    arg[1].arg_type = A_SINGLE;	/* now we can optimize on it */

	    tmpstr->str_cur = d - tmpstr->str_ptr;
	    arg[1].arg_ptr.arg_str = tmpstr;
	    s = tmps;
	    break;
	}
    }
    if (hereis)
	str_free(herewas);
    return s;
}

FCMD *load_format (void)
{
    FCMD froot;
    FCMD *flinebeg;
    char *eol;
    register FCMD *fprev = &froot;
    register FCMD *fcmd;
    register char *s;
    register char *t;
    register STR *str;
    bool noblank;
    bool repeater;

    Zero(&froot, 1, FCMD);
    s = bufptr;
    while (s < bufend || (rsfp && (s = str_gets(linestr,rsfp, 0)) != Nullch)) {
	curcmd->c_line++;
	if (in_eval && !rsfp) {
	    eol = index(s,'\n');
	    if (!eol++)
		eol = bufend;
	}
	else
	    eol = bufend = linestr->str_ptr + linestr->str_cur;
	if (perldb) {
	    STR *tmpstr = Str_new(89,0);

	    str_nset(tmpstr, s, eol-s);
	    astore(stab_xarray(curcmd->c_filestab), (int)curcmd->c_line,tmpstr);
	}
	if (*s == '.') {
	    for (t = s+1; *t == ' ' || *t == '\t'; t++) ;
	    if (*t == '\n') {
		bufptr = s;
		return froot.f_next;
	    }
	}
	if (*s == '#') {
	    s = eol;
	    continue;
	}
	flinebeg = Nullfcmd;
	noblank = FALSE;
	repeater = FALSE;
	while (s < eol) {
	    Newz(804,fcmd,1,FCMD);
	    fprev->f_next = fcmd;
	    fprev = fcmd;
	    for (t=s; t < eol && *t != '@' && *t != '^'; t++) {
		if (*t == '~') {
		    noblank = TRUE;
		    *t = ' ';
		    if (t[1] == '~') {
			repeater = TRUE;
			t[1] = ' ';
		    }
		}
	    }
	    fcmd->f_pre = nsavestr(s, t-s);
	    fcmd->f_presize = t-s;
	    s = t;
	    if (s >= eol) {
		if (noblank)
		    fcmd->f_flags |= FC_NOBLANK;
		if (repeater)
		    fcmd->f_flags |= FC_REPEAT;
		break;
	    }
	    if (!flinebeg)
		flinebeg = fcmd;		/* start values here */
	    if (*s++ == '^')
		fcmd->f_flags |= FC_CHOP;	/* for doing text filling */
	    switch (*s) {
	    case '*':
		fcmd->f_type = F_LINES;
		*s = '\0';
		break;
	    case '<':
		fcmd->f_type = F_LEFT;
		while (*s == '<')
		    s++;
		break;
	    case '>':
		fcmd->f_type = F_RIGHT;
		while (*s == '>')
		    s++;
		break;
	    case '|':
		fcmd->f_type = F_CENTER;
		while (*s == '|')
		    s++;
		break;
	    case '#':
	    case '.':
		/* Catch the special case @... and handle it as a string
		   field. */
		if (*s == '.' && s[1] == '.') {
		    goto default_format;
		}
		fcmd->f_type = F_DECIMAL;
		{
		    char *p;

		    /* Read a format in the form @####.####, where either group
		       of ### may be empty, or the final .### may be missing. */
		    while (*s == '#')
			s++;
		    if (*s == '.') {
			s++;
			p = s;
			while (*s == '#')
			    s++;
			fcmd->f_decimals = s-p;
			fcmd->f_flags |= FC_DP;
		    } else {
			fcmd->f_decimals = 0;
		    }
		}
		break;
	    default:
	    default_format:
		fcmd->f_type = F_LEFT;
		break;
	    }
	    if (fcmd->f_flags & FC_CHOP && *s == '.') {
		fcmd->f_flags |= FC_MORE;
		while (*s == '.')
		    s++;
	    }
	    fcmd->f_size = s-t;
	}
	if (flinebeg) {
	  again:
	    if (s >= bufend &&
	      (!rsfp || (s = str_gets(linestr, rsfp, 0)) == Nullch) )
		goto badform;
	    curcmd->c_line++;
	    if (in_eval && !rsfp) {
		eol = index(s,'\n');
		if (!eol++)
		    eol = bufend;
	    }
	    else
		eol = bufend = linestr->str_ptr + linestr->str_cur;
	    if (perldb) {
		STR *tmpstr = Str_new(90,0);

		str_nset(tmpstr, s, eol-s);
		astore(stab_xarray(curcmd->c_filestab),
		    (int)curcmd->c_line,tmpstr);
	    }
	    if (strnEQ(s,".\n",2)) {
		bufptr = s;
		yyerror("Missing values line");
		return froot.f_next;
	    }
	    if (*s == '#') {
		s = eol;
		goto again;
	    }
	    str = flinebeg->f_unparsed = Str_new(91,eol - s);
	    str->str_u.str_hash = curstash;
	    str_nset(str,"(",1);
	    flinebeg->f_line = curcmd->c_line;
	    eol[-1] = '\0';
	    if (!flinebeg->f_next->f_type || index(s, ',')) {
		eol[-1] = '\n';
		str_ncat(str, s, eol - s - 1);
		str_ncat(str,",$$);",5);
		s = eol;
	    }
	    else {
		eol[-1] = '\n';
		while (s < eol && isspace(*s))
		    s++;
		t = s;
		while (s < eol) {
		    switch (*s) {
		    case ' ': case '\t': case '\n': case ';':
			str_ncat(str, t, s - t);
			str_ncat(str, "," ,1);
			while (s < eol && (isspace(*s) || *s == ';'))
			    s++;
			t = s;
			break;
		    case '$':
			str_ncat(str, t, s - t);
			t = s;
			s = scanreg(s,eol,tokenbuf);
			str_ncat(str, t, s - t);
			t = s;
			if (s < eol && *s && index("$'\"",*s))
			    str_ncat(str, ",", 1);
			break;
		    case '"': case '\'':
			str_ncat(str, t, s - t);
			t = s;
			s++;
			while (s < eol && (*s != *t || s[-1] == '\\'))
			    s++;
			if (s < eol)
			    s++;
			str_ncat(str, t, s - t);
			t = s;
			if (s < eol && *s && index("$'\"",*s))
			    str_ncat(str, ",", 1);
			break;
		    default:
			yyerror("Please use commas to separate fields");
		    }
		}
		str_ncat(str,"$$);",4);
	    }
	}
    }
  badform:
    bufptr = str_get(linestr);
    yyerror("Format not terminated");
    return froot.f_next;
}
