rc: move free carat handling into parser - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 7d6a248f2c68d70f58387afc69e73e695c3d940c
 (DIR) parent 3caf5c238a886d06b438ec6d42f2609b8625463f
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Mon,  4 May 2020 23:20:08 -0400
       
       rc: move free carat handling into parser
       
       This fixes at least one shell script (printfont) that expected
       
               'x'`{y}'z'
       
       to mean
       
               'x'^`{y}^'z'
       
       as it now does. Before it meant:
       
               'x'^`{y} 'z'
       
       One surprise is that adjacent lists get a free carat:
       
               (x y z)(1 2 3)
       
       is
       
               (x1 y2 z3)
       
       This doesn't affect any rc script in Plan 9 or plan9port.
       
       Diffstat:
         M man/man1/rc.1                       |      26 +++-----------------------
         M src/cmd/rc/lex.c                    |       6 +++---
         M src/cmd/rc/parse.c                  |     158 +++++++++++++++++--------------
         M src/cmd/rc/syn.y                    |       2 +-
         M src/cmd/rc/test.rc                  |      11 +++++++++++
       
       5 files changed, 106 insertions(+), 97 deletions(-)
       ---
 (DIR) diff --git a/man/man1/rc.1 b/man/man1/rc.1
       @@ -290,28 +290,10 @@ then one operand must have one component, and the other must be non-empty,
        and concatenation is distributive.
        .PD
        .SS Free Carets
       -In most circumstances,
       -.I rc
       +.I Rc
        will insert the
        .B ^
        operator automatically between words that are not separated by white space.
       -Whenever one of
       -.B $
       -.B '
       -.B `
       -follows a quoted or unquoted word or an unquoted word follows a quoted word
       -with no intervening blanks or tabs,
       -a
       -.B ^
       -is inserted between the two.
       -If an unquoted word immediately follows a
       -.BR $ 
       -and contains a character other than an alphanumeric, underscore,
       -or
       -.BR * ,
       -a
       -.B ^
       -is inserted before the first such character.
        Thus
        .IP
        .B cc -$flags $stem.c
       @@ -367,7 +349,7 @@ or
        .I Fd1
        is a previously opened file descriptor and
        .I fd0
       -becomes a new copy (in the sense of 
       +becomes a new copy (in the sense of
        .IR dup (3))
        of it.
        A file descriptor may be closed by writing
       @@ -477,7 +459,7 @@ is executed.
        The
        .I command
        is executed once for each
       -.IR argument 
       +.IR argument
        with that argument assigned to
        .IR name .
        If the argument list is omitted,
       @@ -982,8 +964,6 @@ changes
        .PP
        Functions that use here documents don't work.
        .PP
       -Free carets don't get inserted next to keywords.
       -.PP
        The
        .BI <{ command }
        syntax depends on the underlying operating system
 (DIR) diff --git a/src/cmd/rc/lex.c b/src/cmd/rc/lex.c
       @@ -202,7 +202,7 @@ yylex(void)
                 * if the next character is the first character of a simple or compound word,
                 * we insert a `^' before it.
                 */
       -        if(lastword){
       +        if(lastword && flag['Y']){
                        lastword = 0;
                        if(d=='('){
                                advance();
       @@ -215,8 +215,8 @@ yylex(void)
                        }
                }
                inquote = 0;
       -        if(skipwhite() && flag['Z'])
       -                return SP;
       +        if(skipwhite() && !flag['Y'])
       +                return ' ';
                switch(c = advance()){
                case EOF:
                        lastdol = 0;
 (DIR) diff --git a/src/cmd/rc/parse.c b/src/cmd/rc/parse.c
       @@ -23,7 +23,15 @@ static jmp_buf yyjmp;
        static int
        dropnl(int tok)
        {
       -        while(tok == '\n')
       +        while(tok == ' ' || tok == '\n')
       +                tok = yylex();
       +        return tok;
       +}
       +
       +static int
       +dropsp(int tok)
       +{
       +        while(tok == ' ')
                        tok = yylex();
                return tok;
        }
       @@ -49,7 +57,7 @@ parse(void)
                // rc:                                { return 1;}
                // |        line '\n'                {return !compile($1);}
        
       -        tok = yylex();
       +        tok = dropsp(yylex());
                if(tok == EOF)
                        return 1;
                t = line(tok, &tok);
       @@ -117,6 +125,7 @@ brace(int tok)
        
                // brace:        '{' body '}'                {$$=tree1(BRACE, $2);}
        
       +        tok = dropsp(tok);
                if(tok != '{')
                        syntax(tok);
                t = body(yylex(), &tok);
       @@ -132,6 +141,7 @@ paren(int tok)
        
                // paren:        '(' body ')'                {$$=tree1(PCMD, $2);}
        
       +        tok = dropsp(tok);
                if(tok != '(')
                        syntax(tok);
                t = body(yylex(), &tok);
       @@ -172,11 +182,12 @@ yyredir(int tok, int *ptok)
                        syntax(tok);
                case DUP:
                        r = yylval.tree;
       -                *ptok = yylex();
       +                *ptok = dropsp(yylex());
                        break;
                case REDIR:
                        r = yylval.tree;
       -                w = yyword(yylex(), ptok);
       +                w = yyword(yylex(), &tok);
       +                *ptok = dropsp(tok);
                        r = mung1(r, r->rtype==HERE?heredoc(w):w);
                        break;
                }
       @@ -186,17 +197,67 @@ yyredir(int tok, int *ptok)
        static tree*
        cmd(int tok, int *ptok)
        {
       -        tree *t1, *t2, *t3, *t4;
       -
       +        tok = dropsp(tok);
                switch(tok) {
                default:
                        return cmd2(tok, ptok);
        
       +        }
       +}
       +
       +static tree*
       +cmd2(int tok, int *ptok)
       +{
       +        int op;
       +        tree *t1, *t2;
       +
       +        // |        cmd ANDAND cmd                {$$=tree2(ANDAND, $1, $3);}
       +        // |        cmd OROR cmd                {$$=tree2(OROR, $1, $3);}
       +
       +        t1 = cmd3(tok, &tok);
       +        while(tok == ANDAND || tok == OROR) {
       +                op = tok;
       +                t2 = cmd3(dropnl(yylex()), &tok);
       +                t1 = tree2(op, t1, t2);
       +        }
       +        *ptok = tok;
       +        return t1;
       +}
       +
       +static tree*
       +cmd3(int tok, int *ptok)
       +{
       +        tree *t1, *t2, *t3;
       +
       +        // |        cmd PIPE cmd                {$$=mung2($2, $1, $3);}
       +        t1 = cmd4(tok, &tok);
       +        while(tok == PIPE) {
       +                t2 = yylval.tree;
       +                t3 = cmd4(dropnl(yylex()), &tok);
       +                t1 = mung2(t2, t1, t3);
       +        }
       +        *ptok = tok;
       +        return t1;
       +}
       +
       +static tree*
       +cmd4(int tok, int *ptok)
       +{
       +        tree *t1, *t2, *t3, *t4;
       +
       +        tok = dropsp(tok);
       +        switch(tok) {
       +        case ';':
       +        case '&':
       +        case '\n':
       +                *ptok = tok;
       +                return nil;
       +
                case IF:
                        // |        IF paren {skipnl();} cmd        {$$=mung2($1, $2, $4);}
                        // |        IF NOT {skipnl();} cmd        {$$=mung1($2, $4);}
                        t1 = yylval.tree;
       -                tok = yylex();
       +                tok = dropsp(yylex());
                        if(tok == NOT) {
                                t1 = yylval.tree;
                                t2 = cmd(dropnl(yylex()), ptok);
       @@ -212,7 +273,7 @@ cmd(int tok, int *ptok)
                        // |        FOR '(' word ')' {skipnl();} cmd
                        //                {$$=mung3($1, $3, (tree *)0, $6);}
                        t1 = yylval.tree;
       -                tok = yylex();
       +                tok = dropsp(yylex());
                        if(tok != '(')
                                syntax(tok);
                        t2 = yyword(yylex(), &tok);
       @@ -247,62 +308,8 @@ cmd(int tok, int *ptok)
                        t1 = yyword(yylex(), &tok);
                        tok = dropnl(tok); // doesn't work in yacc grammar but works here!
                        t2 = brace(tok);
       -                *ptok = yylex();
       +                *ptok = dropsp(yylex());
                        return tree2(SWITCH, t1, t2);
       -        }
       -}
       -
       -static tree*
       -cmd2(int tok, int *ptok)
       -{
       -        int op;
       -        tree *t1, *t2;
       -
       -        // |        cmd ANDAND cmd                {$$=tree2(ANDAND, $1, $3);}
       -        // |        cmd OROR cmd                {$$=tree2(OROR, $1, $3);}
       -
       -        t1 = cmd3(tok, &tok);
       -        while(tok == ANDAND || tok == OROR) {
       -                op = tok;
       -                t2 = cmd3(dropnl(yylex()), &tok);
       -                t1 = tree2(op, t1, t2);
       -        }
       -        *ptok = tok;
       -        return t1;
       -}
       -
       -static tree*
       -cmd3(int tok, int *ptok)
       -{
       -        tree *t1, *t2, *t3;
       -
       -        // |        cmd PIPE cmd                {$$=mung2($2, $1, $3);}
       -        t1 = cmd4(tok, &tok);
       -        while(tok == PIPE) {
       -                t2 = yylval.tree;
       -                t3 = cmd4(dropnl(yylex()), &tok);
       -                t1 = mung2(t2, t1, t3);
       -        }
       -        *ptok = tok;
       -        return t1;
       -}
       -
       -static tree*
       -cmd4(int tok, int *ptok)
       -{
       -        tree *t1, *t2, *t3;
       -
       -        switch(tok) {
       -        case ';':
       -        case '&':
       -        case '\n':
       -                *ptok = tok;
       -                return nil;
       -
       -        case IF:
       -        case FOR:
       -        case SWITCH:
       -        case WHILE:
                        // Note: cmd: a && for(x) y && b is a && {for (x) {y && b}}.
                        return cmd(tok, ptok);
        
       @@ -315,7 +322,7 @@ cmd4(int tok, int *ptok)
                                return tree1(FN, t1);
                        }
                        t2 = brace(tok);
       -                *ptok = yylex();
       +                *ptok = dropsp(yylex());
                        return tree2(FN, t1, t2);
        
                case TWIDDLE:
       @@ -344,7 +351,7 @@ cmd4(int tok, int *ptok)
                case '{':
                        // |        brace epilog                {$$=epimung($1, $2);}
                        t1 = brace(tok);
       -                tok = yylex();
       +                tok = dropsp(yylex());
                        t2 = epilog(tok, ptok);
                        return epimung(t1, t2);
                }
       @@ -396,6 +403,7 @@ words(int tok, int *ptok)
                // |        words word                {$$=tree2(WORDS, $1, $2);}
        
                t = nil;
       +        tok = dropsp(tok);
                while(iswordtok(tok))
                        t = tree2(WORDS, t, yyword(tok, &tok));
                *ptok = tok;
       @@ -428,9 +436,19 @@ yyword(int tok, int *ptok)
                // word1: keyword | comword
        
                t = word1(tok, &tok);
       -        while(tok == '^')
       -                t = tree2('^', t, word1(yylex(), &tok));
       -        *ptok = tok;
       +        for(;;) {
       +                if(iswordtok(tok)) {
       +                        t = tree2('^', t, word1(tok, &tok));
       +                        continue;
       +                }
       +                tok = dropsp(tok);
       +                if(tok == '^') {
       +                        t = tree2('^', t, word1(yylex(), &tok));
       +                        continue;
       +                }
       +                break;
       +        }
       +        *ptok = dropsp(tok);
                return t;
        }
        
       @@ -439,6 +457,7 @@ word1(int tok, int *ptok)
        {
                tree *w, *sub, *t;
        
       +        tok = dropsp(tok);
                switch(tok) {
                default:
                        syntax(tok);
       @@ -458,7 +477,6 @@ word1(int tok, int *ptok)
                        // keyword: FOR|IN|WHILE|IF|NOT|TWIDDLE|BANG|SUBSHELL|SWITCH|FN
                        t = yylval.tree;
                        t->type = WORD;
       -                lastword = 1;
                        *ptok = yylex();
                        return t;
        
       @@ -466,7 +484,7 @@ word1(int tok, int *ptok)
                        // comword: '$' word1                {$$=tree1('$', $2);}
                        // |        '$' word1 SUB words ')'        {$$=tree2(SUB, $2, $4);}
                        w = word1(yylex(), &tok);
       -                if(tok == SUB) {
       +                if(tok == '(') {
                                sub = words(yylex(), &tok);
                                if(tok != ')')
                                        syntax(tok);
 (DIR) diff --git a/src/cmd/rc/syn.y b/src/cmd/rc/syn.y
       @@ -1,4 +1,4 @@
       -%term FOR IN WHILE IF NOT TWIDDLE BANG SUBSHELL SWITCH FN SP
       +%term FOR IN WHILE IF NOT TWIDDLE BANG SUBSHELL SWITCH FN
        %term WORD REDIR REDIRW DUP PIPE SUB
        %term SIMPLE ARGLIST WORDS BRACE PAREN PCMD PIPEFD /* not used in syntax */
        /* operator priorities -- lowest first */
 (DIR) diff --git a/src/cmd/rc/test.rc b/src/cmd/rc/test.rc
       @@ -1,5 +1,9 @@
        # test for parser
        
       +a
       +a b
       +a|b
       +a | b
        {a; b; c}
        x=y a && b || c
        x=y a | b | c
       @@ -63,3 +67,10 @@ x ||
        y
        x |
        y
       +switch x {y} && z
       +switch x {} | y
       +
       +OPTIONS=$OPTIONS' /axescount '^`{echo $1 | sed s/-a//}^' def'
       +
       +# bug in old printfont script - expected more free carats
       +# OPTIONS=$OPTIONS' /axescount '`{echo $1 | sed s/-a//}' def'