python - Syntax error at ',' in PLY Pascal-like interpreter - Stack Overflow

admin2025-05-01 46

I'm working on a simple Pascal-like interpreter using PLY, but I'm encountering a "Syntax error at ','" during parsing. The issue arises when trying to parse a source file that includes commas. Below is the PLY code, as well as a sample of the source code I'm using that causes the error.

Pascal.py

import ply.lex as lex
import ply.yacc as yacc

# Lexical Analysis
# Every token type the lexer may emit (keywords, punctuation, literals).
tokens = (
    'PROGRAM',
    'VAR',
    'BEGIN',
    'END',
    'INTEGER',
    'REAL',
    'ASSIGN',
    'SEMICOLON',
    'COLON',
    'COMMA',
    'PLUS',
    'DIVIDE',
    'LPAREN',
    'RPAREN',
    'WRITELN',
    'STRING',
    'ID',
    'NUMBER',
    'DOT',
)

# Reserved words: exact (case-sensitive) source spelling -> token type.
reserved = dict(
    PROGRAM='PROGRAM',
    VAR='VAR',
    BEGIN='BEGIN',
    END='END',
    integer='INTEGER',
    real='REAL',
    WRITELN='WRITELN',
)

# Token definitions: fixed-text tokens expressed as regular expressions.
t_ASSIGN = r':='      # assignment operator
t_SEMICOLON = r';'    # statement / declaration terminator
t_COLON = r':'        # separates a name from its type
t_COMMA = r','        # argument separator
t_PLUS = r'\+'        # addition
t_DIVIDE = r'/'       # division
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DOT = r'\.'         # program terminator
t_ignore = ' \t'  # Ignore spaces and tabs

# Lexer rule for single-quoted string literals. The raw string below is the
# token's regular expression (read by PLY), not documentation.
def t_STRING(t):
    r"'[^']*'"
    quoted = t.value
    # Keep only the text between the opening and closing quote.
    t.value = quoted[1:-1]
    return t

# Lexer rule for identifiers and keywords. The raw string below is the
# token's regular expression (read by PLY), not documentation.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    lexeme = t.value
    # A spelling listed in `reserved` becomes that keyword token; anything
    # else is a plain identifier.
    t.type = reserved[lexeme] if lexeme in reserved else 'ID'
    return t

# Lexer rule for numeric literals. The raw string below is the token's
# regular expression (read by PLY), not documentation.
def t_NUMBER(t):
    r'\d+(\.\d+)?'
    text = t.value
    # A decimal point means a float literal; otherwise an int.
    if '.' in text:
        t.value = float(text)
    else:
        t.value = int(text)
    return t

# Lexer rule that consumes runs of newlines and keeps the line counter in
# sync. Nothing is returned, so no token is produced.
def t_newline(t):
    r'\n+'
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + len(t.value)

# Lexer error handler: report the offending character, then skip past it so
# scanning can continue.
def t_error(t):
    bad_char = t.value[0]
    print(f"Illegal character '{bad_char}'")
    t.lexer.skip(1)

# Build the lexer via PLY's lex.lex(); the result is kept at module level and
# fed the program text in the __main__ section.
lexer = lex.lex()

# Syntax Analysis
# Operator table for yacc: PLUS and DIVIDE share a single left-associative
# precedence level.
precedence = (('left', 'PLUS', 'DIVIDE'),)

# Top-level rule: once a whole program has been recognised, run it.
# The string below is the grammar production (read by PLY), not documentation.
def p_program(p):
    'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT'
    # Symbols are numbered from 1: p[5] is the BEGIN token, and the parsed
    # statement list is the sixth symbol.
    interpret(p[6])  # Pass the parsed statements to the interpreter

# Optional declaration section: either VAR followed by declarations, or
# nothing. The docstring is the grammar production; no semantic value is
# assigned to p[0].
def p_declarations(p):
    '''declarations : VAR var_declaration
                    | empty'''

# One or more "name : type ;" declarations (right-recursive). Each
# declaration names exactly one ID before the COLON, so a comma-separated
# list such as "num1,num2,num3: integer;" does not match this rule.
# The docstring is the grammar production; no semantic value is assigned.
def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''

# Type names accepted in declarations: the INTEGER or REAL keyword token.
# The docstring is the grammar production; no semantic value is assigned.
def p_type(p):
    '''type : INTEGER
            | REAL'''

# Statement list: every statement is followed by a SEMICOLON. Builds a
# Python list of statement tuples in source order.
def p_statements(p):
    '''statements : statements statement SEMICOLON
                  | statement SEMICOLON'''
    if len(p) != 4:
        # Base case: a single statement starts the list.
        p[0] = [p[1]]
        return
    # Recursive case: extend the list built so far with the new statement.
    p[0] = [*p[1], p[2]]

# Single statement: an assignment or a WRITELN call. Produces
# ('ASSIGN', name, expression) or ('WRITELN', argument_list).
def p_statement(p):
    '''statement : ID ASSIGN expression
                 | WRITELN LPAREN writeln_args RPAREN'''
    first = p[1]
    if first != 'WRITELN':
        p[0] = ('ASSIGN', first, p[3])
    else:
        p[0] = ('WRITELN', p[3])

# Comma-separated WRITELN arguments (left-recursive); builds a Python list
# of the arguments in source order.
def p_writeln_args(p):
    '''writeln_args : writeln_args COMMA writeln_arg
                    | writeln_arg'''
    if len(p) != 4:
        # Base case: the first argument starts the list.
        p[0] = [p[1]]
        return
    # Recursive case: p[2] is the COMMA, so the new argument is p[3].
    p[0] = [*p[1], p[3]]

# One WRITELN argument: a string literal or an expression. The value of the
# matched symbol is passed through unchanged.
def p_writeln_arg(p):
    '''writeln_arg : STRING
                   | expression'''
    argument = p[1]
    p[0] = argument

# Additive level of the expression grammar. Division is parsed one level
# lower (see p_division) so that it binds tighter than addition:
# "1 + 6 / 3" becomes ('+', 1, ('/', 6, 3)) rather than ('/', ('+', 1, 6), 3).
# The docstring is the grammar production (read by PLY), not documentation.
def p_expression(p):
    '''expression : expression PLUS division
                  | division'''
    if len(p) == 4:
        # Binary node: (operator text, left operand, right operand).
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]

# Division level: left-associative chain of terms separated by DIVIDE.
def p_division(p):
    '''division : division DIVIDE term
                | term'''
    if len(p) == 4:
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]

# Leaf of an expression: a variable name (ID) or a numeric literal. The
# token's value is passed through unchanged.
def p_term(p):
    '''term : ID
            | NUMBER'''
    leaf = p[1]
    p[0] = leaf

# Empty production; its semantic value is a fresh empty list.
def p_empty(p):
    'empty :'
    p[0] = list()

# Parser error handler: p is the offending token, or a falsy value when the
# input ended unexpectedly.
def p_error(p):
    if p:
        print(f"Syntax error at '{p.value}'")
    else:
        print("Syntax error at EOF")

# Build the parser via PLY's yacc.yacc(); the result is kept at module level
# and invoked in the __main__ section.
parser = yacc.yacc()

# Interpreter
# Global variable store: variable name -> current numeric value.
variables = {}

def evaluate(tree):
    """Return the numeric value of an expression tree.

    A tree is an int/float literal, a variable name (str), or a tuple
    ``(op, left, right)`` where ``op`` is ``'+'`` or ``'/'``.

    Names missing from ``variables`` evaluate to 0, and a tuple with any
    other operator evaluates to 0. Division is true division, so dividing
    by zero raises ZeroDivisionError.
    """
    if isinstance(tree, (int, float)):
        return tree
    if isinstance(tree, str):
        return variables.get(tree, 0)
    if tree[0] == '+':
        return evaluate(tree[1]) + evaluate(tree[2])
    if tree[0] == '/':
        return evaluate(tree[1]) / evaluate(tree[2])
    return 0

def _render_writeln_arg(arg):
    """Return the text WRITELN should print for one argument."""
    # NOTE(review): the parser hands over string literals and bare
    # identifiers as plain str, so they cannot be told apart here. A str
    # naming a defined variable is printed as that variable's value; any
    # other str is printed verbatim. Tagging literals in the grammar would
    # remove the ambiguity.
    if isinstance(arg, str) and arg not in variables:
        return arg
    return str(evaluate(arg))

def interpret(statements):
    """Execute a list of statement tuples in order.

    ``('ASSIGN', name, expr)`` stores the evaluated expression in
    ``variables``; ``('WRITELN', args)`` prints the rendered arguments on
    one line. Tuples with any other tag are ignored.
    """
    for stmt in statements:
        kind = stmt[0]
        if kind == 'ASSIGN':
            variables[stmt[1]] = evaluate(stmt[2])
        elif kind == 'WRITELN':
            # Arguments are concatenated without a separator, so
            # WRITELN('Num1 is ', num1) prints "Num1 is 10".
            print("".join(_render_writeln_arg(arg) for arg in stmt[1]))

# Run the Program
if __name__ == '__main__':
    import sys
    # Exactly one positional argument is needed: the source file to run.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python Pascal.py <file>")
        sys.exit(1)
    with open(cli_args[0], 'r') as handle:
        source = handle.read()
    # Feed the text to the lexer, then parse; the program is executed from
    # the p_program grammar action during parsing.
    lexer.input(source)
    parser.parse(source)

Sample Source Code (SumAndAverage.pas)

PROGRAM SUMANDAVERAGE;
VAR num1,num2,num3: integer;
    sum:integer;
    avg:real;
BEGIN
    num1:=10;
    num2:=20;
    num3:=30;
    sum:=num1+num2+num3;
    avg:=sum/3;
    WRITELN('Num1 is ',num1);
    WRITELN('Num2 is ',num2);
    WRITELN('Num3 is ',num3);
    WRITELN('Sum 3 numbers is ',sum);
    WRITELN('Average is ',avg)
END.

Input and the expected Output

python Pascal.py SumAndAverage.pas

Num1 is 10
Num2 is 20
Num3 is 30
Sum 3 numbers is 60
Average is 2.0000000000000000E+001

I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?

Pascal.py

import ply.lex as lex
import ply.yacc as yacc

# Lexical Analysis
# Every token type the lexer may emit (keywords, punctuation, literals).
tokens = (
    'PROGRAM',
    'VAR',
    'BEGIN',
    'END',
    'INTEGER',
    'REAL',
    'ASSIGN',
    'SEMICOLON',
    'COLON',
    'COMMA',
    'PLUS',
    'DIVIDE',
    'LPAREN',
    'RPAREN',
    'WRITELN',
    'STRING',
    'ID',
    'NUMBER',
    'DOT',
)

# Reserved words: exact (case-sensitive) source spelling -> token type.
reserved = dict(
    PROGRAM='PROGRAM',
    VAR='VAR',
    BEGIN='BEGIN',
    END='END',
    integer='INTEGER',
    real='REAL',
    WRITELN='WRITELN',
)

# Token definitions: fixed-text tokens expressed as regular expressions.
t_ASSIGN = r':='      # assignment operator
t_SEMICOLON = r';'    # statement / declaration terminator
t_COLON = r':'        # separates a name from its type
t_COMMA = r','        # argument separator
t_PLUS = r'\+'        # addition
t_DIVIDE = r'/'       # division
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DOT = r'\.'         # program terminator
t_ignore = ' \t'  # Ignore spaces and tabs

# Lexer rule for single-quoted string literals. The raw string below is the
# token's regular expression (read by PLY), not documentation.
def t_STRING(t):
    r"'[^']*'"
    quoted = t.value
    # Keep only the text between the opening and closing quote.
    t.value = quoted[1:-1]
    return t

# Lexer rule for identifiers and keywords. The raw string below is the
# token's regular expression (read by PLY), not documentation.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    lexeme = t.value
    # A spelling listed in `reserved` becomes that keyword token; anything
    # else is a plain identifier.
    t.type = reserved[lexeme] if lexeme in reserved else 'ID'
    return t

# Lexer rule for numeric literals. The raw string below is the token's
# regular expression (read by PLY), not documentation.
def t_NUMBER(t):
    r'\d+(\.\d+)?'
    text = t.value
    # A decimal point means a float literal; otherwise an int.
    if '.' in text:
        t.value = float(text)
    else:
        t.value = int(text)
    return t

# Lexer rule that consumes runs of newlines and keeps the line counter in
# sync. Nothing is returned, so no token is produced.
def t_newline(t):
    r'\n+'
    lexer_state = t.lexer
    lexer_state.lineno = lexer_state.lineno + len(t.value)

# Lexer error handler: report the offending character, then skip past it so
# scanning can continue.
def t_error(t):
    bad_char = t.value[0]
    print(f"Illegal character '{bad_char}'")
    t.lexer.skip(1)

# Build the lexer via PLY's lex.lex(); the result is kept at module level and
# fed the program text in the __main__ section.
lexer = lex.lex()

# Syntax Analysis
# Operator table for yacc: PLUS and DIVIDE share a single left-associative
# precedence level.
precedence = (('left', 'PLUS', 'DIVIDE'),)

# Top-level rule: once a whole program has been recognised, run it.
# The string below is the grammar production (read by PLY), not documentation.
def p_program(p):
    'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT'
    # Symbols are numbered from 1: p[5] is the BEGIN token, and the parsed
    # statement list is the sixth symbol.
    interpret(p[6])  # Pass the parsed statements to the interpreter

# Optional declaration section: either VAR followed by declarations, or
# nothing. The docstring is the grammar production; no semantic value is
# assigned to p[0].
def p_declarations(p):
    '''declarations : VAR var_declaration
                    | empty'''

# One or more "name : type ;" declarations (right-recursive). Each
# declaration names exactly one ID before the COLON, so a comma-separated
# list such as "num1,num2,num3: integer;" does not match this rule.
# The docstring is the grammar production; no semantic value is assigned.
def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''

# Type names accepted in declarations: the INTEGER or REAL keyword token.
# The docstring is the grammar production; no semantic value is assigned.
def p_type(p):
    '''type : INTEGER
            | REAL'''

# Statement list: every statement is followed by a SEMICOLON. Builds a
# Python list of statement tuples in source order.
def p_statements(p):
    '''statements : statements statement SEMICOLON
                  | statement SEMICOLON'''
    if len(p) != 4:
        # Base case: a single statement starts the list.
        p[0] = [p[1]]
        return
    # Recursive case: extend the list built so far with the new statement.
    p[0] = [*p[1], p[2]]

# Single statement: an assignment or a WRITELN call. Produces
# ('ASSIGN', name, expression) or ('WRITELN', argument_list).
def p_statement(p):
    '''statement : ID ASSIGN expression
                 | WRITELN LPAREN writeln_args RPAREN'''
    first = p[1]
    if first != 'WRITELN':
        p[0] = ('ASSIGN', first, p[3])
    else:
        p[0] = ('WRITELN', p[3])

# Comma-separated WRITELN arguments (left-recursive); builds a Python list
# of the arguments in source order.
def p_writeln_args(p):
    '''writeln_args : writeln_args COMMA writeln_arg
                    | writeln_arg'''
    if len(p) != 4:
        # Base case: the first argument starts the list.
        p[0] = [p[1]]
        return
    # Recursive case: p[2] is the COMMA, so the new argument is p[3].
    p[0] = [*p[1], p[3]]

# One WRITELN argument: a string literal or an expression. The value of the
# matched symbol is passed through unchanged.
def p_writeln_arg(p):
    '''writeln_arg : STRING
                   | expression'''
    argument = p[1]
    p[0] = argument

# Additive level of the expression grammar. Division is parsed one level
# lower (see p_division) so that it binds tighter than addition:
# "1 + 6 / 3" becomes ('+', 1, ('/', 6, 3)) rather than ('/', ('+', 1, 6), 3).
# The docstring is the grammar production (read by PLY), not documentation.
def p_expression(p):
    '''expression : expression PLUS division
                  | division'''
    if len(p) == 4:
        # Binary node: (operator text, left operand, right operand).
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]

# Division level: left-associative chain of terms separated by DIVIDE.
def p_division(p):
    '''division : division DIVIDE term
                | term'''
    if len(p) == 4:
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]

# Leaf of an expression: a variable name (ID) or a numeric literal. The
# token's value is passed through unchanged.
def p_term(p):
    '''term : ID
            | NUMBER'''
    leaf = p[1]
    p[0] = leaf

# Empty production; its semantic value is a fresh empty list.
def p_empty(p):
    'empty :'
    p[0] = list()

# Parser error handler: p is the offending token, or a falsy value when the
# input ended unexpectedly.
def p_error(p):
    if p:
        print(f"Syntax error at '{p.value}'")
    else:
        print("Syntax error at EOF")

# Build the parser via PLY's yacc.yacc(); the result is kept at module level
# and invoked in the __main__ section.
parser = yacc.yacc()

# Interpreter
# Global variable store: variable name -> current numeric value.
variables = {}

def evaluate(tree):
    """Return the numeric value of an expression tree.

    A tree is an int/float literal, a variable name (str), or a tuple
    ``(op, left, right)`` where ``op`` is ``'+'`` or ``'/'``.

    Names missing from ``variables`` evaluate to 0, and a tuple with any
    other operator evaluates to 0. Division is true division, so dividing
    by zero raises ZeroDivisionError.
    """
    if isinstance(tree, (int, float)):
        return tree
    if isinstance(tree, str):
        return variables.get(tree, 0)
    if tree[0] == '+':
        return evaluate(tree[1]) + evaluate(tree[2])
    if tree[0] == '/':
        return evaluate(tree[1]) / evaluate(tree[2])
    return 0

def _render_writeln_arg(arg):
    """Return the text WRITELN should print for one argument."""
    # NOTE(review): the parser hands over string literals and bare
    # identifiers as plain str, so they cannot be told apart here. A str
    # naming a defined variable is printed as that variable's value; any
    # other str is printed verbatim. Tagging literals in the grammar would
    # remove the ambiguity.
    if isinstance(arg, str) and arg not in variables:
        return arg
    return str(evaluate(arg))

def interpret(statements):
    """Execute a list of statement tuples in order.

    ``('ASSIGN', name, expr)`` stores the evaluated expression in
    ``variables``; ``('WRITELN', args)`` prints the rendered arguments on
    one line. Tuples with any other tag are ignored.
    """
    for stmt in statements:
        kind = stmt[0]
        if kind == 'ASSIGN':
            variables[stmt[1]] = evaluate(stmt[2])
        elif kind == 'WRITELN':
            # Arguments are concatenated without a separator, so
            # WRITELN('Num1 is ', num1) prints "Num1 is 10".
            print("".join(_render_writeln_arg(arg) for arg in stmt[1]))

# Run the Program
if __name__ == '__main__':
    import sys
    # Exactly one positional argument is needed: the source file to run.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python Pascal.py <file>")
        sys.exit(1)
    with open(cli_args[0], 'r') as handle:
        source = handle.read()
    # Feed the text to the lexer, then parse; the program is executed from
    # the p_program grammar action during parsing.
    lexer.input(source)
    parser.parse(source)

Sample Source Code (SumAndAverage.pas)

PROGRAM SUMANDAVERAGE;
VAR num1,num2,num3: integer;
    sum:integer;
    avg:real;
BEGIN
    num1:=10;
    num2:=20;
    num3:=30;
    sum:=num1+num2+num3;
    avg:=sum/3;
    WRITELN('Num1 is ',num1);
    WRITELN('Num2 is ',num2);
    WRITELN('Num3 is ',num3);
    WRITELN('Sum 3 numbers is ',sum);
    WRITELN('Average is ',avg)
END.

Input and the expected Output

python Pascal.py SumAndAverage.pas

Num1 is 10
Num2 is 20
Num3 is 30
Sum 3 numbers is 60
Average is 2.0000000000000000E+001

I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?

Share Improve this question asked Jan 2 at 19:59 Juju 12 bronze badges

1 How do you know the problem is with the WRITELN statement? It seems that you have omitted part of the error message. Please post the full error message. – John Gordon Commented Jan 2 at 20:04
2 I suspect the problem is the VAR statement. Your grammar doesn't allow a comma-separated list of IDs. – Barmar Commented Jan 2 at 20:11
p.__dict__ --> {'value': ',', 'lineno': 2, 'lexpos': 31, 'type': 'COMMA', 'lexer': <ply.lex.Lexer object at 0x102a5c2d0>} The error is not in the WRITELN statement. – BeRT2me Commented Jan 2 at 20:11

Add a comment |

1 Answer 1

Sorted by: Reset to default 0

As mentioned in the comments: your p_var_declaration only accepts one ID:

# Rule as quoted from the question: each declaration names exactly one ID
# before the COLON, so "num1,num2,num3: integer;" does not match.
def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''

change that into:

# Corrected rule: the single ID is replaced by the `vars` nonterminal, so a
# declaration may start with a comma-separated list of names.
def p_var_declaration(p):
    '''var_declaration : vars COLON type SEMICOLON var_declaration
                       | vars COLON type SEMICOLON'''

# Left-recursive list of one or more IDs separated by COMMA. The docstring
# is the grammar production; no semantic value is assigned to p[0].
def p_vars(p):
    '''vars : ID
            | vars COMMA ID'''

Also, your last WRITELN('Average is ',avg) is missing a semi-colon at the end.

转载请注明原文地址:http://anycun.com/QandA/1746099401a91656.html

python - Syntax error at ',' in PLY Pascal-like interpreter - Stack Overflow

1 Answer 1

python - Syntax error at ',' in PLY Pascal-like interpreter - Stack Overflow

python - Syntax error at ',' in PLY Pascal-like interpreter - Stack Overflow

1 Answer 1

python - Syntax error at ',' in PLY Pascal-like interpreter - Stack Overflow

python - Syntax error at ',' in PLY Pascal-like interpreter - Stack Overflow