I'm working on a simple Pascal-like interpreter using PLY, but I'm encountering a Syntax error at ',' during parsing. The issue arises when trying to parse a source file that includes commas. Below is the code for the PLY code, as well as a sample of the source code I'm using that's causing the error.
Pascal.py
import ply.lex as lex
import ply.yacc as yacc
# Lexical Analysis
tokens = (
    'PROGRAM', 'VAR', 'BEGIN', 'END', 'INTEGER', 'REAL', 'ASSIGN',
    'SEMICOLON', 'COLON', 'COMMA', 'PLUS', 'DIVIDE', 'LPAREN', 'RPAREN',
    'WRITELN', 'STRING', 'ID', 'NUMBER', 'DOT'
)
# Reserved words
reserved = {
    'PROGRAM': 'PROGRAM',
    'VAR': 'VAR',
    'BEGIN': 'BEGIN',
    'END': 'END',
    'integer': 'INTEGER',
    'real': 'REAL',
    'WRITELN': 'WRITELN'
}
# Token definitions
t_ASSIGN = r':='
t_SEMICOLON = r';'
t_COLON = r':'
t_COMMA = r','
t_PLUS = r'\+'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DOT = r'\.'
t_ignore = ' \t'  # Ignore spaces and tabs
def t_STRING(t):
    r"'[^']*'"
    t.value = t.value[1:-1]  # Remove surrounding quotes
    return t
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')  # Check if it's a reserved word
    return t
def t_NUMBER(t):
    r'\d+(\.\d+)?'
    t.value = float(t.value) if '.' in t.value else int(t.value)
    return t
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
def t_error(t):
    print(f"Illegal character '{t.value[0]}'")
    t.lexer.skip(1)
lexer = lex.lex()
# Syntax Analysis
precedence = (
    ('left', 'PLUS', 'DIVIDE'),
)
def p_program(p):
    'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT'
    interpret(p[5])  # Pass the parsed statements to the interpreter
def p_declarations(p):
    '''declarations : VAR var_declaration
                    | empty'''
def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''
def p_type(p):
    '''type : INTEGER
            | REAL'''
def p_statements(p):
    '''statements : statements statement SEMICOLON
                  | statement SEMICOLON'''
    if len(p) == 4:
        p[0] = p[1] + [p[2]]
    else:
        p[0] = [p[1]]
def p_statement(p):
    '''statement : ID ASSIGN expression
                 | WRITELN LPAREN writeln_args RPAREN'''
    if p[1] == 'WRITELN':
        p[0] = ('WRITELN', p[3])
    else:
        p[0] = ('ASSIGN', p[1], p[3])
def p_writeln_args(p):
    '''writeln_args : writeln_args COMMA writeln_arg
                    | writeln_arg'''
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
def p_writeln_arg(p):
    '''writeln_arg : STRING
                   | expression'''
    p[0] = p[1]
def p_expression(p):
    '''expression : expression PLUS term
                  | expression DIVIDE term
                  | term'''
    if len(p) == 4:
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]
def p_term(p):
    '''term : ID
            | NUMBER'''
    p[0] = p[1]
def p_empty(p):
    'empty :'
    p[0] = []
def p_error(p):
    print(f"Syntax error at '{p.value}'" if p else "Syntax error at EOF")
parser = yacc.yacc()
# Interpreter
variables = {}
def evaluate(tree):
    if isinstance(tree, (int, float)):
        return tree
    if isinstance(tree, str):
        return variables.get(tree, 0)
    if tree[0] == '+':
        return evaluate(tree[1]) + evaluate(tree[2])
    if tree[0] == '/':
        return evaluate(tree[1]) / evaluate(tree[2])
    return 0
def interpret(statements):
    for stmt in statements:
        if stmt[0] == 'ASSIGN':
            variables[stmt[1]] = evaluate(stmt[2])
        elif stmt[0] == 'WRITELN':
            print(" ".join(str(evaluate(arg)) if not isinstance(arg, str) else arg for arg in stmt[1]))
# Run the Program
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        print("Usage: python Pascal.py <file>")
        sys.exit(1)
    with open(sys.argv[1], 'r') as file:
        source = file.read()
    lexer.input(source)
    parser.parse(source)
Sample Source Code (SumAndAverage.pas)
PROGRAM SUMANDAVERAGE;
VAR num1,num2,num3: integer;
    sum:integer;
    avg:real;
BEGIN
    num1:=10;
    num2:=20;
    num3:=30;
    sum:=num1+num2+num3;
    avg:=sum/3;
    WRITELN('Num1 is ',num1);
    WRITELN('Num2 is ',num2);
    WRITELN('Num3 is ',num3);
    WRITELN('Sum 3 numbers is ',sum);
    WRITELN('Average is ',avg)
END.
Input and the expected Output
python Pascal.py SumAndAverage.pas
Num1 is 10
Num2 is 20
Num3 is 30
Sum 3 numbers is 60
Average is 2.0000000000000000E+001
I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?
I'm working on a simple Pascal-like interpreter using PLY, but I'm encountering a Syntax error at ',' during parsing. The issue arises when trying to parse a source file that includes commas. Below is the code for the PLY code, as well as a sample of the source code I'm using that's causing the error.
Pascal.py
import ply.lex as lex
import ply.yacc as yacc
# Lexical Analysis
tokens = (
    'PROGRAM', 'VAR', 'BEGIN', 'END', 'INTEGER', 'REAL', 'ASSIGN',
    'SEMICOLON', 'COLON', 'COMMA', 'PLUS', 'DIVIDE', 'LPAREN', 'RPAREN',
    'WRITELN', 'STRING', 'ID', 'NUMBER', 'DOT'
)
# Reserved words
reserved = {
    'PROGRAM': 'PROGRAM',
    'VAR': 'VAR',
    'BEGIN': 'BEGIN',
    'END': 'END',
    'integer': 'INTEGER',
    'real': 'REAL',
    'WRITELN': 'WRITELN'
}
# Token definitions
t_ASSIGN = r':='
t_SEMICOLON = r';'
t_COLON = r':'
t_COMMA = r','
t_PLUS = r'\+'
t_DIVIDE = r'/'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DOT = r'\.'
t_ignore = ' \t'  # Ignore spaces and tabs
def t_STRING(t):
    r"'[^']*'"
    t.value = t.value[1:-1]  # Remove surrounding quotes
    return t
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    t.type = reserved.get(t.value, 'ID')  # Check if it's a reserved word
    return t
def t_NUMBER(t):
    r'\d+(\.\d+)?'
    t.value = float(t.value) if '.' in t.value else int(t.value)
    return t
def t_newline(t):
    r'\n+'
    t.lexer.lineno += len(t.value)
def t_error(t):
    print(f"Illegal character '{t.value[0]}'")
    t.lexer.skip(1)
lexer = lex.lex()
# Syntax Analysis
precedence = (
    ('left', 'PLUS', 'DIVIDE'),
)
def p_program(p):
    'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT'
    interpret(p[5])  # Pass the parsed statements to the interpreter
def p_declarations(p):
    '''declarations : VAR var_declaration
                    | empty'''
def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''
def p_type(p):
    '''type : INTEGER
            | REAL'''
def p_statements(p):
    '''statements : statements statement SEMICOLON
                  | statement SEMICOLON'''
    if len(p) == 4:
        p[0] = p[1] + [p[2]]
    else:
        p[0] = [p[1]]
def p_statement(p):
    '''statement : ID ASSIGN expression
                 | WRITELN LPAREN writeln_args RPAREN'''
    if p[1] == 'WRITELN':
        p[0] = ('WRITELN', p[3])
    else:
        p[0] = ('ASSIGN', p[1], p[3])
def p_writeln_args(p):
    '''writeln_args : writeln_args COMMA writeln_arg
                    | writeln_arg'''
    if len(p) == 4:
        p[0] = p[1] + [p[3]]
    else:
        p[0] = [p[1]]
def p_writeln_arg(p):
    '''writeln_arg : STRING
                   | expression'''
    p[0] = p[1]
def p_expression(p):
    '''expression : expression PLUS term
                  | expression DIVIDE term
                  | term'''
    if len(p) == 4:
        p[0] = (p[2], p[1], p[3])
    else:
        p[0] = p[1]
def p_term(p):
    '''term : ID
            | NUMBER'''
    p[0] = p[1]
def p_empty(p):
    'empty :'
    p[0] = []
def p_error(p):
    print(f"Syntax error at '{p.value}'" if p else "Syntax error at EOF")
parser = yacc.yacc()
# Interpreter
variables = {}
def evaluate(tree):
    if isinstance(tree, (int, float)):
        return tree
    if isinstance(tree, str):
        return variables.get(tree, 0)
    if tree[0] == '+':
        return evaluate(tree[1]) + evaluate(tree[2])
    if tree[0] == '/':
        return evaluate(tree[1]) / evaluate(tree[2])
    return 0
def interpret(statements):
    for stmt in statements:
        if stmt[0] == 'ASSIGN':
            variables[stmt[1]] = evaluate(stmt[2])
        elif stmt[0] == 'WRITELN':
            print(" ".join(str(evaluate(arg)) if not isinstance(arg, str) else arg for arg in stmt[1]))
# Run the Program
if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        print("Usage: python Pascal.py <file>")
        sys.exit(1)
    with open(sys.argv[1], 'r') as file:
        source = file.read()
    lexer.input(source)
    parser.parse(source)
Sample Source Code (SumAndAverage.pas)
PROGRAM SUMANDAVERAGE;
VAR num1,num2,num3: integer;
    sum:integer;
    avg:real;
BEGIN
    num1:=10;
    num2:=20;
    num3:=30;
    sum:=num1+num2+num3;
    avg:=sum/3;
    WRITELN('Num1 is ',num1);
    WRITELN('Num2 is ',num2);
    WRITELN('Num3 is ',num3);
    WRITELN('Sum 3 numbers is ',sum);
    WRITELN('Average is ',avg)
END.
Input and the expected Output
python Pascal.py SumAndAverage.pas
Num1 is 10
Num2 is 20
Num3 is 30
Sum 3 numbers is 60
Average is 2.0000000000000000E+001
I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?
As mentioned in the comments: your p_var_declaration only accepts one ID:
def p_var_declaration(p):
    '''var_declaration : ID COLON type SEMICOLON var_declaration
                       | ID COLON type SEMICOLON'''
change that into:
def p_var_declaration(p):
    '''var_declaration : vars COLON type SEMICOLON var_declaration
                       | vars COLON type SEMICOLON'''
def p_vars(p):
    '''vars : ID
            | vars COMMA ID'''
Also, your last WRITELN('Average is ',avg) is missing a semi-colon at the end.

VARstatement. Your grammar doesn't allow a comma-separated list of IDs. – Barmar Commented Jan 2 at 20:11p.__dict__-->{'value': ',', 'lineno': 2, 'lexpos': 31, 'type': 'COMMA', 'lexer': <ply.lex.Lexer object at 0x102a5c2d0>}There error is not in theWRITELNstatement. – BeRT2me Commented Jan 2 at 20:11