Staging
v0.5.1
swh:1:snp:635f4099902912592851108bcac178ff574f7c5f
Raw File
Tip revision: 69dec9c8d23606180e660b30d327000e9463b82d authored by Ɓukasz Langa on 02 July 2020, 17:57:45 UTC
Python 3.9.0b4
Tip revision: 69dec9c
test_first_sets.py
import unittest

from test import test_tools
from typing import Dict, Set

test_tools.skip_if_missing('peg_generator')
with test_tools.imports_under_tool('peg_generator'):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import parse_string
    from pegen.first_sets import FirstSetCalculator
    from pegen.grammar import Grammar


class TestFirstSets(unittest.TestCase):
    def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
        grammar: Grammar = parse_string(grammar_source, GrammarParser)
        return FirstSetCalculator(grammar.rules).calculate()

    def test_alternatives(self) -> None:
        grammar = """
            start: expr NEWLINE? ENDMARKER
            expr: A | B
            A: 'a' | '-'
            B: 'b' | '+'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "A": {"'a'", "'-'"},
            "B": {"'+'", "'b'"},
            "expr": {"'+'", "'a'", "'b'", "'-'"},
            "start": {"'+'", "'a'", "'b'", "'-'"},
        })

    def test_optionals(self) -> None:
        grammar = """
            start: expr NEWLINE
            expr: ['a'] ['b'] 'c'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "expr": {"'c'", "'a'", "'b'"},
            "start": {"'c'", "'a'", "'b'"},
        })

    def test_repeat_with_separator(self) -> None:
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})

    def test_optional_operator(self) -> None:
        grammar = """
        start: sum NEWLINE
        sum: (term)? 'b'
        term: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "term": {"NUMBER"},
            "sum": {"NUMBER", "'b'"},
            "start": {"'b'", "NUMBER"},
        })

    def test_optional_literal(self) -> None:
        grammar = """
        start: sum NEWLINE
        sum: '+' ? term
        term: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "term": {"NUMBER"},
            "sum": {"'+'", "NUMBER"},
            "start": {"'+'", "NUMBER"},
        })

    def test_optional_after(self) -> None:
        grammar = """
        start: term NEWLINE
        term: NUMBER ['+']
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})

    def test_optional_before(self) -> None:
        grammar = """
        start: term NEWLINE
        term: ['+'] NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})

    def test_repeat_0(self) -> None:
        grammar = """
        start: thing* "+" NEWLINE
        thing: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})

    def test_repeat_0_with_group(self) -> None:
        grammar = """
        start: ('+' '-')* term NEWLINE
        term: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})

    def test_repeat_1(self) -> None:
        grammar = """
        start: thing+ '-' NEWLINE
        thing: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})

    def test_repeat_1_with_group(self) -> None:
        grammar = """
        start: ('+' term)+ term NEWLINE
        term: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})

    def test_gather(self) -> None:
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})

    def test_positive_lookahead(self) -> None:
        grammar = """
        start: expr NEWLINE
        expr: &'a' opt
        opt: 'a' | 'b' | 'c'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "expr": {"'a'"},
            "start": {"'a'"},
            "opt": {"'b'", "'c'", "'a'"},
        })

    def test_negative_lookahead(self) -> None:
        grammar = """
        start: expr NEWLINE
        expr: !'a' opt
        opt: 'a' | 'b' | 'c'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "opt": {"'b'", "'a'", "'c'"},
            "expr": {"'b'", "'c'"},
            "start": {"'b'", "'c'"},
        })

    def test_left_recursion(self) -> None:
        grammar = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        foo: 'foo'
        bar: 'bar'
        baz: 'baz'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "expr": {"NUMBER", "'-'"},
            "term": {"NUMBER"},
            "start": {"NUMBER", "'-'"},
            "foo": {"'foo'"},
            "bar": {"'bar'"},
            "baz": {"'baz'"},
        })

    def test_advance_left_recursion(self) -> None:
        grammar = """
        start: NUMBER | sign start
        sign: ['-']
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})

    def test_mutual_left_recursion(self) -> None:
        grammar = """
        start: foo 'E'
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "foo": {"'D'", "'B'"},
            "bar": {"'D'"},
            "start": {"'D'", "'B'"},
        })

    def test_nasty_left_recursion(self) -> None:
        # TODO: Validate this
        grammar = """
        start: target '='
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})

    def test_nullable_rule(self) -> None:
        grammar = """
        start: sign thing $
        sign: ['-']
        thing: NUMBER
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "sign": {"", "'-'"},
            "thing": {"NUMBER"},
            "start": {"NUMBER", "'-'"},
        })

    def test_epsilon_production_in_start_rule(self) -> None:
        grammar = """
        start: ['-'] $
        """
        self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})

    def test_multiple_nullable_rules(self) -> None:
        grammar = """
        start: sign thing other another $
        sign: ['-']
        thing: ['+']
        other: '*'
        another: '/'
        """
        self.assertEqual(self.calculate_first_sets(grammar), {
            "sign": {"", "'-'"},
            "thing": {"'+'", ""},
            "start": {"'+'", "'-'", "'*'"},
            "other": {"'*'"},
            "another": {"'/'"},
        })
back to top