housecarpenter: A drawing of Bob Dylan singing into a microphone with white make-up on his face. (Default)
Realized today that you can write the parse function from my earlier post in a much simpler way using iterators and recursion:

Expr = t.Union[str, t.Tuple['Expr']]

def parse(tokens: t.Iterable[str]) -> t.Iterator[Expr]:
    """Parse an iterable of Lisp tokens.

    Unmatched parentheses will be ignored.

    >>> tuple(parse(lex('a (b (c d))')))
    ('a', ('b', ('c', 'd')))
    >>> tuple(parse(lex('((a b) c (d e))')))
    ((('a', 'b'), 'c', ('d', 'e')),)
    >>> tuple(parse(lex('(a (b)')))
    (('a', ('b',)),)
    >>> tuple(parse(lex('a)(')))
    ('a',)
    """
    for token in tokens:
        if token == ')':
            return
        elif token == '(':
            yield tuple(parse(tokens))
        else:
            yield token
housecarpenter: A drawing of Bob Dylan singing into a microphone with white make-up on his face. (Default)
I'm proud of this pair of little S-expression-parsing functions (nonrecursive!):

from typing import Iterable, Iterator, List, Union
import string

def lex(code: str) -> Iterator[str]:
    """Lexes a string of Lisp code, returning an iterator over its tokens.

    >>> list(lex(''))
    []
    >>> list(lex('()ab cde'))
    ['(', ')', 'ab', 'cde']
    >>> list(lex('ab ( (b ))'))
    ['ab', '(', '(', 'b', ')', ')']
    """
    token: List[str] = []

    for c in code:
        if c == '(':
            if token:
                yield ''.join(token)
                token.clear()
            yield c
        elif c == ')':
            if token:
                yield ''.join(token)
                token.clear()
            yield c
        elif c in string.whitespace:
            if token:
                yield ''.join(token)
                token.clear()
        else:
            token.append(c)

    if token:
        yield ''.join(token)

class UnmatchedParenthesis(Exception):
    pass

Expression = Union[str, List['Expression']]

def parse(tokens: Iterable[str]) -> List[Expression]:
    """Parses an iterable of Lisp tokens, returning an abstract syntax tree.

    >>> parse(lex(''))
    []
    >>> parse(lex('a ((b) (c d) (e)) f'))
    ['a', [['b'], ['c', 'd'], ['e']], 'f']
    >>> parse(lex('(a)'))
    [['a']]
    >>> parse(lex('('))
    Traceback (most recent call last):
        ...
    UnmatchedParenthesis: 1 unmatched opening parenthesis
    >>> parse(lex('(((a)'))
    Traceback (most recent call last):
        ...
    UnmatchedParenthesis: 2 unmatched opening parentheses
    >>> parse(lex(')'))
    Traceback (most recent call last):
        ...
    UnmatchedParenthesis: unmatched closing parenthesis
    """
    tree: List[Expression] = []
    parent_nodes: List[List[Expression]] = [tree]

    for token in tokens:
        if token == '(':
            subtree: List[Expression] = []
            parent_nodes[-1].append(subtree)
            parent_nodes.append(subtree)
        elif token == ')':
            if len(parent_nodes) <= 1:
                raise UnmatchedParenthesis('unmatched closing parenthesis')
            parent_nodes.pop()
        else:
            parent_nodes[-1].append(token)
 
    if len(parent_nodes) > 1:
        unmatched_count: int = len(parent_nodes) - 1
        ending: str = (
            'is' if unmatched_count == 1
            else 'es'
        )
        raise UnmatchedParenthesis(
            f'{unmatched_count} unmatched opening parenthes{ending}'
        )

    return tree

if __name__ == '__main__':
    import doctest
    doctest.testmod()

Profile

housecarpenter: A drawing of Bob Dylan singing into a microphone with white make-up on his face. (Default)
housecarpenter

April 2019

S M T W T F S
 123456
78910111213
14151617181920
21222324252627
2829 30    

Syndicate

RSS Atom

Most Popular Tags

Style Credit

Expand Cut Tags

No cut tags
Page generated Dec. 14th, 2025 09:18 am
Powered by Dreamwidth Studios