housecarpenter

I'm proud of this pair of little S-expression-parsing functions (nonrecursive!):

from typing import Iterable, Iterator, List, Union
import string

def lex(code: str) -> Iterator[str]:
    """Lazily split a string of Lisp source into tokens.

    Each parenthesis is a token on its own.  Any other maximal run of
    characters that contains neither a parenthesis nor a character from
    ``string.whitespace`` is an atom token.  Whitespace only separates
    tokens and is never yielded.

    >>> list(lex(''))
    []
    >>> list(lex('()ab cde'))
    ['(', ')', 'ab', 'cde']
    >>> list(lex('ab ( (b ))'))
    ['ab', '(', '(', 'b', ')', ')']
    """
    # Characters of the atom currently being read; empty between atoms.
    pending: List[str] = []

    for char in code:
        is_paren = char == '(' or char == ')'

        if not is_paren and char not in string.whitespace:
            # Ordinary character: keep growing the current atom.
            pending.append(char)
            continue

        # A delimiter (parenthesis or whitespace) ends any atom in progress.
        if pending:
            yield ''.join(pending)
            pending.clear()

        # Parentheses are tokens themselves; whitespace is dropped.
        if is_paren:
            yield char

    # The input may end in the middle of an atom.
    if pending:
        yield ''.join(pending)

class UnmatchedParenthesis(Exception):
    """Raised by ``parse`` when the parentheses in a token stream do not pair up.

    The message says whether a closing parenthesis had no opener, or how
    many opening parentheses were never closed.
    """

# A parsed expression is either an atom (a token string) or a list of
# nested expressions.  The quoted 'Expression' is a forward reference,
# which is what lets the alias refer to itself recursively.
Expression = Union[str, List['Expression']]

def parse(tokens: Iterable[str]) -> List[Expression]:
    """Build a nested-list syntax tree from an iterable of Lisp tokens.

    A ``'('`` token opens a new sub-list, a ``')'`` token closes the
    innermost open one, and every other token is stored as a string in
    the list that is currently open.  The return value is the list of
    top-level expressions.

    Raises ``UnmatchedParenthesis`` as soon as a ``')'`` arrives with no
    open list to close, or at the end of input if any list is still open.

    >>> parse(lex(''))
    []
    >>> parse(lex('a ((b) (c d) (e)) f'))
    ['a', [['b'], ['c', 'd'], ['e']], 'f']
    >>> parse(lex('(a)'))
    [['a']]
    >>> parse(lex('('))
    Traceback (most recent call last):
        ...
    UnmatchedParenthesis: 1 unmatched opening parenthesis
    >>> parse(lex('(((a)'))
    Traceback (most recent call last):
        ...
    UnmatchedParenthesis: 2 unmatched opening parentheses
    >>> parse(lex(')'))
    Traceback (most recent call last):
        ...
    UnmatchedParenthesis: unmatched closing parenthesis
    """
    root: List[Expression] = []
    # The list new items are appended to right now.
    current: List[Expression] = root
    # Lists we descended from to reach `current`, outermost first; its
    # length is the number of parentheses that are currently open.
    enclosing: List[List[Expression]] = []

    for token in tokens:
        if token == '(':
            child: List[Expression] = []
            current.append(child)
            enclosing.append(current)
            current = child
        elif token == ')':
            if not enclosing:
                raise UnmatchedParenthesis('unmatched closing parenthesis')
            current = enclosing.pop()
        else:
            current.append(token)

    if not enclosing:
        return root

    # Input ended with open lists: report how many, with the right plural.
    unmatched_count: int = len(enclosing)
    if unmatched_count == 1:
        ending: str = 'is'
    else:
        ending = 'es'
    raise UnmatchedParenthesis(
        f'{unmatched_count} unmatched opening parenthes{ending}'
    )

# When this file is executed as a script (rather than imported), run the
# doctest examples embedded in the docstrings above as a self-test.
if __name__ == '__main__':
    import doctest
    doctest.testmod()

S	M	T	W	T	F	S
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30

S-expression parsing code snippet

S-expression parsing code snippet

Profile

April 2019

Most Popular Tags

Style Credit

Expand Cut Tags