How to parse multiple tokens using the Python SLY parser?

I have a lexer:

import re
from sly import Lexer

class BasicLexer(Lexer):
    tokens = {OBJECT, FUNCTION}
    ignore = '.'

    OBJECT = r'object\(\"(.*?)\"\)'
    FUNCTION = r'function\(\"(.*?)\"\)'

    def OBJECT(self, t):
        match = re.search(r'object\("(.*?)"\)', t.value)
        t.value =  match.group(1)
        return t

    def FUNCTION(self, t):
        match = re.search(r'function\("(.*?)"\)', t.value)
        t.value =  match.group(1)
        return t


When I run it, it returns 2 tokens:

if __name__ == '__main__':
    data = '''object("cars").function("work")'''
    lexer = BasicLexer()
    for tok in lexer.tokenize(data):
        print('type=%r, value=%r' % (tok.type, tok.value))

type='OBJECT', value='cars'
type='FUNCTION', value='work'

Now, creating the parser:

from sly import Parser

class BasicParser(Parser):
    tokens = BasicLexer.tokens

    def __init__(self):
        self.env = { }

    @_('')
    def statement(self, p):
        pass

    @_('OBJECT')
    def statement(self, p):
        return ('object', p.OBJECT)

    @_('FUNCTION')
    def statement(self, p):
        return ('function', p.FUNCTION)

if __name__ == '__main__':
    lexer = BasicLexer()
    parser = BasicParser()
    text =  '''object("cars").function("work")'''
    result = parser.parse(lexer.tokenize(text))
    print(result)

Running this returns the following error:

sly: Syntax error at line 1, token=FUNCTION
None

For some reason, it can't parse the input when lexer.tokenize(text) yields more than one token. Any idea why?

1 Answer

Answered by Sean Duggan:

As noted above in rici's comments, the parse fails because your grammar only ever matches a single OBJECT or a single FUNCTION token: once the parser has reduced the OBJECT to a statement, it expects end of input, so the FUNCTION token that follows is a syntax error. You probably need to take a step or two back and decompose your object and function tokens into smaller pieces that the parser can combine. Something like the following may make more sense:

from sly import Lexer

class BasicLexer(Lexer):
    tokens = {OBJECT, FUNCTION, LPAREN, RPAREN, SCONST, DOT}

    # String literal
    SCONST = r'\"([^\\\n]|(\\.))*?\"'

    def SCONST(self, t):
        t.value = t.value[1:-1]  # strip the quotation marks
        return t

    # Punctuation
    DOT = r'\.'
    LPAREN = r'\('
    RPAREN = r'\)'

    # Keywords
    OBJECT = r'object'
    FUNCTION = r'function'

Then, for your parser, you'd be building something more like the following:

from sly import Parser

class BasicParser(Parser):
    tokens = BasicLexer.tokens

    def __init__(self):
        self.env = { }

    @_('object DOT function')
    def statement(self, p):
        return ('statement', p.object, p.function)

    @_('OBJECT LPAREN SCONST RPAREN')
    def object(self, p):
        return ('object', p.SCONST)

    @_('FUNCTION LPAREN SCONST RPAREN')
    def function(self, p):
        return ('function', p.SCONST)

if __name__ == '__main__':
    lexer = BasicLexer()
    parser = BasicParser()
    text =  '''object("cars").function("work")'''
    # Dump the token stream for debugging. Note that tokenize() returns a
    # generator, so it is exhausted here and called again below for the parse.
    for tok in lexer.tokenize(text):
        print('type=%r, value=%r, lineno=%r, index=%r, end=%r' % (tok.type, tok.value, tok.lineno, tok.index, tok.end))

    result = parser.parse(lexer.tokenize(text))
    print(result)
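
For reference, with the sample input this should print something along these lines (the index and end values are worked out by hand from the 31-character input string, so treat them as illustrative):

type='OBJECT', value='object', lineno=1, index=0, end=6
type='LPAREN', value='(', lineno=1, index=6, end=7
type='SCONST', value='cars', lineno=1, index=7, end=13
type='RPAREN', value=')', lineno=1, index=13, end=14
type='DOT', value='.', lineno=1, index=14, end=15
type='FUNCTION', value='function', lineno=1, index=15, end=23
type='LPAREN', value='(', lineno=1, index=23, end=24
type='SCONST', value='work', lineno=1, index=24, end=30
type='RPAREN', value=')', lineno=1, index=30, end=31
('statement', ('object', 'cars'), ('function', 'work'))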

Note that this only allows a single statement; see the sketch below for one way to lift that restriction.
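
If you need to accept a sequence of statements, the conventional fix is a left-recursive list rule. Here is a minimal sketch of that idea, building on the lexer above (the statements rule and the MultiStatementParser name are my additions, not part of the original answer):

from sly import Parser

class MultiStatementParser(Parser):
    tokens = BasicLexer.tokens

    # A program is one or more statements, collected into a list.
    # Left recursion is the idiomatic shape for LALR parsers like SLY.
    @_('statements statement')
    def statements(self, p):
        return p.statements + [p.statement]

    @_('statement')
    def statements(self, p):
        return [p.statement]

    @_('object DOT function')
    def statement(self, p):
        return ('statement', p.object, p.function)

    @_('OBJECT LPAREN SCONST RPAREN')
    def object(self, p):
        return ('object', p.SCONST)

    @_('FUNCTION LPAREN SCONST RPAREN')
    def function(self, p):
        return ('function', p.SCONST)

parser.parse(lexer.tokenize(text)) would then return a list of ('statement', ...) tuples instead of a single one. If you want to write statements on separate lines, you would also need the lexer to skip whitespace, e.g. ignore = ' \t\n' in BasicLexer.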