I'm writing a lexical analyzer as a personal project, but I'm having trouble building its symbol table because I'm fairly new to this language. So far I've managed to write a simple lexical analyzer, but it has no symbol table yet for inserting and retrieving tokens.
import re #Regular Expression
# Words that are reported as DATATYPE tokens.
_DATATYPES = ('str', 'int', 'bool', 'float')

# Words that are reported as OPERATOR tokens. Membership is tested against
# whole words, so only these exact lexemes qualify (a substring test on a
# string such as '*-/+%=""' would also accept fragments like '*-/').
_OPERATORS = ('*', '-', '/', '+', '%', '=', '"', '""')

# Bracket lexemes mapped to the token type reported for each of them.
_BRACKETS = {
    '(': 'Left Parenthesis',
    ')': 'Right Parenthesis',
    '{': 'Left Curly Bracket',
    '}': 'Right Curly Bracket',
}

# A word counts as an identifier when its first character is an ASCII letter.
_IDENTIFIER_START = re.compile(r"[A-Za-z]")

# An integer literal: an optional sign followed by one or more digits.
_INTEGER = re.compile(r"[+-]?[0-9]+")


def _classify_word(word):
    """Return the list of ``[lexeme, token_type]`` pairs for one word.

    The checks run in a fixed order: data type, identifier, operator,
    bracket, integer literal. A word that matches none of them is reported
    with the type ``'Others'``. An integer literal immediately followed by
    ``;`` yields two pairs: the number and an ``END_STATEMENT`` pair.
    """
    if word in _DATATYPES:
        return [[word, 'DATATYPE']]
    if _IDENTIFIER_START.match(word):
        return [[word, 'IDENTIFIER']]
    if word in _OPERATORS:
        return [[word, 'OPERATOR']]
    if word in _BRACKETS:
        return [[word, _BRACKETS[word]]]

    # Split a trailing ';' off before testing for a number so that both
    # "5" and "5;" (including single-digit values) are recognised.
    terminated = word.endswith(';')
    number = word[:-1] if terminated else word
    if _INTEGER.fullmatch(number):
        pairs = [[number, 'INTEGER']]
        if terminated:
            pairs.append([';', 'END_STATEMENT'])
        return pairs

    return [[word, 'Others']]


def tokenize(lines):
    """Split each line on whitespace and classify every resulting word.

    Args:
        lines: an iterable of strings (for example an open text file).

    Returns:
        A list of ``[lexeme, token_type]`` pairs in source order.
    """
    pairs = []
    for line in lines:
        for word in line.split():
            pairs.extend(_classify_word(word))
    return pairs


token_pair = []

if __name__ == "__main__":
    with open('SourceCodeForTesting.cpp', "r") as Source_Code:
        token_pair.extend(tokenize(Source_Code))
    print(token_pair)