Spreadsheet engine problems : String concatenation, saving and loading workbooks as JSON

34 Views Asked by At

I have made some headway with building my engine. I want my cells to handle concatenation when '&' is in place, for cell names, and strings alike. I am using lark for my formula evaluation:

// String concatenation

?concat_expr : (concat_expr "&")? base

This is my current code dealing with it.

def set_cell_contents(self, sheet_name: str, location: str,
                          contents: Optional[str]) -> None:
        # Set the contents of the specified cell on the specified sheet.
        #
        # The sheet name match is case-insensitive; the text must match but the
        # case does not have to.  Additionally, the cell location can be
        # specified in any case.
        #
        # If the specified sheet name is not found, a KeyError is raised.
        # If the cell location is invalid, a ValueError is raised.
        #
        # A cell may be set to "empty" by specifying a contents of None.
        #
        # Leading and trailing whitespace are removed from the contents before
        # storing them in the cell.  Storing a zero-length string "" (or a
        # string composed entirely of whitespace) is equivalent to setting the
        # cell contents to None.
        #
        # If the cell contents appear to be a formula, and the formula is
        # invalid for some reason, this method does not raise an exception;
        # rather, the cell's value will be a CellError object indicating the
        # naure of the issue.
        
        # finds if sheet inputed is found in the list of sheets
        #move this to its own function FUTURE
        from .interp import FormulaEvaluator
        
        if(type(contents) == str):
            contents = contents.strip()
        
        sheetNum = self.get_sheet_num(sheet_name)
        
        #if valid, set current sheet to sheet_name
        self.currentSheet = sheet_name
        location = location.upper() # we assume cell is uppercase
        
        # if location outside of extent, expand sheet
        self.increase_sheet_size(sheet_name, location)
        
        # if cell in contents is outside of extent, expand sheet
        
        # store sheet_name in cell
        

        self.list_sheets_contents[sheetNum][location].set_cell_contents(contents) # set contents even if there is a error displayed
        
        # reset cell error on calcuation
        self.list_sheets_contents[sheetNum][location].cellError = None

        #if cell contents is a cell error, assign the cell error to the cell
        if type(contents) == CellError:
            self.list_sheets_contents[sheetNum][location].set_cellError(contents)
            return        
        elif type(contents) == str and contents[0] == '#':
            if contents.lower() == "#error!":
                self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.PARSE_ERROR, "Error"))
            elif contents.lower() == "#circref!":
                self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.CIRCULAR_REFERENCE, "Circular reference detected"))
            elif contents.lower() == '#ref!':
                self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.BAD_REFERENCE, "Bad reference"))
            elif contents.lower() == '#name?':
                self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.BAD_NAME, "Bad name"))
            elif contents.lower() == '#value!':
                self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.TYPE_ERROR, "Type error"))
            elif contents.lower() == '#div/0!':
                self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.DIVIDE_BY_ZERO, "Divide by zero error"))
            return    

        # if cell contents is None, done with function
        if contents == None or contents == "": # if the cell is empty, set it to None
            self.list_sheets_contents[sheetNum][location].contents = None
            return
        
        #if cell contents is a float, set it to a decimal
        is_decimal = False
        try:
            float(contents)
            is_decimal = True
        except ValueError:
            is_decimal =  False
        if(is_decimal):
            contents = decimal.Decimal(contents)
            self.list_sheets_contents[sheetNum][location].contents = contents
            return
            
        
        # if the cell is not a formula, set it to the contents
        if (type(contents) == str and contents[0] != '=') or type(contents) != str: 
            self.list_sheets_contents[sheetNum][location].set_cell_contents(contents)
            return

        # Check if the formula contains the '+' or '&' operator
        if '&' in contents:
            # Split the formula into parts
            parts = re.split('[&]', contents)

            evaluated_parts = []
            for part in parts:
                # Recursively call set_cell_contents to handle nested concatenations or other expressions
                self.set_cell_contents(sheet_name, location, part.strip())
                # Retrieve the evaluated contents after handling the nested expression
                evaluated_part = self.list_sheets_contents[sheetNum][location].contents
                # Append the evaluated part to the list of evaluated parts
                evaluated_parts.append(evaluated_part)

            # Concatenate the evaluated parts
            contents = ''.join(evaluated_parts)



        # if the cell is in bounds, first check for parse errors
        evaluator = FormulaEvaluatorChecker(self)
        try: 
            evaluator.visit(lark.Lark.open('sheets/formulas.lark', start='formula').parse(contents))
        except Exception as error: 
            self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.PARSE_ERROR, str(error)))
            return
        
    
        parser = lark.Lark.open('sheets/formulas.lark', start='formula')
        tree = parser.parse(contents)
        v = CellRefFinder()
        v.visit(tree)
        cellsRefrenced = v.refs
        
        # first we set the new cells outcoming and incoming neighbors            
        for cell in cellsRefrenced:
            cell = cell.upper()
            if '!' in cell: # if the cell has an exclamation mark (!), has a sheetname
                pattern = r'^(.*?)!(.*)'  # Updated regex pattern to match text before and after '!'
                match = re.match(pattern, cell)
                if match:
                    currSheet = match.group(1)
                    #print("currSheet: ", currSheet)
                    cell = match.group(2)
                    #get the number sheet of the sheet name
                    currSheet_int = None
                    for i in range(len(self.list_of_sheets)):
                        if self.list_of_sheets[i].lower() == currSheet.lower():
                            currSheet_int = i
                            break
                    if(type(currSheet) != int):
                        #if sheet is not in list set error to badRefrence
                        self.list_sheets_contents[sheetNum][location].set_cellError(CellError(CellErrorType.BAD_REFERENCE, "Sheet not found"))
                    else:
                        #add error catch here if the cell being referenced is not found bc its in another sheet not ready yet
                        self.increase_sheet_size(currSheet, cell)
                        self.list_sheets_contents[sheetNum][location].add_incoming_neighbor(self.list_sheets_contents[currSheet_int][cell])
                        self.list_sheets_contents[currSheet_int][cell].add_outgoing_neighbor(self.list_sheets_contents[sheetNum][location])
            else:
                self.increase_sheet_size(sheet_name, cell)
                self.list_sheets_contents[sheetNum][location].add_incoming_neighbor(self.list_sheets_contents[sheetNum][cell])
                self.list_sheets_contents[sheetNum][cell].add_outgoing_neighbor(self.list_sheets_contents[sheetNum][location])

        
        self.list_sheets_contents[sheetNum][location].set_cell_contents(contents)
        # then we check for cycles in topological sort
        topoSort = TopologicalSort()
        self.globalStack = topoSort.topological_sort(self.list_sheets_contents[sheetNum])

PS: If someone could help me with the formula evaluation function, I would greatly appreciate that.

This is how I handle my workbook

global_stack = [] # if this becomes a memory problem, switch get_cell_value to traverse all neighbors to get the order we want to evaluate cells
    list_of_sheets: List[str] #list of sheet names
    list_sheets_contents: List[dict] #list of the graphs of sheet contents
    list_sheets_size: List[list] #list of the sizes of the sheets
    globalIteration = 0 #global iteration counter for cycle detection
    currentSheet = ""

    def __init__(self):
        self.list_of_sheets = []
        self.list_sheets_contents = []
        self.list_sheets_size = []
        self.globalIteration = 0
        self.currentSheet = ""

These are the tests I am trying :

# Make a new empty workbook
wb = sheets.Workbook()
(index, name) = wb.new_sheet()

# Should print:  New spreadsheet "Sheet1" at index 0
print(f'New spreadsheet "{name}" at index {index}')

wb.set_cell_contents(name, 'a1', '12')
wb.set_cell_contents(name, 'b1', '34')
wb.set_cell_contents(name, 'c1', '=a1+b1')

...

# test.set_cell_contents("Sheet2", "B3", "=A5 & b2")
# print("B3 = A5 & b2 (concat cells test!)")
# test.set_cell_contents("Sheet2", "B4", "=taco & cat")
# print("B4 = taco & cat (concat strs test!)")

These are the results I am getting:

B3 value (should be helloworld): ERROR[CellErrorType.PARSE_ERROR, "Unexpected end-of-input. Expected one of: 
        * BANG
"]
B4 value (should be tacocat): ERROR[CellErrorType.PARSE_ERROR, "Unexpected end-of-input. Expected one of: 
        * BANG
"]
A15 value (should be #REF!): ERROR[CellErrorType.BAD_REFERENCE, "Sheet not found"]

I also want to work with saving and loading my workbook to and from JSON files This is my code

def load_workbook(fp: TextIO) -> 'Workbook':
        """Load a workbook from a text file or file-like object in JSON format."""
        try:
            data = json.load(fp)
        except json.JSONDecodeError as e:
            raise json.JSONDecodeError("Invalid JSON format") from e

        workbook = Workbook()

        try:
            for sheet in data['sheets']:
                if not isinstance(sheet, dict):
                    raise TypeError("Sheet data should be a dictionary")
                if 'name' not in sheet or 'cell-contents' not in sheet:
                    raise KeyError("Sheet data should contain 'name' and 'cell-contents' keys")
                if not isinstance(sheet['name'], str) or not isinstance(sheet['cell-contents'], dict):
                    raise TypeError("'name' should be a string and 'cell-contents' should be a dictionary")

                workbook.list_of_sheets.append(sheet['name'])
                cell_contents = {k.upper(): v for k, v in sheet['cell-contents'].items()}  # Convert cell names to uppercase
                workbook.list_sheets_contents.append(cell_contents)
                workbook.list_sheets_size.append([len(cell_contents), len(cell_contents[0]) if cell_contents else 0])
        except KeyError as e:
            raise KeyError("Missing key in JSON data") from e
        except TypeError as e:
            raise TypeError("Invalid type in JSON data") from e

        return workbook

    def save_workbook(self, fp: TextIO) -> None:
        """Save the workbook to a text file or file-like object in JSON format."""
        data = {
            'sheets': [
                {
                    'name': name,
                    'cell-contents': {k.upper(): str(v) for k, v in contents.items()}  # Convert cell names to uppercase and values to strings
                }
                for name, contents in zip(self.list_of_sheets, self.list_sheets_contents)
            ]
        }

But my tests are not properly acknowledging my code Off the bat, am I doing something wrong

0

There are 0 best solutions below