How to split a string into lexemes?

180 Views Asked by At

I have a string "(2+2)*3S1(2)+Sin(1+2/S1(2))" and a list of operations like ["(", ",", ")","+","Sin"...] that does not include S1.

Now I want to split this string into a list of lexemes.

If S1 is not among the available operations, I expect an exception (or some other error signal). If everything is correct, I expect an array of lexemes.

What I've already done:

// Tokenizes `input` into `lexemes`. Each pass of the outer loop first reads a run of
// digit/'.' characters as one number lexeme, then grows an operation name one character
// at a time for as long as some known operation name still starts with it.
// `operations` is not declared in this snippet; it is used here as a collection whose
// items expose a string `Name`.
const string input = "(2+2)*3S1(2)+Sin(1+2/S1(2))";;
var lexemes = new List<string>();
for (var i = 0; i < input.Length; i++)
{
    // Number part: a character belongs to the number if it parses as an int on its own
    // or is '.'. Nothing here limits how many '.' a number may contain.
    var currentNumber = new StringBuilder();
    while(i < input.Length && (Int32.TryParse(input[i].ToString(), out _) || input[i]=='.'))
    {
        currentNumber.Append(input[i]);
        i++;
    }

    if (currentNumber.Length > 0)
    {
        lexemes.Add(currentNumber.ToString());
    }

    // The number ran to the end of the input: nothing left to read.
    if (i >= input.Length)
    {
        break;
    }
    // Operation part: keep appending characters while the candidate text is still a
    // prefix of at least one operation name; stop at the first character that breaks that.
    var currentOp = new StringBuilder();
    while(i < input.Length)
    {
        if (operations.Count(x => x.Name.StartsWith(currentOp.ToString() + input[i])) > 0)
        {
            currentOp.Append(input[i]);
            i++;
        }
        else
        {
             break;
        }
    }

    // Step back one position because the for loop's i++ advances again. This runs
    // unconditionally, i.e. also when the loop above consumed no character at all.
    i--;
            
    // Single requires exactly one operation whose Name equals the collected text and
    // throws InvalidOperationException otherwise. That includes the cases where
    // currentOp is empty or is only a prefix of a longer name (e.g. "S" when "Sin" is
    // known but "S1" is not).
    var operation = operations.Single(x => x.Name == currentOp.ToString());
    lexemes.Add(operation.Name);
}

My problem is:

  1. I don't know how to detect a wrong lexeme
  2. I don't understand why this code is wrong and skips everything inside the Sin arguments
  3. Well... this code smells; it is not good enough...
1

There is 1 best solution below

0
Denis Suleimanov On

Hm.. It's alive!

// Splits `input` into lexemes: numeric literals (runs of ASCII digits and '.') and the
// names of known operations. Throws FormatException at the first character that starts
// neither a number nor a usable operation.
// `operations` (items with a string Name and a numeric Priority) and `OperationComparator`
// are declared outside this snippet.
const string input = "(2+2)*3S(2)+Sin(1+2/S(2))";

// Priority value that marks a named function in the operations table; everything below
// it is a plain operator or punctuation. A function name is accepted only when the call
// opener follows it immediately.
const int FunctionPriority = 4;
const char CallOpener = '(';

var lexemes = new List<string>();
for (var i = 0; i < input.Length; i++)
{
    // 1. Numeric literal: consume consecutive digits / '.' as a single lexeme.
    var currentNumber = new StringBuilder();
    while (i < input.Length && ((input[i] >= '0' && input[i] <= '9') || input[i] == '.'))
    {
        currentNumber.Append(input[i]);
        i++;
    }

    if (currentNumber.Length > 0)
    {
        lexemes.Add(currentNumber.ToString());
    }

    // The number ran to the end of the input: nothing left to read.
    if (i >= input.Length)
    {
        break;
    }

    // 2. Operation: try every operation whose name starts with the current character.
    var currentSymbol = input[i];
    var availableOperations = operations.Where(x => x.Name.StartsWith(currentSymbol)).ToList();
    // Ordered by name length. NOTE(review): the comparator is not visible here; it needs
    // to put longer names first if a longer name must win over a shorter one that shares
    // its prefix.
    availableOperations.Sort(OperationComparator);

    var operationFound = false;
    foreach (var availableOperation in availableOperations)
    {
        var name = availableOperation.Name;
        var end = i + name.Length;

        // The candidate must fit into the remaining input and match it exactly.
        // Checking the bound first avoids reading past the end of the string.
        if (end > input.Length || string.CompareOrdinal(input, i, name, 0, name.Length) != 0)
        {
            continue;
        }

        // The name match above is required for BOTH kinds of operation; a function
        // additionally needs the call opener right after its name.
        var isOperator = availableOperation.Priority < FunctionPriority;
        var isCall = availableOperation.Priority == FunctionPriority
                     && end < input.Length
                     && input[end] == CallOpener;
        if (!isOperator && !isCall)
        {
            continue;
        }

        lexemes.Add(name);
        // Leave i on the last character of the lexeme; the for loop's i++ moves past it.
        i = end - 1;
        operationFound = true;
        break;
    }

    if (!operationFound)
    {
        throw new FormatException($"Unknown lexeme starting with '{currentSymbol}' at position {i}");
    }
}

And my list of operations is something like:

// Table of lexemes the tokenizer recognizes, one entry per line.
// Entry order is kept exactly as declared.
var operations = new List<Operation>
{
    // Two-operand operators that may also be used as unary.
    new Operation { Name = "+", Priority = 1, OperandsCount = 2, UnaryPossible = true },
    new Operation { Name = "-", Priority = 1, OperandsCount = 2, UnaryPossible = true },

    // Two-operand operators with no unary form.
    new Operation { Name = "*", Priority = 2, OperandsCount = 2, UnaryPossible = false },
    new Operation { Name = "/", Priority = 2, OperandsCount = 2, UnaryPossible = false },

    // Named entries, all at Priority 4.
    new Operation { Name = "Sin", Priority = 4, OperandsCount = 1, UnaryPossible = true },
    new Operation { Name = "S1", Priority = 4, OperandsCount = 1, UnaryPossible = true },
    new Operation { Name = "pow", Priority = 4, OperandsCount = 2, UnaryPossible = false },

    // Symbolic two-operand operator at Priority 3.
    new Operation { Name = "^", Priority = 3, OperandsCount = 2, UnaryPossible = false },

    // Brackets and the comma: Priority 0, no operands.
    new Operation { Name = "(", Priority = 0, OperandsCount = 0, UnaryPossible = false },
    new Operation { Name = ")", Priority = 0, OperandsCount = 0, UnaryPossible = false },
    new Operation { Name = ",", Priority = 0, OperandsCount = 0, UnaryPossible = false },
};

Yes, the priority value 4 and the '(' check are hard-coded magic values, but this is still better than the previous version.