Remove Blank Lines While Printing An Output

130 Views Asked by At

I have this Java lexical analyzer algorithm that prints out each assigned token to every symbol. The output should print out on each and every individual line without any space in between. But mine does have spaces. I have tried to use trim() method, even replaceAll() method, nothing seems to work. Here is my Java code:

import org.w3c.dom.ls.LSOutput;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Lexer {

public static void Tokenize(String fileName) {
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(fileName));
        String line = null;
        while ((line = reader.readLine()) != null) {
            line = removeComments(line);
            String[] tokens = tokenizeLine(line);
            for (String token : tokens) {
                System.out.println(token);
            }
        }
    } catch (IOException e) {
        System.err.println("Error reading file: " + e.getMessage());
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            System.err.println("Error closing file: " + e.getMessage());
        }
    }
    System.out.println("SYNTAX ERROR: INVALID IDENTIFIER NAME");

}

private static String removeComments(String line) {
    // Remove inline comments
    line = line.replaceAll("//.*", "");
    // Remove block comments
    Pattern pattern = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);
    Matcher matcher = pattern.matcher(line);
    return matcher.replaceAll("");
}

private static String[] tokenizeLine(String line) {

    String[] tokens = line.split("\\s+ |(?=[\\[\\](){}<>=,;+-/*%|&!])|(?<=[\\[\\](){}<>=,;+-/*%|&!])");
    for (int i = 0; i < tokens.length; i++) {
        String token = tokens[i].replaceAll("\\s+", "");
        if (token.matches("procedure")) {
            tokens[i] = "PROC";
        } else if (token.matches("int")) {
            tokens[i] = "INT";
        } else if (token.matches("[0-9]+")) {
            // Integer constant
            tokens[i] = "INT_CONST";
        } else if (token.matches("[(]")) {
            tokens[i] = "LP";
        } else if (token.matches("[)]")) {
            tokens[i] = "RP";
        } else if (token.matches("\".*\"")) {
            // String constant
            tokens[i] = "STR_CONST";
        } else if (token.matches("String") || token.matches("string")) {
            // String keyword
            tokens[i] = "STR";
        } else if (token.matches("if")) {
            tokens[i] = "IF";
        } else if (token.matches("for")) {
            tokens[i] = "FOR";
        } else if (token.matches("while")) {
            tokens[i] = "WHILE";
        } else if (token.matches("return")) {
            tokens[i] = "RETURN";
        } else if (token.matches("[;]")) {
            tokens[i] = "SEMI";
        } else if (token.matches("do")) {
            tokens[i] = "DO";
        } else if (token.matches("break")) {
            tokens[i] = "BREAK";
        } else if (token.matches("end")) {
            tokens[i] = "END";
        } else if (token.matches("[a-zA-Z][a-zA-Z0-9]*")) {
            // Identifier
            tokens[i] = "IDENT";
        } else if (token.matches("[=]")) {
            tokens[i] = "ASSIGN";
        } else if (token.matches("[<]")) {
            tokens[i] = "LT";
        } else if (token.matches("[>]")) {
            tokens[i] = "RT";
        } else if (token.matches("[++]")) {
            tokens[i] = "INC";
        } else if (token.matches("[{]")) {
            tokens[i] = "RB";
        } else if (token.matches("[}]")) {
            tokens[i] = "LB";
        } else if (token.matches("[*]")) {
            tokens[i] = "MUL_OP";
        } else if (token.matches("[/]")) {
            tokens[i] = "DIV_OP";
        } else if (token.matches("[>=]")) {
            tokens[i] = "GE";
        }
    }
    return tokens;
}
}

and here is my output.

LP
IDENT
RP
 

FOR
LP
IDENT
ASSIGN
INT_CONST
SEMI
IDENT
LT
IDENT
SEMI
IDENT
ASSIGN
IDENT
INC
INC
RP
 
RB
IDENT
ASSIGN
IDENT
MUL_OP
 
LP
IDENT
DIV_OP
INT_CONST
RP
SEMI

IF
LP
IDENT
RT
ASSIGN
INT_CONST
RP
BREAK
SEMI
        
LB

IDENT
SEMI

IDENT


IDENT
ASSIGN
STR_CONST
SEMI
SYNTAX ERROR: INVALID IDENTIFIER NAME``
1

There are 1 best solutions below

0
Eritrean On

Either remove the empty strings in your tokenizeLine from the returned array before returning, for example by changing the return statement from:

return tokens;

to

return Arrays.stream(tokens).filter(Predicate.not(String::isBlank)).toArray(String[]::new);

or just print the non empty strings in your loop by checking before printing

for (String token : tokens) {
    if(!token.isBlank()) {
        System.out.println(token);
    }
}