Check if a string is a valid markdown table row with C

64 Views Asked by At

As part of a program, this function receives a pointer to a string and checks whether it is a valid row of a markdown table from which data can be extracted.

If it recognizes the table header, the Markdown separator line between the header and the table body, or an empty row, it should return false; otherwise it should return true. Example:

| Lebensmittel/Gericht | Gewicht in gr | kcal |  ->false
| -------------------- | ------------- | ---- |  ->false
| Wildkräutersalat    | 42            | 8    |   ->true
| Rucola               | 40            | 10   |  ->true
|                      |               |      |  ->false
| Brot                 | 73            | 161  |  ->true

this is my approach so far:

/*
 * Decide whether `line` is a data row of the markdown table.
 *
 * Returns false for:
 *   - a NULL pointer,
 *   - the known header row,
 *   - the separator row under the header (cells made only of '-' / ':'),
 *   - a row whose cells are all blank,
 *   - a line that does not contain at least two '|' characters.
 * Returns true for every other line, i.e. a row with real cell content.
 *
 * The line is echoed to stdout for debugging.
 */
bool check_line(char* line) {
    static const char header[] = "| Lebensmittel/Gericht | Gewicht in gr | kcal |";

    if (line == NULL) {
        return false;
    }

    puts(line); // console output for debugging

    // The header row is not data. Compare only the header's own length so
    // that a trailing newline or trailing blanks do not defeat the match.
    if (strncmp(line, header, sizeof header - 1) == 0) {
        return false;
    }

    // A table row needs an opening and a closing '|'.
    const char *first = strchr(line, '|');
    const char *last = strrchr(line, '|');
    if (first == NULL || first == last) {
        return false;
    }

    // Look at everything between the outer pipes, across all cells.
    bool has_content = false;      // saw something other than blanks and '|'
    bool only_rule_chars = true;   // everything seen so far was '-' or ':'
    for (const char *p = first + 1; p < last; p++) {
        if (*p == '|' || *p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
            continue;
        }
        has_content = true;
        if (*p != '-' && *p != ':') {
            only_rule_chars = false;
        }
    }

    // Blank rows have no content; separator rows consist solely of rule chars.
    return has_content && !only_rule_chars;
}

I have used this approach to check for an empty row between the '|' chars. But it is not working correctly.

2

There are 2 best solutions below

0
NotFabi On

By changing the code to split the char* with the delimiter "|", you can then check that the row has exactly 3 elements and that the second and third elements consist only of digits. If so, the line is valid:

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>

/**
 * Split `s` at every occurrence of `delim`.
 *
 * Returns a NULL-terminated array of pointers to the pieces. The pointer
 * table and a private copy of `s` live in ONE heap allocation, so the caller
 * releases everything with a single free() of the returned pointer.
 *
 * Adjacent delimiters, and delimiters at either end of `s`, produce empty
 * strings. An empty `delim` produces a single piece holding a copy of `s`
 * (searching for "" would otherwise match forever without advancing).
 *
 * Returns NULL if the allocation fails.
 */
static char **strsplit(const char *s, const char *delim) {
    const size_t s_len = strlen(s);
    const size_t delim_len = strlen(delim);
    size_t pieces = 1;

    /* First pass: count the pieces so the pointer table can be sized. */
    if (delim_len > 0) {
        for (const char *p = strstr(s, delim); p != NULL;
             p = strstr(p + delim_len, delim)) {
            ++pieces;
        }
    }

    /* Layout: [pieces + 1 pointers][copy of s including its terminator]. */
    const size_t table_size = (pieces + 1) * sizeof(char *);
    char **table = malloc(table_size + s_len + 1);
    if (table == NULL) {
        return NULL;
    }

    char *copy = (char *)table + table_size;
    memcpy(copy, s, s_len + 1);

    /* Second pass: cut the copy at each delimiter and record the starts. */
    size_t idx = 0;
    table[idx++] = copy;
    if (delim_len > 0) {
        for (char *p = strstr(copy, delim); p != NULL;
             p = strstr(p + delim_len, delim)) {
            *p = '\0';
            table[idx++] = p + delim_len;
        }
    }
    table[idx] = NULL;

    return table;
}

/**
 * Remove leading whitespace from `str` in place.
 *
 * The remaining characters (and the terminator) are shifted to the start of
 * the buffer, so the returned pointer is always `str` itself.
 */
static char *trim_left(char *str) {
    const char *first = str;

    /* <ctype.h> functions require an unsigned char value; a plain char may be
       negative for non-ASCII bytes (e.g. UTF-8), which is undefined behaviour. */
    while (isspace((unsigned char)*first)) {
        ++first;
    }

    if (first != str) {
        /* Regions overlap, hence memmove; +1 carries the terminator along. */
        memmove(str, first, strlen(first) + 1);
    }

    return str;
}

/**
 * Remove trailing whitespace from `str` in place and return `str`.
 *
 * Works on the empty string without touching any byte before `str`.
 */
static char *trim_right(char *str) {
    size_t len = strlen(str);

    /* Walk back over trailing whitespace; `len > 0` keeps the index inside
       the buffer. The cast is required because <ctype.h> functions take an
       unsigned char value. */
    while (len > 0 && isspace((unsigned char)str[len - 1])) {
        --len;
    }
    str[len] = '\0';

    return str;
}

/**
 * Strip whitespace from both ends of `str` in place.
 *
 * Leading whitespace is removed first, then trailing whitespace.
 * Returns `str`.
 */
static char *trim(char *str) {
    char *left_trimmed = trim_left(str);

    trim_right(left_trimmed);

    return str;
}

/**
 * Report whether `str` is a non-empty run of ASCII decimal digits.
 *
 * Every character, including the last one, is checked. The empty string is
 * not a number. Signs, whitespace and decimal points are rejected.
 */
static bool is_valid_number(const char *str) {
    if (*str == '\0') {
        return false;
    }

    for (const char *p = str; *p != '\0'; ++p) {
        if (*p < '0' || *p > '9') {
            return false;
        }
    }

    return true;
}

/**
 * Decide whether `line` is a data row of the form `| name | number | number |`.
 *
 * The line is split at '|'. The piece before the first pipe and the piece
 * after the last pipe are ignored; the pieces in between are the cells.
 * A line is valid when it has exactly three cells, the first is NOT a number
 * and the second and third are numbers (after trimming whitespace).
 *
 * Returns false if the line cannot be split (allocation failure).
 */
bool check_line(const char *line) {
    char **parts = strsplit(line, "|");
    if (parts == NULL) {
        return false;
    }

    int element_count = 0;
    bool is_valid = true;

    /* Test parts[i] before parts[i + 1]: for a line without any '|' the
       table holds only {line, NULL}, so parts[2] would be out of bounds. */
    for (int i = 1; parts[i] != NULL && parts[i + 1] != NULL && is_valid; i++) {
        const char *cell = trim(parts[i]);

        if (i == 1) {
            is_valid = !is_valid_number(cell);
        } else if (i == 2 || i == 3) {
            is_valid = is_valid_number(cell);
        }

        element_count++;
    }

    /* strsplit returns one allocation holding both table and string copy. */
    free(parts);

    return is_valid && element_count == 3;
}

/* Demo driver: runs check_line over the sample table and prints each verdict. */
int main() {
    /* Sample rows; the trailing comment is the expected verdict. */
    const char *data[] = {
        "| Lebensmittel/Gericht | Gewicht in gr | kcal |", // false
        "| -------------------- | ------------- | ---- |", // false
        "| Wildkräutersalat    | 42            | 8    |", // true
        "| Rucola               | 40            | 10   |", // true
        "|                      |               |      |", // false
        "| Brot                 | 73            | 161  |"  // true
    };
    const int size = sizeof(data) / sizeof(data[0]);
    int row = 0;

    while (row < size) {
        const char *verdict = check_line(data[row]) ? "True" : "False";

        printf("%s -> %s\n", data[row], verdict);
        row++;
    }

    return 0;
}
0
xing On

sscanf could be used. sscanf returns the number of items successfully scanned. If a string and two integers are scanned, the line is TRUE.

The scanset %24[^|] will scan up to 24 characters that are not a | so trailing whitespace may need to be removed.

The data is extracted into name, gr and kcal.

#include <stdio.h>
#include <string.h>

/*
 * Demo driver: classifies each sample line with sscanf.
 *
 * A line counts as TRUE when exactly three items are converted: a name of up
 * to 24 non-'|' characters followed by two integers. The fourth conversion
 * (`extra`) exists to detect unexpected trailing text, which would raise the
 * count to four.
 */
int main ( void) {
    char lines[6][50] = {
        "| Lebensmittel/Gericht | Gewicht in gr | kcal |"
        ,"| -------------------- | ------------- | ---- |"
        ,"| Wildkräutersalat    | 42            | 8    |"
        ,"| Rucola               | 40            | 10   |"
        ,"|                      |               |      |"
        ,"| Brot                 | 73            | 161  |"};
    char name[25] = "";
    char extra[25] = "";
    int gr = 0;
    int kcal = 0;

    /* Walk the table row by row through a pointer to each 50-byte row. */
    for ( char (*row)[50] = lines; row < lines + 6; ++row) {
        int converted = sscanf ( *row, " | %24[^|]|%d |%d |%24s", name, &gr, &kcal, extra);

        if ( converted != 3) {
            printf ( "\tFALSE\t\t%s\n", *row);
            continue;
        }
        printf ( "TRUE\t\t\t%s\n", *row);
    }
}