I don't know why it exceed the time limit in leetcode

156 Views Asked by At

The problem is https://leetcode.com/problems/wildcard-matching/

I wrote the code with recursion and optimize with memoization. but it occurs 'time limit exceeded'. why does this??

int memo[2000][2000];

int check(char* s, char* p, int sIdx, int pIdx){
    for (;s[sIdx]!=0&&p[pIdx]!=0;sIdx++,pIdx++){
        if (s[sIdx]!=p[pIdx]&&p[pIdx]!='?'){
            if (p[pIdx]=='*'){
                while (p[++pIdx]=='*') {}
                if (p[pIdx]==0){
                    return 1;
                }
                for (;s[sIdx]!=0;sIdx++){
                    if (s[sIdx]==p[pIdx]||p[pIdx]=='?'){ 
                        if (memo[sIdx+1][pIdx+1]==1) return memo[sIdx+1][pIdx+1];
                        if (memo[sIdx+1][pIdx+1]==-1){
                            if (memo[sIdx+1][pIdx+1] = check(s, p, sIdx+1, pIdx+1)) return 1;
                        }
                    } 
                }
                return 0;
            }
            else{ 
                return 0;
            }
        }
    }

    if (s[sIdx]==0&&p[pIdx]==0) {
        return 1;
    }
    else if (s[sIdx]==0) {
        while (p[pIdx]!=0) {
            if (p[pIdx++]!='*'){
                return 0;
            }
        }
        return 1;
    } 
    else{
        return 0;
    }
}

bool isMatch(char* s, char* p) {
    for(int i = 0; i < 2000; i++) {
        for(int j = 0; j < 2000; j++) memo[i][j] = -1;
    }

    return check(s,p,0,0);
}

Even before optimization with memoization, it passes about 1,000 testcases, but after, rather it passes about 600 testcases. I don't know what happens.

2

There are 2 best solutions below

0
Unmitigated On

Your code is overcomplicated for this problem. At each step, you only need to attempt to match one character instead of iterating to check for all possible matches.

Let us break this down into cases for a dynamic programming solution with O(|s||p|) time complexity (where |s| indicates the length of the string s).

Trivial cases:

If there are no characters left in the pattern, then it can only match if the string to match against is itself empty.

If there are no more characters left in the string, then it can only match if the rest of the pattern consists of only asterisks. We can rewrite this as the current character of the pattern being an asterisk and the rest of the pattern also matching.

Other cases:

If the first character of the string and pattern match or the pattern starts with ?, then we just match the rest of the string against the rest of the pattern.

If the first character of the pattern is *, it can either match nothing or the first character of the string, with the possibility of matching more.

Otherwise, there is no match.

int memo[2000][2000];

int check(char* s, char* p, int sIdx, int pIdx){
    if (!p[pIdx]) return !s[sIdx];
    if (!~memo[sIdx][pIdx]) {
        if (!s[sIdx]) memo[sIdx][pIdx] = p[pIdx] == '*' && check(s, p, sIdx, pIdx + 1);
        else if (s[sIdx] == p[pIdx] || p[pIdx] == '?')
            memo[sIdx][pIdx] = check(s, p, sIdx + 1, pIdx + 1);
        else if (p[pIdx] == '*')
            memo[sIdx][pIdx] = check(s, p, sIdx, pIdx + 1) || check(s, p, sIdx + 1, pIdx);
        else 
            memo[sIdx][pIdx] = 0;
    }
    return memo[sIdx][pIdx];
}

bool isMatch(char* s, char* p) {
    memset(memo, -1, sizeof memo);
    return check(s, p, 0, 0);
}
3
Luis Colorado On

Your function is very complicated, you are needing 4000x4000 integer array, which is a huge amount of memory. Despite of the fact that you build such an array (64Mb size) you are limiting probably your pattern searching to 4000 characters string size. Your code is not well formatted, which makes readability a nightmare. Anyway, I've detected a possible point of failure in line:

                            if (memo[sIdx+1][pIdx+1] = check(s, p, sIdx+1, pIdx+1)) return 1;

I cannot guess, but that is an assignment, not an equality check. I cannot say if that was the intended thing, you don't pay for each space you write on the source, so you can use them. Anyway, the recursive form of your function could be something like this (not requiring more than strlen(pattern) + strlen(string) recursive calls, maximum):

bool match(const char *patt, const char *s)
{   
    if (!*patt && !*s)
        return true; /* both ended, this is a match */
    if (*patt) { /* we still have pattern */
        switch (*patt) {
        case '?':
            return *s && match(patt+1, s+1);
        case '*':
            if (*s) {
                /* try to explore first on advancing
                 * the pattern. */
                return match(patt+1, s)
                    /* then try advancing the string pointer */
                    || match(patt, s+1);
                /* any of them result in a match */
            }
            /* else we cannot advance s */
            return match(patt+1, s);
        default:
            /* normal character match */
            return *patt == *s && match(patt+1, s+1);
        }
    }
    /* pattern is ended, but s isn't */
    return false;
} /* match */

A complete solution could be:

#include <getopt.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

bool match(const char *patt, const char *s)
{
    if (!*patt && !*s)
        return true;
    if (*patt) {
        switch (*patt) {
        case '?':
            return *s && match(patt+1, s+1);
        case '*':
            if (*s) {
                /* try to explore first on advancing
                 * the pattern. */
                return match(patt+1, s)
                    || match(patt, s+1);
            }
            return match(patt+1, s);
        default:
            return *patt == *s && match(patt+1, s+1);
        }
    }
    return false; /* pattern is ended, but s isn't */
} /* match */

int main(int argc, char **argv)
{
    char *matcher;
    static char line[1000];

    if (argc < 2) {
        fprintf(stderr, "Usage: %s expr\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    while (fgets(line, sizeof line, stdin)) {
        char *s = line;
        s = strsep(&s, "\n");
        if (!s) {
            fprintf(stderr,
                    "line size exceeded (max = %zd)\n",
                    sizeof line - 1);
            exit(EXIT_FAILURE);
        }
        printf("match(\"%s\", \"%s\") -> %s\n",
            argv[1], s,
            match(argv[1], s)
                ? "true"
                : "false");
    } /* while */
} /* while */

which is tested here:

$ match "*?.*"
pepe.
match("*?.*", "pepe.") -> true
.pepe
match("*?.*", ".pepe") -> false
...
match("*?.*", "...") -> true
.
match("*?.*", ".") -> false
..
match("*?.*", "..") -> true
.p
match("*?.*", ".p") -> false
p.
match("*?.*", "p.") -> true
^D
$ _

Note:

Time limit exceeded comes from a test case that makes recursive solution impractical, as the tested string is "bbabba*bb***bba". In this case as the string doesn't match, all the possibilities have to be exhausted (the pattern doesn't end in bba, while the pattern requires it, but at each * wilcard, the number of function calls duplicate) we can reduce the number of function calls by merging the sequences of * wildcards into one, which will transform the above search from 10 recursion levels to 6 (this is a gain of 16 to 1 in number of cases) but anyway the solution is not practical for many occurrences of *.

A better solution would be to create a finite state automaton, based on the input pattern, that allows you to have a constant time per input character. And no recursion at all. The solution of merging several * characters into a single one works very well. But you can solve the problem with this hint :)

$ time match "b*b*ab**ba*b**b***bba" <<EOF
bbbbbbbabbaabbabbbbaaabbabbabaaabbababbbabbbabaaabaab
EOF
match("b*b*ab**ba*b**b***bba", "bbbbbbbabbaabbabbbbaaabbabbabaaabbababbbabbbabaaabaab") -> false

real    0m4,767s
user    0m4,766s
sys     0m0,000s
$ time match "b*b*ab*ba*b*b*bba" <<EOF
bbbbbbbabbaabbabbbbaaabbabbabaaabbababbbabbbabaaabaab
EOF
match("b*b*ab*ba*b*b*bba", "bbbbbbbabbaabbabbbbaaabbabbabaaabbababbbabbbabaaabaab") -> false

real    0m0,009s
user    0m0,008s
sys     0m0,000s

Well I can tell you, after the optimization stated above, that the recursive approach is not viable, as test 1710 / 1811 was:

s =
"abbabaaabbabbaababbabbbbbabbbabbbabaaaaababababbbabababaabbababaabbbbbbaaaabababbbaabbbbaabbbbababababbaabbaababaabbbababababbbbaaabbbbbabaaaabbababbbbaababaabbababbbbbababbbabaaaaaaaabbbbbaabaaababaaaabb"
p =
"**aa*****ba*a*bb**aa*ab****a*aaaaaa***a*aaaa**bbabb*b*b**aaaaaaaaa*a********ba*bbb***a*ba*bb*bb**a*b*bb" 

which involves 2^24 recursive calls for a negative answer (this is over 16,000,000 recursive calls), and I interrupted it after 4min execution time.

Try a non-recursive approach!!!

Try this (for example):

#include <assert.h>
#include <getopt.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define F(_fmt) "%s:%d:%s: "_fmt, __FILE__, __LINE__, __func__

#define ERR(_efmt, ...) do {              \
        fprintf(stderr, F(_efmt), \
            ##__VA_ARGS__);   \
        exit(EXIT_FAILURE);       \
    } while (0)

#define N 4096

#define FLAG_ACTIVE  (1 << 8)
#define FLAG_IS_AST  (1 << 9)

struct nfa_state {
    char next;
    bool clos:1;
    bool actv:1;
};

static void
pr_al(
        struct nfa_state st[],
        size_t           st_sz,
        const char      *fmt,
        ...)
{
    if (fmt) {
        va_list args;
        va_start(args, fmt);
        vprintf(fmt, args);
        va_end(args);
    }
    int i;
    for (i = 0; i < st_sz; i++) {
        printf("%c%c",
               st[i].clos ? '*' : ' ',
               st[i].next ? st[i].next : '$');
    }
    printf("\n");
    char *sep = "";
    for (i = 0; i < st_sz; i++) {
        printf("%s%c", sep, st[i].actv ? '1' : '0');
        sep = " ";
    }
    printf("\n");
} /* pr_al */

static size_t
comp(
        const char       *patt,
        struct nfa_state *tab,
        size_t            tab_sz)
{
    size_t sz = 0;
    const char *p = patt;
    while (*p) {
        if (sz == tab_sz) {
            ERR("table overflow (%zu entries)\n",
                sz);
        }
        tab[sz].actv = false;
        tab[sz].clos = *p == '*';
        if (tab[sz].clos) {
            while (*p == '*') p++;
        }
        tab[sz].next = *p;
        sz++;
        if (*p) p++;
    }
    if (sz == 0 || tab[sz - 1].next) {
        tab[sz].actv = false;
        tab[sz].clos = false;
        tab[sz].next = '\0';
        sz++;
    }
    pr_al(
        tab, sz,
        F("comp: status after compi"
          "lation (size = %zu)\n"),
        sz);
    return sz;
} /* comp */

static bool
match(
        const char       *s,
        struct nfa_state *tab,
        size_t            tab_sz)
{

    tab[0].actv = true;
    for (int i = 1; i < tab_sz; i++)
        tab[i].actv = 0;

    pr_al(tab, tab_sz, F("Estado inicial\n"));

    /* process... */
    char in;

    while ((in = *s++) != 0) {
        printf(F("Processing <0x%02x> == '%c'\n"),
                in, in);

        if (tab_sz == 1) {
            tab[0].actv = tab[0].clos;
        } else { 
            for (int new = tab_sz - 1, old = new - 1;
                     new >= 0;
                     new--, old--)
            {
                if (!tab[new].clos || !tab[new].actv) {
                    tab[new].actv = tab[old].actv &&
                        (tab[old].next == in ||
                         tab[old].next == '?');
                }
                /* delete old (if not closure) */
                if (!tab[old].clos) {
                    tab[old].actv = false;
                }
            } /* for */
        }
        pr_al(tab, tab_sz, NULL);
    } /* while */
    return tab[tab_sz - 1].actv;
} /* isMatch */


int main(int argc, char **argv)
{
    char *matcher;
    static char line[10000];
    static struct nfa_state tab[N];

    if (argc < 2) {
        fprintf(stderr, "Usage: %s expr\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    size_t tab_sz = comp(argv[1], tab, N);

    while (fgets(line, sizeof line, stdin)) {
        char *s = line;
        s = strsep(&s, "\n");
        if (!s) {
            fprintf(stderr,
                    "line size exceeded (max = %zd)\n",
                    sizeof line - 1);
            exit(EXIT_FAILURE);
        }
        printf("isMatch(\"%s\", \"%s\") -> %s\n",
            s, argv[1],
            match(s, tab, tab_sz)
                ? "true"
                : "false");
    } /* while */
} /* while */