Find all files containing search term except sub elements

60 Views Asked by At

I'm trying to make a C program that will display all the files and folders containing a given search term. The search term is given as an argument when executing the program. A folder / file is displayed to standard output if its name contains the search term (case insensitive). The difficulty though is that I do not want to output files and subfolders that are contained in a folder that contains the search term. Here's an example:

Let's assume my search term is docker, this is the current output:

"/Users/me/.docker"
"/Users/me/.docker/contexts"
"/Users/me/.docker/contexts/meta"
"/Users/me/.docker/config.json"
"/Users/me/.docker/scan"
"/Users/me/.docker/scan/config.json"
"/Users/me/.docker/application-template"
"/Users/me/.docker/application-template/logs"
"/Users/me/.docker/application-template/logs/com.docker.log"
"/Users/me/.docker/daemon.json"
"/Users/me/.docker/run"
"/Users/me/Library/Application Support/Docker Desktop"
"/Users/me/Library/Application Support/Docker Desktop/blob_storage"
"/Users/me/Library/Application Support/Docker Desktop/blob_storage/6965e70b-e33a-4415-b9a8-e19996fe221d"

But this is the output I'm trying to achieve:

"/Users/me/.docker"
"/Users/me/Library/Application Support/Docker Desktop"

Here's my code so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <dirent.h>

static int display_info(const char *fpath, const char * fname, const char * term) {
    int what_len = strlen(term);
    int count = 0;

    char *where = fpath;

    if (what_len){
       while ((where = strcasestr(where, term))) {
                where += what_len;
                count++;
        }

        if (count == 1) {
            printf("\"%s/%s\"\n", fpath, fname);
        }

    }

    return 0;
}


static void listFilesRecursively(char * basePath, const char * searchTerm) {
    char path[1000];
    struct dirent * dp;
    DIR * dir = opendir(basePath);

    // Unable to open directory stream
    if (!dir)
        return;

    while ((dp = readdir(dir)) != NULL) {
        if (strcmp(dp -> d_name, ".") != 0 && strcmp(dp -> d_name, "..") != 0) {
//            printf("%s %hhu %s\n", basePath, dp->d_type, dp -> d_name);
            display_info(basePath, dp->d_name, searchTerm);

            // Construct new path from our base path
            strcpy(path, basePath);
            strcat(path, "/");
            strcat(path, dp -> d_name);

            listFilesRecursively(path, searchTerm);
        }
    }

    closedir(dir);
}


int main(int argc, const char * argv[]) {
    char * home = getenv("HOME");

    if (argc == 2) {
        listFilesRecursively(home, argv[1]);
    } else {
        printf("Please provide one argument");
    }
    return 0;
}

Any feedback is greatly appreciated thanks!

2

There are 2 best solutions below

4
On BEST ANSWER
  • I am not sure I understand the logic of display_info()
  • On listFilesRecursively() you can not reuse path from call to call
  • main() should the the first function in your code, maybe in a separate file

an alternative

I will add a C example, changing a bit of your listFilesRecursively()...

  • not using void() so you can return -1 for an error
  • testing for . and .. at the beginning of the loop and just using continue may lead into code easier to read
  • path is allocated locally and free() on return
  • strstr_ign() is a case insensitive version of strstr() for use in the pattern search

code for list_files() after change

int list_files(char* pattern, char* base_path)
{
    struct dirent* dp;
    DIR* dir = opendir(base_path);
    if (!dir) return -1; // Unable to open directory stream
    while ((dp = readdir(dir)) != NULL)
    {
        if (strcmp(dp->d_name, ".") == 0) continue;
        if (strcmp(dp->d_name, "..") == 0) continue;
        if ( strstr_ign((const char*)dp->d_name, pattern) != NULL )
        {
            display_info(base_path, dp->d_name);
        }
        else
        {
            char* path = (char*)malloc(1 + strlen(dp->d_name) + strlen(base_path) + 1);
            sprintf(path, "%s/%s", base_path, dp->d_name);
            list_files(pattern, path);
            free(path); // ok with path
        }
    };  // while()
    closedir(dir);
    return 0;
};  // list_files()

code for strstr_ign()

I hate the arguments order for strstr() but kept it here just to have things equal. This way one can use strstr_ign() as a drop-in replacement for strstr() without changing the order of the arguments. I believe needle should come first :) an in the language: search for a needle in a haystack is far more common than search the haystack for a needle but Ken and Dennis had their reasons to write strstr() the way they did it...

//
// strstr() ignoring case
//
char*       strstr_ign(const char* haystack, const char* needle)
{
    if (needle == NULL) return NULL;
    if (haystack == NULL) return NULL;
    if (*needle == 0)
    {
        if (*haystack == 0)
            return (char*) haystack;
        else
            return NULL;
    }
    int limit = strlen(haystack) - strlen(needle);
    for (int x = 0; x <= limit; x += 1)
    {   // search for needle at position 'x' of 'haystack'
        int here = 1;
        for (unsigned y = 0; y < strlen(needle); y += 1)
        {
            if ( tolower(haystack[x + y]) != tolower(needle[y]) )
            {
                here = 0; break;
            };
        };
        if ( here == 1) return (char*)(haystack + x);
    }
    return NULL;
};

a new display_info()

changed to show last access for folders and file size for regular files that match the search pattern (case insensitive). Below is an example of the output for files and folders. Note the '-' and the 'd' as in the ls -l output.

    - "./hms.c" [size: 1546]
    d "./sub/1/xyzHMSxyz"   [last access: Sat Apr 24 12:38:04 2021]
int display_info(const char* base, const char* file)
{
    struct  stat Stats;
    char*   path = (char*)malloc(1 + strlen(base) + strlen(file) + 1);
    char    atime[40];
    sprintf(path, "%s/%s", base, file);
    if ( stat(path, &Stats) < 0)
    {
        perror("Inside display_info()");
        free(path);
        return -1;
    }
    if ( S_ISDIR(Stats.st_mode) )
    {
        strftime( atime, sizeof(atime), "%a %b %d %H:%M:%S %Y", localtime(&Stats.st_atime) );
        printf("\td \"%s/%s\"\t[last access: %s]\n", base, file, atime );
    }
    else
    {
        if ( S_ISREG(Stats.st_mode) )
            printf("\t- \"%s/%s\"\t[size: %ld]\n", base, file, Stats.st_size );
        else
            printf("is somthing else\n");
    }
    free(path);
    return 0;
}

sample output

Search pattern is "hms" (case is ignored) 
    - "./hms"   [size: 16848]
    - "./hms-soma.c"    [size: 1379]
    - "./hms.c" [size: 1546]
    d "./sub/1/xyzHMSxyz"   [last access: Sat Apr 24 12:38:04 2021]
    d "./sub/2/xyzHMS"  [last access: Sat Apr 24 12:21:11 2021]
    d "./sub/hMs"   [last access: Sat Apr 24 12:21:11 2021]

C code for this test

miminally tested :)

#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <time.h>


int         display_info(const char*, const char*);
int         list_files(char*, char*);
char*       strstr_ign(const char*, const char*);

 int main(int argc, const char * argv[])
 {
     char    search_term[80];
     if (argc >= 2)
     {
         if ( strlen(argv[1]) > (sizeof(search_term)-1) )
         {
             printf("Size of substring (%zd) must not be greater than %zd\n",
             strlen(argv[1]), sizeof(search_term)-1 );
             return -1;  
         }
         for ( int i = 0; i<= strlen(argv[1]); search_term[i] = (char)(tolower(argv[1][i])), i+=1 ); 
         printf("Search pattern is \"%s\" (case is ignored) \n", search_term );
         list_files(search_term,".");
     } else {
         printf("Please provide pattern to search for.\n");
     }
     return 0;
 };  // main()


int display_info(const char* base, const char* file)
{
    struct  stat Stats;
    char*   path = (char*)malloc(1 + strlen(base) + strlen(file) + 1);
    char    atime[40];
    sprintf(path, "%s/%s", base, file);
    if ( stat(path, &Stats) < 0)
    {
        perror("Inside display_info()");
        free(path);
        return -1;
    }
    if ( S_ISDIR(Stats.st_mode) )
    {
        strftime( atime, sizeof(atime), "%a %b %d %H:%M:%S %Y", localtime(&Stats.st_atime) );
        printf("\td \"%s/%s\"\t[last access: %s]\n", base, file, atime );
    }
    else
    {
        if ( S_ISREG(Stats.st_mode) )
            printf("\t- \"%s/%s\"\t[size: %ld]\n", base, file, Stats.st_size );
        else
            printf("is somthing else\n");
    }
    free(path);
    return 0;
}


int list_files(char* pattern, char* base_path)
{
    struct dirent* dp;
    DIR* dir = opendir(base_path);
    if (!dir) return -1; // Unable to open directory stream
    while ((dp = readdir(dir)) != NULL)
    {
        if (strcmp(dp->d_name, ".") == 0) continue;
        if (strcmp(dp->d_name, "..") == 0) continue;
        if ( strstr_ign((const char*)dp->d_name, pattern) != NULL )
        {
            display_info(base_path, dp->d_name);
        }
        else
        {
            char* path = (char*)malloc(1 + strlen(dp->d_name) + strlen(base_path) + 1);
            sprintf(path, "%s/%s", base_path, dp->d_name);
            list_files(pattern, path);
            free(path); // ok with path
        }
    };  // while()
    closedir(dir);
    return 0;
};  // list_files()

//
// strstr() ignoring case
//
char*       strstr_ign(const char* haystack, const char* needle)
{
    if (needle == NULL) return NULL;
    if (haystack == NULL) return NULL;
    if (*needle == 0)
    {
        if (*haystack == 0)
            return (char*) haystack;
        else
            return NULL;
    }
    int limit = strlen(haystack) - strlen(needle);
    for (int x = 0; x <= limit; x += 1)
    {   // search for needle at position 'x' of 'haystack'
        int here = 1;
        for (unsigned y = 0; y < strlen(needle); y += 1)
        {
            if ( tolower(haystack[x + y]) != tolower(needle[y]) )
            {
                here = 0; break;
            };
        };
        if ( here == 1) return (char*)(haystack + x);
    }
    return NULL;
};
2
On

Thanks to @KamilCuk I was able to solve my issue. Here's my final listFilesRecursively function:

static void listFilesRecursively(char * basePath, const char * searchTerm) {
    char path[1000];
    struct dirent * dp;
    DIR * dir = opendir(basePath);
    
    // Unable to open directory stream
    if (!dir)
        return;
    
    while ((dp = readdir(dir)) != NULL) {
        if (strcmp(dp -> d_name, ".") != 0 && strcmp(dp -> d_name, "..") != 0) {
            if (strcasestr(dp->d_name, searchTerm)) {
                printf("%s/%s\n",basePath,dp->d_name);
                
                listFilesRecursively(path, searchTerm);
            } else {
                // Construct new path from our base path
                strcpy(path, basePath);
                strcat(path, "/");
                strcat(path, dp -> d_name);
                
                listFilesRecursively(path, searchTerm);
            }
        }
    }
    
    closedir(dir);
}