Problem printing the files that differ between two folders

47 Views Asked by At
import os

def compare_files(file1, file2):
    """Placeholder for the file comparison; performs no work and returns None."""
    return None

def compare_folders(folder1, folder2):
    """Report files that are present in one folder tree but not in the other.

    Both trees are walked with os.walk(). For every file, the path of its
    counterpart in the other tree is built by replacing the first occurrence
    of one folder string in the walked directory path with the other folder
    string. A message is printed for each file whose counterpart path does
    not exist; if no such message was printed, a final "no differences"
    message is printed instead. Nothing is returned.
    """
    # Set to True as soon as any missing counterpart is reported.
    differences_found = False
    
    # Compare files in folder1
    for root, dirs, files in os.walk(folder1):
        for file in files:
            file1 = os.path.join(root, file)
            # Counterpart path: same directory path with folder1 swapped for folder2.
            file2 = os.path.join(root.replace(folder1, folder2, 1), file)
            if not os.path.exists(file2):
                # The message names the two top-level folders, not the
                # sub-directory in which the file was actually found.
                print(f"File {file} exists in {folder1} but not in {folder2}")
                differences_found = True
            elif os.path.isfile(file2):
                # If the file exists in both folders, compare them
                # NOTE: the return value of compare_files() is discarded here,
                # so this call never affects differences_found. A counterpart
                # that exists but is not a regular file falls through both
                # branches and is not reported.
                compare_files(file1, file2)
    
    # Compare files in folder2
    for root, dirs, files in os.walk(folder2):
        for file in files:
            # Counterpart path: same directory path with folder2 swapped for folder1.
            file1 = os.path.join(root.replace(folder2, folder1, 1), file)
            file2 = os.path.join(root, file)
            # Only presence is checked on this pass; files found in both trees
            # were already handed to compare_files() by the first loop.
            if not os.path.exists(file1):
                print(f"File {file} exists in {folder2} but not in {folder1}")
                differences_found = True

    if not differences_found:
        print("No differences found between the files in the two folders.")

if __name__ == "__main__":
    # Arguments are evaluated left to right, so the user is asked for the
    # first folder and then the second before the comparison starts.
    compare_folders(
        input("Enter path to the first folder: "),
        input("Enter path to the second folder: "),
    )

I am trying to compare the files in both folders and print out the contents that differ, as well as the names of the files that differ between the two folders.

1

There is 1 best solution below

9
Booboo On BEST ANSWER

There are several issues with the code:

  1. compare_files is not currently doing any comparison. It should compare the two input files first for size and only then check whether the contents are the same. It should return True or False according to whether the files compare equal and that return value should be used to set differences_found.
  2. If you find a difference between two files or if the file exists in one directory but not the other, the message you produce does not display the full paths to the files; you are only outputting the root folder names.
  3. You have:
def compare_folders(folder1, folder2):
    differences_found = False
    
    # Compare files in folder1
    for root, dirs, files in os.walk(folder1):
        for file in files:
            file1 = os.path.join(root, file)
            file2 = os.path.join(root.replace(folder1, folder2, 1), file)
            if not os.path.exists(file2):
                print(f"File {file} exists in {folder1} but not in {folder2}")
                differences_found = True
            elif os.path.isfile(file2):
                # If the file exists in both folders, compare them
                compare_files(file1, file2)
...

I believe this should be:

def compare_folders(folder1, folder2):
    differences_found = False

    # Compare files in folder1
    for root1, dirs, files in os.walk(folder1):
        for file in files:
            file1 = os.path.join(root1, file)
            root2 = root1.replace(folder1, folder2, 1)
            file2 = os.path.join(root2, file)
            if not os.path.isfile(file2):
                print(f"File {file} exists in {root1} but not in {root2}")
                differences_found = True
            else:
                # If the file exists in both folders, compare them
                if not compare_files(file1, file2):
                    print(f"Files {file} differ in {root1} and {root2}")
                    differences_found = True
...

This not only prints the full paths to the files, starting from the initial folders, but also corrects a small bug: what if folder1/a/b/test is a file, but folder2/a/b/test exists as a directory instead of a file? Your current code would not report that the file test exists in one directory but not the other, and it would not set the differences_found flag.

I have provided an implementation of compare_files in the complete code below:

import os
import mmap

def compare_files(file1: str, file2: str) -> bool:
    """Return True if the two files have byte-for-byte identical contents.

    The sizes are compared first so that files of different length are
    rejected without reading any data. Equal-sized files are then read in
    fixed-size chunks, so memory use stays bounded regardless of file size
    and the comparison stops at the first chunk that differs.

    Args:
        file1: Path to the first file.
        file2: Path to the second file.

    Returns:
        True when both files contain exactly the same bytes (two empty
        files compare equal), False otherwise.

    Raises:
        OSError: If either file cannot be opened or read.
    """
    chunk_size = 64 * 1024

    with open(file1, 'rb') as f1, open(file2, 'rb') as f2:
        # Seeking to the end returns the file size in bytes.
        if f1.seek(0, 2) != f2.seek(0, 2):
            return False

        # Rewind both files before comparing their contents.
        f1.seek(0)
        f2.seek(0)

        while True:
            chunk1 = f1.read(chunk_size)
            chunk2 = f2.read(chunk_size)
            if chunk1 != chunk2:
                return False
            if not chunk1:
                # Both files reached end-of-file without a mismatch.
                return True

def _counterpart_dir(root: str, source: str, target: str) -> str:
    """Map directory *root*, located under *source*, to the same relative place under *target*."""
    relative = os.path.relpath(root, source)
    # For the top-level directory itself the relative path is ".", in which
    # case the target folder is returned unchanged.
    return target if relative == os.curdir else os.path.join(target, relative)


def compare_folders(folder1: str, folder2: str) -> None:
    """Print the differences between the files of two folder trees.

    Every file below ``folder1`` is looked up at the same relative location
    below ``folder2``. A message is printed when the counterpart is missing
    (or is not a regular file) and when both files exist but
    ``compare_files`` reports them as different. ``folder2`` is then walked
    to report files that have no counterpart in ``folder1``. If nothing was
    reported, a single "no differences" message is printed.

    Counterpart directories are derived from the path relative to the walked
    folder rather than by string substitution, so the result does not depend
    on whether either argument ends with a path separator.

    Args:
        folder1: Path to the first folder.
        folder2: Path to the second folder.
    """
    differences_found = False

    # Compare files in folder1
    for root1, dirs, files in os.walk(folder1):
        root2 = _counterpart_dir(root1, folder1, folder2)
        for file in files:
            file1 = os.path.join(root1, file)
            file2 = os.path.join(root2, file)
            if not os.path.isfile(file2):
                print(f"File {file} exists in {root1} but not in {root2}")
                differences_found = True
            elif not compare_files(file1, file2):
                # The file exists in both folders but the contents differ.
                print(f"Files {file} differ in {root1} and {root2}")
                differences_found = True

    # Compare files in folder2. Files present in both trees were already
    # compared above, so only missing counterparts are reported here.
    for root2, dirs, files in os.walk(folder2):
        root1 = _counterpart_dir(root2, folder2, folder1)
        for file in files:
            file1 = os.path.join(root1, file)
            if not os.path.isfile(file1):
                print(f"File {file} exists in {root2} but not in {root1}")
                differences_found = True

    if not differences_found:
        print("No differences found between the files in the two folders.")

if __name__ == "__main__":
    folder1 = input("Enter path to the first folder: ")
    folder2 = input("Enter path to the second folder: ")

    # os.walk() silently yields nothing for a path that does not exist or is
    # not a directory, so a mistyped path would otherwise produce misleading
    # output (e.g. "No differences found" for two non-existent folders).
    for folder in (folder1, folder2):
        if not os.path.isdir(folder):
            raise SystemExit(f"Error: {folder!r} is not an existing directory.")

    compare_folders(folder1, folder2)

The above code works for me, so if you are still having difficulties, you need to explicitly show minimal directory structures that are not producing the results you expect along with what your expected result is and what you actually display.