The python script is using os to walk on a directory and it is assessing to see if any file is inside the folder for ten or more seconds, if there is a file that meets this condition, three things happen:
the event is logged with a timestamp into a log file
A file is created inside an "expired" folder, which is any file that meets the condition in the format: {log_timestamp}_{Path(file_path).stem}.txt which contains in its contents, the file path of
The file is recorded in the format (full-file-path, name-of-file-in-expired_Folder) inside a metadata.txt file
In the following iterations (every 60 seconds) after the first, a delete-files function runs where the directory is os.walk()-ed to see if there is a file that doesn't exist from the last iteration, if it doesn't, the corresponding file in the expired folder is deleted and the line in 'metadata.txt' that corresponds to the file that was deleted/moved out of the directory.
fileCheck.py:
import os
import time
import datetime
import logging
from pathlib import Path
import argparse
import shutil
def parse_arguments():
parser = argparse.ArgumentParser(description='directory tree & file logging')
parser.add_argument("--directory", "-d", required=True, type=str, default='.',
dest="dirToW", help="directory to watch, specify path")
parser.add_argument("--log-file", "-lf", required=True, type=str, default='.',
dest="lf", help="log file location with full path")
parser.add_argument("--interval", "-i", type=int, default=60, dest="intl",
help="interval for periodic checks (in seconds)")
parser.add_argument("--exclude-file", "-ef", type=str, dest="excl_file",
help="list of files to exclude from fileCheck")
parser.add_argument("--expired-folder", "-exp", type=str, required=True, dest="exp_folder",
help="folder to move expired files to")
parser.add_argument("--metadata-file", "-meta", type=str, required=True, dest="meta_file",
help="metadata file to help delete expired files")
return parser.parse_args()
def configure_logging(log_file_path):
logging.basicConfig(filename=log_file_path, level=logging.DEBUG,
format='%(asctime)s - %(message)s',
filemode='a+')
# Add a stream handler to display log messages to the console
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(message)s')
console_handler.setFormatter(formatter)
logging.getLogger().addHandler(console_handler)
def read_exclude_list(exclude_file):
exclude_list = set()
if exclude_file:
try:
with open(exclude_file, 'r') as file:
# Read full file paths and resolve them
exclude_list.update(Path(line.strip()).resolve() for line in file)
except Exception as e:
print(f"Error reading exclude file: {e}")
return exclude_list
def check_existing_files(checked_files, directory_to_watch, log_file_path, exclusion_list, exp_folder, meta_file):
current_time = datetime.datetime.now()
print("Before checking files:", checked_files)
directory_to_watch_path = Path(directory_to_watch).resolve()
# Set to store files in directory
current_files = set()
for root, subdirs, files in os.walk(directory_to_watch_path):
for filename in files:
file_path = os.path.join(root, filename)
print("Processing file:", file_path)
current_files.add(file_path)
try:
file_creation_time = datetime.datetime.fromtimestamp(os.path.getctime(file_path))
except FileNotFoundError:
continue
time_difference = current_time - file_creation_time
full_path = Path(file_path).resolve()
if (int(time_difference.total_seconds()) > 10 and full_path not in checked_files and full_path not in exclusion_list):
log_msg = f"File '{full_path}' has been in the directory for more than ten seconds."
logging.info(log_msg)
checked_files.add(full_path) # Add full path to checked_files
copy_to_expired_folder(full_path, exp_folder, meta_file)
log_created_file(full_path, log_file_path)
print("After checking files:", checked_files)
return checked_files
def log_created_file(file_path, lf):
current_time = datetime.datetime.now()
log_message = f"File {file_path} added at {current_time}"
logging.info(log_message)
def copy_to_expired_folder(file_path, exp_folder, meta_file):
current_time = datetime.datetime.now()
log_timestamp = current_time.strftime("%Y.%m.%d_%H.%M.%S.%f")
try:
# Copy the file to the expired files folder
expired_file_path = Path(exp_folder) / f"{log_timestamp}_{Path(file_path).stem}.txt"
with open(expired_file_path, 'w') as expired_file:
expired_file.write(str(file_path) + '\n')
# Write to metadata file
with open(meta_file, 'a') as f:
f.write(f"{Path(expired_file_path).name}, {file_path}\n")
except Exception as e:
print(f"Error copying file: {e}")
def delete_files(directory_to_watch, exp_folder, meta_file):
checked_files = set()
for root, subdirs, files in os.walk(directory_to_watch):
for filename in files:
file_path = os.path.join(root, filename)
checked_files.add(file_path)
expired_files = os.listdir(exp_folder)
for expired_file in expired_files:
expired_file_path = os.path.join(exp_folder, expired_file)
if os.path.isfile(expired_file_path):
# Read the content of the expired file
with open(expired_file_path, 'r') as f:
expired_content = f.read().strip()
# Check if the expired content (full path) is not in the checked_files (full path of dirToW files)
if expired_content not in checked_files:
os.remove(expired_file_path)
logging.info(f"Deleted expired file: {expired_file_path}")
with open(meta_file, 'r') as f:
lines = f.readlines()
with open(meta_file, 'w') as f:
for line in lines:
if expired_file in line:
continue
f.write(line)
def main():
args = parse_arguments()
# Configure logging, logs are appended to a file
configure_logging(args.lf)
# Initial check for existing files
checked_files = set()
exclude_list = read_exclude_list(args.excl_file)
checked_files = check_existing_files(checked_files, args.dirToW, args.lf, exclude_list, args.exp_folder, args.meta_file)
print(f"Log file path: {args.lf}")
try:
while True:
# Periodic check, specify in argument, default 60 seconds
time.sleep(args.intl)
exclude_list = read_exclude_list(args.excl_file)
checked_files = check_existing_files(checked_files, args.dirToW, args.lf, exclude_list, args.exp_folder, args.meta_file)
delete_files(args.dirToW, args.exp_folder, args.meta_file)
print("another loop")
except KeyboardInterrupt:
pass
if __name__ == "__main__":
main()
fileCheck_test.py:
import unittest
import os
import logging
import datetime
import tempfile
from pathlib import Path
from unittest.mock import patch
from fileCheck import *
class TestScript(unittest.TestCase):
@classmethod
def setUpClass(cls):
# Create a single temporary directory for all tests
cls.test_dir = tempfile.mkdtemp(dir='/home/semir-testing/floor/test-env/')
def setUp(self):
# Create mock directory within the single temporary directory
self.mock_dir = os.path.join(self.test_dir, "mock_Dir")
if not os.path.exists(self.mock_dir):
os.makedirs(self.mock_dir)
self.another_dir = os.path.join(self.mock_dir, "anotherDir")
if not os.path.exists(self.another_dir):
os.makedirs(self.another_dir)
# Create the expired folder
self.expired_folder = os.path.join(self.test_dir, "expired")
if not os.path.exists(self.expired_folder):
os.makedirs(self.expired_folder)
current_time = datetime.datetime.now()
creation_time = current_time - datetime.timedelta(seconds=20) # 20 seconds ago
with open(os.path.join(self.mock_dir, "file1"), "w") as f:
f.write("content1\n")
os.utime(f.name, (creation_time.timestamp(), creation_time.timestamp()))
with open(os.path.join(self.mock_dir, "file2"), "w") as f:
f.write("content2\n")
os.utime(f.name, (creation_time.timestamp(), creation_time.timestamp()))
with open(os.path.join(self.another_dir, "file3.txt"), "w") as f:
f.write("content3\n")
os.utime(f.name, (creation_time.timestamp(), creation_time.timestamp()))
# Create the exclude file
self.exclude_file = os.path.join(self.test_dir, "exclude.txt")
with open(self.exclude_file, "w") as f:
f.write(os.path.join(self.mock_dir, "file1\n"))
# Create the metadata file
self.meta_file = os.path.join(self.test_dir, "metadata.txt")
# Create the log file
self.log_file = os.path.join(self.test_dir, "test.log")
logging.basicConfig(filename=self.log_file, level=logging.DEBUG,
format='%(asctime)s - %(message)s', filemode='a+')
# Configure logging for the tests
configure_logging(self.log_file)
def test_read_exclude_list(self):
exclude_file = os.path.join(self.test_dir, 'exclude.txt')
with open(exclude_file, 'w') as f:
f.write('\n'.join([os.path.join(self.mock_dir, 'another_Dir', 'file3.txt')]))
exclude_list = read_exclude_list(exclude_file)
self.assertEqual(len(exclude_list), 1)
self.assertIn(Path(os.path.join(self.mock_dir, 'another_Dir', 'file3.txt')), exclude_list)
def test_check_existing_files(self):
# Mock the logging functions
with patch('fileCheck.logging') as mock_logging:
# Set the current time to 10 seconds ago
current_time = datetime.datetime.now() - datetime.timedelta(seconds=10)
# Set up exclude list
exclude_list = set()
# Call the function
checked_files = set()
checked_files = check_existing_files(checked_files, self.test_dir, 'test.log', exclude_list, self.expired_folder, self.meta_file)
# Check if the files were copied correctly
for root, _, files in os.walk(self.mock_dir):
for test_file in files:
if os.path.join(root, test_file) not in exclude_list:
self.assertTrue(os.path.exists(os.path.join(self.expired_folder, f"{test_file}.txt")))
def test_copy_to_expired_folder(self):
# Set up test data
file_path = os.path.join(self.mock_dir, "file1")
exp_folder = self.expired_folder
meta_file = self.meta_file
# Call the function
copy_to_expired_folder(file_path, exp_folder, meta_file)
# Check if the file was copied and metadata was written
log_timestamp = datetime.datetime.now().strftime("%Y.%m.%d_%H.%M.%S.%f")
expected_expired_file_path = os.path.join(exp_folder, f"{log_timestamp}_file1.txt")
self.assertTrue(os.path.exists(expected_expired_file_path))
# Check if metadata was written
with open(meta_file, 'r') as f:
metadata_content = f.read()
self.assertIn(f"{log_timestamp}_file1.txt, {file_path}", metadata_content)
if __name__ == '__main__':
unittest.main()
The main issue I'm having is that the files created in the setUp method are the three events described above aren't taking place when running the test.
Errors in test script:
Before checking files: set()
Processing file: /home/semir-testing/floor/test-env/tmpfggyv_pv/mock_Dir/file2
After checking files: set()
Processing file: /home/semir-testing/floor/test-env/tmpfggyv_pv/mock_Dir/file1
After checking files: set()
Processing file: /home/semir-testing/floor/test-env/tmpfggyv_pv/mock_Dir/anotherDir/file3.txt
After checking files: set()
Checked Files (After): set()
FF.
======================================================================
FAIL: test_check_existing_files (__main__.TestScript)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/semir-testing/floor/fileCheck_test.py", line 90, in test_check_existing_files
self.assertTrue(os.path.exists(os.path.join(self.expired_folder, f"{test_file}.txt")))
AssertionError: False is not true
======================================================================
FAIL: test_copy_to_expired_folder (__main__.TestScript)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/home/semir-testing/floor/fileCheck_test.py", line 105, in test_copy_to_expired_folder
self.assertTrue(os.path.exists(expected_expired_file_path))
AssertionError: False is not true
----------------------------------------------------------------------
Ran 3 tests in 0.010s
FAILED (failures=2)
Even though I'm not testing the logging currently, the logging works fine, but that's not an issue I'm worried about at the moment. Any tips would be greatly appreciated