I have a very computationally costly code to execute in Google Colab, which takes a very long time to finish executing. The thing is that I generate a log file from my execution using prints, with the following function:
log_file = None
execution_uid = env['EXEC_ID']
log_folder_path = '../logs'
class Tee:
def __init__(self, *files):
self.files = files
def write(self, obj):
for f in self.files:
if hasattr(f, "write"):
if f.name == "<stdout>":
# Handle tqdm separately
try:
f.write(obj)
except ValueError:
pass
else:
f.write(obj)
f.flush()
def flush(self):
for f in self.files:
f.flush()
def setLogFile(execution_uid = None):
if execution_uid == None:
execution_uid = env['EXEC_ID']
log_file_path = log_folder_path + "/" + str(execution_uid) + '_execution_log_'+ str(int(time.time())) +'.log'
if env['LOGS']:
os.makedirs(log_folder_path, exist_ok=True)
log_file = open(log_file_path, "w")
print("\n\n \t\t Results of the execution will be available at: " + log_file_path + "\n\n")
encoding = sys.stdout.encoding if sys.stdout.encoding else 'utf-8'
# Create a Tee object to duplicate the output
tee = Tee(sys.stdout, io.TextIOWrapper(log_file.buffer, encoding=encoding, errors='ignore'))
sys.stdout = tee
return log_file
else:
print("\n\n \t\t Using CONSOLE as execution printer \n\n")
sys.stdout = sys.__stdout__ # Reset sys.stdout to the original value
return None
This works fine in general, but in Google Colab, if the execution does not finish and the environment disconnects or there is a crash, the log file is not generated (while it is if I manually stop the cell from executing or if a crash occurs locally not using Colab). My question is, how can I generate this file "on the fly" so that even if the execution crashes or the environment disconnects I do not loose that log information?