I have spectrum that has been coded to include a straight line to continuum correct the (b)laze corrected, (N)ormalised and simple (c)osmic ray trimmed data (bNc data). I think my code has become very messy, and I would like to ensure that when I save it to a file called "Prepare.mat" in a folder I can access all variables that I need for the next step. I think there is some duplication, but I want to ensure I have saved it and all the variables correctly. I the spectrum plots correctly, BUT, it still loads bad observations. Can someone cast an eye on this and check, duplications, streamlining if possible.
import os
import numpy as np
import matplotlib.pyplot as plt
import h5py
from scipy.io import loadmat, savemat
from scipy.signal import medfilt
import scipy.io
import scipy.io as sio # Import scipy.io as sio
%matplotlib inline
%matplotlib widget
# Directory where the MATLAB files are located
output_folder = '/Users/XXX/Desktop/Starx/'
# Define dimensions
row_size = 29 # Define the number of rows
col_size = 2047 # Define the number of columns
num_obs = 23
# Get a list of .mat files in the directory
mat_files = [filename for filename in os.listdir(directory) if filename.startswith('202') and filename.endswith('.mat')]
# Initialize arrays to store data
num_files = len(mat_files)
ALLcalspec1 = np.empty((row_size, col_size, num_obs))
ALLcalspec2 = np.empty((row_size, col_size, num_obs))
ALLfib4spec = np.empty((row_size, col_size, num_obs))
ALLfib4spec0 = np.empty((row_size, col_size, num_obs))
ALLfib4specwave = np.empty((row_size, col_size, num_obs))
ALLfib4specbNc = np.empty((row_size, col_size, num_obs))
ALLfib4specerr = np.empty((row_size, col_size, num_obs))
ALLfib4specS = np.empty((row_size, col_size, num_obs))
ALLJD = np.empty(num_files)
# Load data from .mat files
for s, filename in enumerate(mat_files):
data = sio.loadmat(os.path.join(directory, filename))
calspec1 = data.get('calspec1')
calspec2 = data.get('calspec2')
fib4_spec = data.get('fib4_spec')
fib4_spec0 = data.get('fib4_spec0')
fib4_spec_wave = data.get('fib4_spec_wave')
fib4_spec_bNc = data.get('fib4_spec_bNc')
fib4_specerr = data.get('fib4_specerr')
fib4_specS = data.get('fib4_specS')
JD = data.get('JD')
ALLcalspec1[:, :, s] = calspec1
ALLcalspec2[:, :, s] = calspec2
ALLfib4spec[:, :, s] = fib4_spec
ALLfib4spec0[:, :, s] = fib4_spec0
ALLfib4specwave[:, :, s] = fib4_spec_wave
ALLfib4specbNc[:, :, s] = fib4_spec_bNc
ALLfib4specerr[:, :, s] = np.real(fib4_specerr)
ALLfib4specS[:, :, s] = fib4_specS
ALLJD[s] = JD
# Directory where the synthetic spectrum MATLAB file is located
synthetic_spectrum_file = '/Users/XXX/Desktop/StarX/synth_teff4500_logg45.mat'
# Load the synthetic spectrum data using scipy's loadmat function
synthetic_spectrum_data = loadmat(synthetic_spectrum_file)
# Extract the relevant data from the loaded dictionary
synthetic_wave = synthetic_spectrum_data.get('wavelength', np.array([]))
# Load the 'bcorr_fib4_all.mat' file using h5py
hdf5_file_path = '/Users/XXX/Desktop/StarX/bcorr_fib4_all.mat'
with h5py.File(hdf5_file_path, 'r') as hdf5_file:
# Assuming 'bcorr_fib4_all' is a dataset in the HDF5 file, you can load it like this:
bcorr_fib4_all = hdf5_file['bcorr_fib4_all'][:]
# Calculate 'ti' without transposing, and calculate median along axis 0 (first dimension)
ti = np.median(ALLfib4spec, axis=0)
# Create a boolean mask for 'bad' observations
bad_obs_mask = np.median(ti, axis=1) < 10000
# Apply the filter along the first dimension for all relevant arrays
ALLfib4spec0 = ALLfib4spec0[:, ~bad_obs_mask, :]
ALLfib4specbNc = ALLfib4specbNc[:, ~bad_obs_mask, :]
ALLfib4specerr = ALLfib4specerr[:, ~bad_obs_mask, :]
ALLfib4specwave = ALLfib4specwave[:, ~bad_obs_mask, :]
ALLcalspec1 = ALLcalspec1[:, ~bad_obs_mask]
ALLcalspec2 = ALLcalspec2[:, ~bad_obs_mask]
ALLJD = [jd for i, jd in enumerate(ALLJD) if not bad_obs_mask[i]]
bcorr_fib4_all = [bcorr for i, bcorr in enumerate(bcorr_fib4_all) if not bad_obs_mask[i]]
# Continuum correction
nALLfib4specbNc = np.copy(ALLfib4specbNc)
print('obs: ', end='')
for obs in range(nALLfib4specbNc.shape[2]):
print(f'{obs + 1} ', end='')
A = nALLfib4specbNc[:, :, obs]
nA = np.empty_like(A)
for ord in range(A.shape[0]):
x = A[ord, :]
window_size = 301 # Adjust this as needed
x_med = medfilt(x, kernel_size=window_size)
nA[ord, :] = np.divide(x, x_med, out=np.zeros_like(x), where=x_med != 0)
nALLfib4specbNc[:, :, obs] = nA
print('\n')
# Clip values to remove outliers
nALLfib4specbNc[nALLfib4specbNc > 1.2] = 1.2
nALLfib4specbNc[nALLfib4specbNc < 0.01] = 0.01
# Save 'nALLfib4specbNc' to 'Prepared.mat'
nALLfib4specbNc_variable_name = 'nALLfib4specbNc'
# Create a dictionary to store the variables you want to save
data_to_save = {}
# Collect all variables that start with 'ALL' from your variables
for var_name in globals().keys():
if var_name.startswith('ALL'):
data_to_save[var_name] = globals()[var_name]
# Specify the file name for saving
file_name = os.path.join(output_folder, 'Prepared.mat')
# Save the variables to a .mat file
scipy.io.savemat(file_name, data_to_save)