I have two files which are read.
File 1 has all user names
File 2 has only the user names which made a donation
Effectively I want 1 outcome where:
column 1 has all user names
column 2 has the donations of the users where amount is 0 or empty if user was not mentioned in file 2
Here is my code (see below) which now generate 2 excel sheets.
As a novice, I'm not sure how to go on with the comparing from 2 files, any help would be apreciated
from bs4 import BeautifulSoup
import pandas as pd
names= "member_info.txt"
donations= "donations.txt"
user_names = []
donation_user_names = []
user_amounts = []
with open(names, 'r') as file:
html_content_names = file.read()
with open(donations, 'r') as file:
html_content_donations = file.read()
name_soup = BeautifulSoup(html_content_names, 'html.parser')
name_rows = name_soup.find_all('div', class_='rank_row')
donation_soup = BeautifulSoup(html_content_donations, 'html.parser')
donation_name_rows = donation_soup.find_all('div', class_='rank_row')
for name_row in name_rows:
user_name = name_row.find('span', class_='username').text
user_names.append(user_name)
for donation_name_row in donation_name_rows:
donation_user_name = donation_name_row.find('span', class_='username').text
user_amount = donation_name_row.find('div', class_='amount plus')['data-sort']
donation_user_names.append(donation_user_name)
user_amounts.append(user_amount)
df = pd.DataFrame({'Username': user_names})
df = df.sort_values(by='Username', ascending=True, key=lambda col: col.str.lower())
name_excel = "donations.xlsx"
df.to_excel(name_excel, index=False)
dg = pd.DataFrame({'Don_Uname': donation_user_names, 'Amount in M': user_amounts})
dg = dg.sort_values(by='Don_Uname', ascending=True, key=lambda col: col.str.lower())
dg['Amount in M'] = dg['Amount in M'].astype(float).apply(lambda x: x/1000000).apply(lambda x: round(x, 1))
name_excel = "donations2.xlsx"
dg.to_excel(name_excel, index=False)
print(f"Write data to excel {name_excel}")
