I am trying to build a tool that pulls news articles mentioning a keyword in the text body, but I cannot get the date range to work.
Ideally I only want to see articles from the past 24 hours, so I am using this:
googlenews=GoogleNews(period='1d')
However, in my DataFrame results I am getting values in the date column like "3 weeks ago", "1 month ago", etc.
Perhaps a solution would be to filter on the date (e.g. where the date is < today()) and then drop the rows where the date text contains "week", "weeks", "month", "months", etc.
from GoogleNews import GoogleNews
from newspaper import Article
import pandas as pd
# Collect recent news results for a keyword and load the matching ones into a
# DataFrame.
search_term = "Binoculars"

# Build ONE client that carries every option. Creating the client twice
# (first with period='1d', then again with lang/region) rebinds the name and
# throws away the first instance, so the 24-hour period was never applied.
googlenews = GoogleNews(lang='en', region='US', period='1d')
googlenews.search(search_term)

results = []
final_results = []

# Fetch result pages 1 through 4 and gather everything the client reports.
# NOTE(review): result() appears to return the accumulated results rather than
# only the latest page, which would explain the de-duplication below — confirm
# against the GoogleNews package documentation.
for page in range(1, 5):
    googlenews.getpage(page)
    results.extend(googlenews.result())

# Keep each item once, and only when the keyword occurs in its description
# (case-sensitive substring match).
for item in results:
    if item not in final_results and search_term in item["desc"]:
        final_results.append(item)

# Build the frame from the filtered list; using the raw `results` here would
# make the filtering loop above pointless.
df = pd.DataFrame(final_results)
df