Using the google scholarly by other languages

38 Views Asked by At

I have problem in scraping other languages keyword by using the scholarly or paperscraper.

I have below code (code link):

!pip install scholarly

And :

import scholarly
import time
import random
import csv
from openpyxl import Workbook

def slow_scrape_scholarly(keyword, num_results):
    scholarly.scholarly.set_timeout(30)
    results = []

    # Search for publications based on the keyword
    search_query = scholarly.scholarly.search_keyword(keyword)
    for i in range(num_results):
        try:
            # Get the first result from the search query
            pub = next(search_query)
            bib = pub.get('bib')
            # Check if the 'title' key is present
            if bib is not None:
                # Extract the author name and publication information
                
                print(bib['title'])
                pub_url = pub.get('pub_url')
                print(pub_url)
                author = bib['author']
                # Convert list of author names to comma-separated string
                author_str = ', '.join(author) if isinstance(author, list) else author
                publication = bib['title']
                pub_url = pub.get('pub_url')
                result = {
                    'author': author_str,
                    'publication': publication,
                    'pub_url': pub_url
                }
                results.append(result)
                
                # Write current result to a CSV file
                with open('scholarly_results.csv', mode='a', newline='') as file:
                    writer = csv.writer(file)
                    writer.writerow([result['author'], result['publication'], result['pub_url']])
                    
                # Write current result to an Excel file
                wb = Workbook()
                ws = wb.active
                ws.title = 'Scholarly Results'
                ws.append(['Author', 'Publication', 'Publication URL'])
                for r in results:
                    ws.append([r['author'], r['publication'], r['pub_url']])
                wb.save('scholarly_results.xlsx')
            else:
                print("Skipping result due to missing 'title' key")
        except StopIteration:
            # If there are no more results, break out of the loop
            break

        # Simulate a human-like delay between requests
        delay = random.uniform(10.1, 25.0)
        print(f"Waiting for {delay:.2f} seconds before the next request...")
        time.sleep(delay)

    
    return results
  

# Example usage
keyword = 'خشونت علیه زنان'

def get_total_results(keyword):
    search_query = scholarly.scholarly.search_pubs(keyword)
    total_results = search_query.total_results
    return total_results

# Example usage
#keyword = 'violence against women'
def get_total_results(keyword):
    search_query = scholarly.scholarly.search_keyword(keyword)
    total_results = search_query.total_results
    print(f"Total number for the {keyword} search is {total_results}")
    return total_results
num_results = get_total_results(keyword)
results = slow_scrape_scholarly(keyword, num_results)

try:
        num_results = get_total_results(keyword)
        results = slow_scrape_scholarly(keyword, num_results)
except:
         
        print("Error getting total results")

And I get the below error:

------------------------------------------ AttributeError                            Traceback (most recent call last) <ipython-input-3-926f6369e88d> in
<cell line: 80>()
     78     print(f"Total number for the {keyword} search is {total_results}")
     79     return total_results
---> 80 num_results = get_total_results(keyword)
     81 results = slow_scrape_scholarly(keyword, num_results)
     82 

<ipython-input-3-926f6369e88d> in get_total_results(keyword)
     75 def get_total_results(keyword):
     76     search_query = scholarly.scholarly.search_keyword(keyword)
---> 77     total_results = search_query.total_results
     78     print(f"Total number for the {keyword} search is {total_results}")
     79     return total_results

AttributeError: 'generator' object has no attribute 'total_results'
0

There are 0 best solutions below