Stop Twitter streaming by timer with twython

166 Views Asked by At

I have code that receives tweets and writes them to CSV files. I want to stop this process after a certain amount of time. How can I add a timer to the program? Presumably datetime needs to be used somewhere for this.

This is test code, so please don't judge it too harshly :)

import csv
import os
import time
from datetime import datetime

import pandas as pd
from twython import Twython, TwythonError, TwythonStreamer

# Twitter API credentials (masked placeholders in this listing).
APP_KEY = '***'
APP_SECRET = '***'
OAUTH_TOKEN = '***'
OAUTH_TOKEN_SECRET = '***'

# OAuth 2: obtain an access token using the app key/secret, then rebind
# `twitter` to a client constructed with that token.
twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
ACCESS_TOKEN = twitter.obtain_access_token()
twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)

# OAuth 1: request authentication tokens from a second client and keep the
# returned token/secret pair.
# NOTE(review): in the code shown, the streamer is built from OAUTH_TOKEN and
# OAUTH_TOKEN_SECRET above, not from OAUTH_TOKEN2 / OAUTH_TOKEN_SECRET2 --
# confirm whether this step is still needed.
twitter2 = Twython(APP_KEY, APP_SECRET)
auth = twitter2.get_authentication_tokens(callback_url='https://twitter.com')
OAUTH_TOKEN2 = auth['oauth_token']
OAUTH_TOKEN_SECRET2 = auth['oauth_token_secret']

def process_tweet(tweet):
    """Flatten one tweet payload into a single-row list of CSV-ready dicts.

    Args:
        tweet: Mapping for one tweet. It must provide the keys read below:
            'id', 'entities' (with 'hashtags'), 'created_at', 'geo', 'text',
            'user' (with 'screen_name' and 'location'),
            'in_reply_to_user_id' and 'source'.

    Returns:
        A list containing exactly one dict. The key order is the column
        order later used for the CSV header.

    Raises:
        KeyError: If any of the keys listed above is missing.
        ValueError: If 'created_at' does not match the expected format.
    """
    record = {
        'id_post': tweet['id'],
        'hashtags': [tag['text'] for tag in tweet['entities']['hashtags']],
    }

    # Parse the timestamp once and derive both the date and the time column.
    created = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
    record['date'] = created.strftime('%d-%m-%Y')
    record['time'] = created.strftime('%H:%M:%S')

    record['geo'] = tweet['geo']
    record['text'] = tweet['text']
    record['user'] = tweet['user']['screen_name']
    record['user_loc'] = tweet['user']['location']
    # NOTE(review): 'user_id' is filled from 'in_reply_to_user_id', i.e. the
    # account being replied to, not the author -- confirm this is intended.
    record['user_id'] = tweet['in_reply_to_user_id']
    record['source_device'] = tweet['source']

    rows = [record]
    print(rows)
    return rows

class MyStreamer(TwythonStreamer):
    """Stream listener that appends Russian-language tweets to a daily CSV file."""

    def on_success(self, data):
        """Handle one decoded stream message.

        Only messages whose 'lang' field equals 'ru' are processed and saved.

        Args:
            data: Dict decoded from one stream message.
        """
        # Not every stream message is guaranteed to carry a 'lang' key (the
        # streaming API can also deliver notices that are not tweets), so use
        # .get() instead of indexing to avoid a KeyError.
        if data.get('lang') == 'ru':
            tweet_data = process_tweet(data)
            self.save_to_csv(tweet_data)

    def on_error(self, status_code, data):
        """Print the status code reported for a failed stream request.

        Args:
            status_code: Status code passed in by the streamer.
            data: Accompanying payload; not used here.
        """
        # print() call form: the Python 2 statement form is a SyntaxError on
        # Python 3, which the rest of this code requires.
        print(status_code)

    def save_to_csv(self, tweet, encoding='utf-8'):
        """Append tweet rows to 'Twitter_<dd-mm-YYYY>.csv' in the working directory.

        The header row is written only when the file is still empty.

        Args:
            tweet: List of row dicts; the keys of the first dict define the
                CSV columns.
            encoding: Text encoding used for the output file.
        """
        if not tweet:
            # Nothing to write; also avoids an IndexError on tweet[0] below.
            return

        file_name = 'Twitter_{date}.csv'.format(date=datetime.now().strftime('%d-%m-%Y'))
        try:
            with open(file_name, 'a', newline='', encoding=encoding) as file:
                writer = csv.DictWriter(file, fieldnames=tweet[0].keys())
                # Size is checked before anything is written through this
                # handle, so 0 means the file was just created or is empty.
                if os.path.getsize(file_name) == 0:
                    writer.writeheader()
                writer.writerows(tweet)
        except IOError:
            print("I/O error")

if __name__ == '__main__':
    # Keywords (mostly Russian, COVID-19 related) passed to the stream filter.
    keywords = [
        'Короновирус',
        'Covid-19',
        'Вакцинация',
        'Вакцина',
        'Карантин',
        'Заболевание',
        'Ограничения',
        'Тесты',
        'Ковид',
    ]

    stream = MyStreamer(APP_KEY, APP_SECRET, OAUTH_TOKEN, OAUTH_TOKEN_SECRET)
    stream.statuses.filter(track=keywords)

That is, I need the code to run for a certain amount of time and then stop collecting. As I understand it, this is also necessary because otherwise you can run into the API request limit. Maybe a solution from the tweepy library could be used, since the two libraries are pretty similar? Or maybe datetime is needed, but where and how?

1

There is 1 best solution below

0
kostya ivanov On

I think I found a solution. It may not be very elegant, but it already works, which is not bad for a start. Still, I would like to do it more correctly.

class MyStreamer(TwythonStreamer):
    """Streamer that saves Russian-language tweets and disconnects after a time limit.

    The time limit is only checked when a message arrives (in ``on_success``),
    so the stream stops on the first message received after the deadline.

    NOTE(review): ``save_to_csv`` and ``process_tweet`` are not defined in this
    snippet; they are expected to be provided elsewhere (presumably the same
    ones as in the question's code).
    """

    # Default collection window in seconds; override per instance with the
    # ``run_seconds`` keyword argument.
    run_seconds = 50

    def __init__(self, *args, run_seconds=None, **kwargs):
        """Create the streamer and start its countdown.

        Args:
            *args: Positional arguments forwarded to ``TwythonStreamer``.
            run_seconds: Optional number of seconds to collect for; defaults
                to the class-level ``run_seconds``.
            **kwargs: Keyword arguments forwarded to ``TwythonStreamer``.
        """
        super().__init__(*args, **kwargs)
        if run_seconds is not None:
            self.run_seconds = run_seconds
        # Compute the deadline per instance. Doing this in the class body
        # would start the clock when the class is defined and share one
        # deadline between all instances. A monotonic clock keeps the
        # deadline unaffected by system clock adjustments.
        self.stop_timer = time.monotonic() + self.run_seconds

    def on_success(self, data):
        """Save one Russian-language tweet, or disconnect once time is up.

        Args:
            data: Dict decoded from one stream message.
        """
        if time.monotonic() >= self.stop_timer:
            print('Time off')
            self.disconnect()
            return

        # Check the language before processing so that other messages
        # (including ones without a 'lang' key) are skipped untouched.
        if data.get('lang') == 'ru':
            self.save_to_csv(process_tweet(data))

You can also do the language check earlier, before the tweet is processed, so that tweets we are not interested in are skipped entirely.