Running this code to get classroom submissions on an overnight run. Another code like this runs fine but this one seems to randomly stop in random places after a batch. I am Using cloud sql to store the assignments to loop through no issues when looping through as doing a count on the loop shows all the submissions.
It gets to a loop around the 12 to 17th batch and just stops with no exception and shows that it is complete. There seems to be no reason for this behaviour? We have about 180,000 assignments and it's not completing.
from sqlalchemy import create_engine
import psycopg2
from google.oauth2 import service_account
import json
import base64
# import httplib2
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd
from pandas.io.json import json_normalize
from google.auth.transport.requests import AuthorizedSession
from apiclient.http import BatchHttpRequest
import httplib2
import time
from apiclient.discovery import build
import pandas as pd
import random
from googleapiclient.errors import HttpError
import email, smtplib, ssl
from email import encoders
from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import socket
socket.setdefaulttimeout(100000)
start = time.time()
http = httplib2.Http()
log = []
array = []
batch_array = []
assignments = []
listwithtokens = []
listwithtokenstokens = []
credentials = service_account.Credentials.from_service_account_info(
{
},
scopes=['https://www.googleapis.com/auth/admin.directory.user', 'https://www.googleapis.com/auth/classroom.courses',
'https://www.googleapis.com/auth/classroom.rosters',
'https://www.googleapis.com/auth/classroom.coursework.students'
]
)
email_sub = '[email protected]'
delegated_credentials = credentials.with_subject(email_sub)
service_two = build('classroom', 'v1', credentials=delegated_credentials)
engine = create_engine('postgresql+psycopg2://USER:PASSWORD@IP:PORT/DQ')
sql = '''SELECT "courseId", id FROM public.classroom_assignments;'''
def callback(request_id, response, exception):
ids = request_id.split('|')
if exception is not None:
print('Error getting assignments "{0}" for course: "{1}"'.format(request_id, exception))
tokendata = {'courseId': ids[0], 'id' :ids[1],'token': None}
listwithtokens.append(tokendata)
else:
assignments.extend(response.get("studentSubmissions", []))
nextPageToken = response.get("nextPageToken", None)
if nextPageToken:
tokendata = {'courseId': ids[0], 'id' :ids[1],'token': None}
listwithtokens.append(tokendata)
else:
pass
with engine.connect() as con:
try:
rs = con.execute(sql)
bcount = 0
i = 0
batch_n = 0
batch = service_two.new_batch_http_request(callback=callback)
start_time = time.time()
for row in rs:
i += 1
response = service_two.courses().courseWork().studentSubmissions().list(pageToken=None, courseId=row[0] , courseWorkId =row[1], pageSize=500)
ids = f"{row[0]}|{row[1]}"
batch.add(response, request_id=ids)
bcount += 1
if bcount == 999:
batch_n += 1
print(batch_n)
batch.execute()
batch = service_two.new_batch_http_request(callback=callback)
bcount = 0
end = time.time() - start_time
print('batch count',i)
if end < 60:
diff = 60 - end
print(diff, 'seconds to sleep')
time.sleep(60)
start_time = time.time()
if bcount > 0:
batch.execute()
except HttpError as err:
if err.resp.get('content-type', '').startswith('application/json'):
print(err.content)
This is the error <HttpError 502 when requesting classroom.googleapis.com/batch returned "Bad Gateway">