With Python 3.9 and PyMySQL 1.1.0, I find that memory keeps increasing after I fetch data from the cursor. I need to run a model in a loop, and the input data has to be downloaded from a MySQL database. I use DBUtils to manage my PyMySQL connections. When I checked the code with memory_profiler, I found that the memory grows on the line "for row in cursor:". What should I do to fix the problem? Thank you!
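For reference, dataConnect is the DBUtils connection pool that the function receives. It is created roughly like this (host, credentials and pool size below are placeholders, not my real settings):

import pymysql
from dbutils.pooled_db import PooledDB

# rough sketch of the pool; dataConnect.connection() hands out a pooled connection
dataConnect = PooledDB(
    creator=pymysql,       # PyMySQL creates the underlying connections
    maxconnections=5,      # placeholder pool size
    host='127.0.0.1',      # placeholder host
    user='user',           # placeholder credentials
    password='password',
    database='mydb',
    charset='utf8mb4',
)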
import datetime


def databaseSelect(dataConnect, unit, kkspoint, kksname, starttime, endtimePlus):
    conn = dataConnect.connection()
    cursor = conn.cursor()
    backdict = {}
    data1 = {}
    tables = []
    data = []
    unit = str(unit)
    kkspoint = str(kkspoint)
    kksname = str(kksname)
    starttime = str(starttime)
    endtimePlus = int(endtimePlus)
    starttime = datetime.datetime.strptime(starttime, "%Y-%m-%d %H:%M:%S")
    endtime = (starttime + datetime.timedelta(minutes=endtimePlus)).strftime('%Y-%m-%d %H:%M:%S')
    # list all tables so the right daily table can be picked
    sql = 'show tables;'
    cursor.execute(sql)
    for row in cursor:
        # memory_profiler reports the growth on this loop
        # print('#' * 50 + "checkpoint5" + '#' * 50)
        # print(row)
        tables.append(str(row[0].decode('utf-8')))
        del row
    cursor.close()
    conn.close()
    del (cursor, conn)
    findState = False
    # parse table names like 'Data<kkspoint>y<year>m<month>d<day>' into start times
    for table in tables:
        a = table.split('y')[0].split('a')[2]
        if a == kkspoint:
            b = table.split('y')[1].split('m')[0]
            c = table.split('y')[1].split('m')[1].split('d')
            times = b + '-' + c[0] + '-' + c[1] + ' 00:00:00'
            originTime = datetime.datetime.strptime(times, '%Y-%m-%d %H:%M:%S')
            backdict.setdefault(a, []).append(originTime)
            del (a, b, c, times, originTime)
        del table
    # find the table whose time range contains starttime
    for key in backdict.keys():
        backdict[key].sort()
        data1[key] = backdict[key]
        for i, timefind in enumerate(data1[key]):
            if starttime >= timefind and starttime < backdict[key][i + 1]:
                checkTimeFind = timefind
                findState = True
                break
        if findState:
            break
        del key
    if unit == 'zj':
        kkspoint = ('Data' + kkspoint + 'y' + str(checkTimeFind.year) +
                    'm' + str(checkTimeFind.month) + 'd' + str(checkTimeFind.day))
    elif unit == 'fw':
        kkspoint = ('fwData' + kkspoint + 'y' + str(checkTimeFind.year) +
                    'm' + str(checkTimeFind.month) + 'd' + str(checkTimeFind.day))
    else:
        print('#' * 100 + '\nIPdownloader.databaseSelect\nwrong unit')
        raise IndexError
    sql = "select {} from {} where time between '{}' and '{}';".format(kksname, kkspoint, starttime, endtime)
    conn = dataConnect.connection()
    cursor = conn.cursor()
    cursor.execute(sql)
    for row in cursor:
        data.append(row)
        del row
    cursor.close()
    conn.close()
    del (backdict, data1, sql, tables, cursor, conn, unit, kkspoint, kksname, starttime, endtime, findState)
    return data
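This is roughly how I attach memory_profiler to the function to get the line-by-line report (the script name below is just an example):

from memory_profiler import profile

@profile
def databaseSelect(dataConnect, unit, kkspoint, kksname, starttime, endtimePlus):
    ...

# then run the script with the line-by-line reporter, for example:
#   python -m memory_profiler downloader.py

That report is what shows the memory increment on the "for row in cursor:" lines.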
I found that the problem comes from pandas, which processes the data after PyMySQL returns it. My code used pandas 2.1.0, and it appears to leave a weakref behind each time the code runs, so the memory keeps increasing. The problem was fixed after I downgraded pandas to 1.5.3. Now it seems fine.
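A rough way to double-check this outside memory_profiler is to print the pandas version that is actually imported and compare live object counts across one pass of the download step (the arguments below are placeholders):

import gc
import pandas as pd

print(pd.__version__)  # the version the interpreter really loaded

gc.collect()
before = len(gc.get_objects())
# one pass of the download step with placeholder arguments
data = databaseSelect(dataConnect, 'zj', '0001', 'value1', '2023-09-01 00:00:00', 5)
del data
gc.collect()
after = len(gc.get_objects())
print('objects left behind:', after - before)

If the count keeps climbing pass after pass even after gc.collect(), something is being kept alive between iterations.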