Python requests API transfer speeds are slower when data is collected in memory

While analysing network transfer speeds using a hypercorn ASGI app and the requests API, I see a significant drop in speed when the received data is collected into a list, compared to when it is discarded.

ASGI app

from typing import Any

ONE_KB = 1024
ONE_MB = ONE_KB * ONE_KB
ONE_GB = ONE_MB * ONE_KB

chunk_size = 256 * ONE_KB

dump = bytes(ONE_GB)  # 1GB


async def app(scope: Any, receive: Any, send: Any) -> None:
    '''
    Run: `hypercorn app:app --bind localhost:7001`
    '''
    assert scope["type"] == "http"

    await send(
        {
            "type": "http.response.start",
            "status": 200,
            "headers": [[b"Content-Type", b"application/octet-stream"], [b"Content-Length", str(len(dump)).encode()]],
        }
    )

    chunks = len(dump) // chunk_size

    for i in range(chunks):
        await send(
            {
                "type": "http.response.body",
                "body": dump[i * chunk_size : (i + 1) * chunk_size],
                "more_body": i != chunks - 1,
            }
        )

test.py

import io
import os
import socket
import time

import psutil

ONE_MB = 2**20

data_size_mb = 1000
host, port = "localhost", 7001  # matches the hypercorn bind above


def test_speed() -> None:
    path = "/"

    print(psutil.Process(os.getpid()).memory_info())
    load_mem = [b"a" * ONE_MB for _ in range(20 * 1000)] # << ==== This line doesn't impact the transfer speeds!
    print(psutil.Process(os.getpid()).memory_info())

    start_time = time.time()

    # Create a TCP socket
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Connect to the server
        s.connect((host, port))

        # Send HTTP GET request
        request = f"GET {path} HTTP/1.1\r\nHost: {host}\r\n\r\n"
        s.sendall(request.encode())

        # Wrap the socket in a BufferedReader (note: makefile("rb") already
        # returns a buffered reader, so this adds a second buffering layer)
        bufferedReader = io.BufferedReader(s.makefile("rb"))

        try:
            # Read response headers
            headers = bufferedReader.readline()
            while headers.strip():
                headers = bufferedReader.readline()
            bufferedReader.flush()  # no-op on a read-only buffer

            # Receive the response body in 1 MB chunks
            recv_size = 0
            chunks = []
            for _ in range(20 * 1000):
                data = bufferedReader.read(ONE_MB)
                chunks.append(data)  # <<< ===== This line is causing the above line to slow down !!!
                recv_size += len(data)
            psutil.Process(os.getpid()).memory_info()  # sample memory usage (value unused)
        finally:
            bufferedReader.close()

    end_time = time.time()

    recv_data_mb = recv_size / ONE_MB
    recv_time = end_time - start_time
    recv_speed = recv_data_mb / recv_time

    print("[*] Received data {:.2f} MB".format(recv_data_mb))
    print("[*] Chunks collected: {}".format(len(chunks)))
    print("===========================================")
    print("Data size: {:.2f} MB".format(data_size_mb))
    print("Recv data size: {:.2f} MB".format(recv_data_mb))
    print("Recv time: {:.6f} seconds".format(recv_time))
    print("Recv speed: {:.2f} MB/s".format(recv_speed))
    print("===========================================")


if __name__ == "__main__":
    test_speed()

Expectation: The transfer speed should be essentially identical whether or not the data is collected into the chunks variable.

Actual: With chunks.append(data) commented out, the transfer speed was around 2.6 GB/s; with it enabled, it dropped to roughly 1.4 GB/s.
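
The effect can be reproduced without any networking. Below is a minimal sketch (my own, not from the original test; it assumes CPython and needs ~20 GB of free RAM for the retaining run) that times 1 MB allocations while either retaining or dropping each chunk:

import time

ONE_MB = 2**20
N = 20 * 1000

def alloc_loop(retain: bool) -> float:
    chunks = []
    start = time.perf_counter()
    for _ in range(N):
        # bytes(ONE_MB) stands in for bufferedReader.read(ONE_MB): both
        # must produce a fresh 1 MB bytes object on every iteration.
        data = bytes(ONE_MB)
        if retain:
            chunks.append(data)  # keep a reference, as in the slow run
        # otherwise the object is freed when `data` is rebound and the
        # allocator can hand the same pages straight back
    return time.perf_counter() - start

print("drop each chunk:   {:.2f}s".format(alloc_loop(retain=False)))
print("retain all chunks: {:.2f}s".format(alloc_loop(retain=True)))

If the retaining run is noticeably slower, the bottleneck is memory allocation (the kernel must fault in fresh pages for every new chunk) rather than the socket layer.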

EDIT #1:

  • The machine has over 1 TB of RAM. To check whether this is a memory-capacity issue, I created a dummy 20 GB list before the response streaming and the transfer speed stayed at 2.6 GB/s. But if I create the list DURING the streaming, the speed drops to 1.4 GB/s.

  • Running cProfile for the two runs showed that in the faster run, <method 'recv_into' of '_socket.socket' objects> was called by the BufferedReader almost 10 times more often, leading me to suspect that the effective buffer size changes when I grow memory during streaming. A sketch of collecting such profiles follows.
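
A minimal sketch of collecting those profiles (my own reconstruction; the original invocation was not shown):

import cProfile
import pstats

profiler = cProfile.Profile()
profiler.enable()
test_speed()  # the socket/BufferedReader loop above
profiler.disable()

# Look at where time is spent; recv_into shows how often the
# BufferedReader had to go down to the raw socket.
pstats.Stats(profiler).sort_stats("cumulative").print_stats("recv_into")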

EDIT #2:

  • To close in on the bug, I'm now using raw sockets plus a BufferedReader instead of the requests library; the behaviour is the same.
  • As pointed out by Steffen, I'm ensuring that actual physical memory is being used (a sketch of that check follows).
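
A minimal sketch of that check (assuming psutil; numbers are illustrative). Since b"a" * ONE_MB writes every byte, the pages are faulted in immediately, and the growth in resident set size confirms physical allocation:

import os
import psutil

ONE_MB = 2**20
proc = psutil.Process(os.getpid())

rss_before = proc.memory_info().rss
load_mem = [b"a" * ONE_MB for _ in range(20 * 1000)]  # ~20 GB, every page touched
rss_after = proc.memory_info().rss

print("RSS grew by {:.1f} GB".format((rss_after - rss_before) / ONE_MB / 1024))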

EDIT #3:

  • Adding line-profiler outputs to highlight the slowdown in bufferedReader.read(...), where the per-call time almost doubled. A sketch of producing such tables follows.
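
The tables below come from line_profiler; a sketch of one way to produce them programmatically (the original may have used the kernprof CLI instead):

from line_profiler import LineProfiler

lp = LineProfiler()
profiled = lp(test_speed)  # wrap the function defined above
profiled()
lp.print_stats()  # emits the Line # / Hits / Time tables shown below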

Fast Run

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    95         1          0.2      0.2      0.0          try:                                                                                                     
    96                                                       # Read response headers                                                                              
    97         1       1178.9   1178.9      0.0              headers = bufferedReader.readline()                                                                  
    98         7         14.1      2.0      0.0              while headers.strip():                                                                               
    99         6          3.8      0.6      0.0                  headers = bufferedReader.readline()                                                              
   100         1          3.9      3.9      0.0              bufferedReader.flush()                                                                               
   101                                                                                                                                                            
   102                                                       # Receive response in chunks and write to file                                                       
   103         1          0.8      0.8      0.0              recv_size = 0                                                                                        
   104         1          0.7      0.7      0.0              chunks = []                                                                                          
   105     20001       7028.7      0.4      0.0              for _ in range(20 * 1000):                                                                           
   106     20000    7692581.8    384.6     38.2                  data = bufferedReader.read(ONE_MB)                                                               
   107                                                           # chunks.append(data)  # <<< ===== This line is causing the above line to slow down !!!          
   108     20000      27589.1      1.4      0.1                  recv_size += len(data)                                                                                                                                                      

Slow Run

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    95         1          0.4      0.4      0.0          try:                                                                                                     
    96                                                       # Read response headers                                                                              
    97         1       1073.7   1073.7      0.0              headers = bufferedReader.readline()                                                                  
    98         7          7.8      1.1      0.0              while headers.strip():                                                                               
    99         6          3.7      0.6      0.0                  headers = bufferedReader.readline()                                                              
   100         1          1.5      1.5      0.0              bufferedReader.flush()                                                                               
   101                                                                                                                                                            
   102                                                       # Receive response in chunks and write to file                                                       
   103         1          0.2      0.2      0.0              recv_size = 0                                                                                        
   104         1          0.4      0.4      0.0              chunks = []                                                                                          
   105     20001       6652.0      0.3      0.0              for _ in range(20 * 1000):                                                                           
   106     20000   13057516.0    652.9     50.6                  data = bufferedReader.read(ONE_MB)                                                               
   107     20000      17805.7      0.9      0.1                  chunks.append(data)  # <<< ===== This line is causing the above line to slow down !!!            
   108     20000      22505.9      1.1      0.1                  recv_size += len(data)                                                                           

EDIT #4:

  • Based on Steffen's comment, I tried appending not the server response but a random 1 MB bytes object to the list. Oddly enough, the bufferedReader.read() times were still high even though that allocation is unrelated to the received data. But when I add a del data at the end of each iteration, the per-hit time of bufferedReader.read() drops sharply (from 716.0 to 254.5 in the profiles below; see the sketch after them).

Additionally, just appending a random 1 MB object to chunks every iteration:

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   106     20001       6580.4      0.3      0.0              for _ in range(20 * 1000):                                  
   107     20000   14320784.1    716.0     50.3                  data = bufferedReader.read(ONE_MB)                      
   108     20000      25183.2      1.3      0.1                  recv_size += len(data)                                  
   109     20000    1284105.9     64.2      4.5                  chunks.append(b"b" * ONE_MB)                            
   110                                                           # bufferedReader.flush()  # <<< ===== This line doesn't impact the transfer speeds!
   111                                                           # chunks.append(copy(data))  # <<< ===== This line is causing the above line to slow down !!!
   112                                                           # del data  # <<< ===== Running this, improves the runtime!
   113         1        470.7    470.7      0.0              psutil.Process(os.getpid()).memory_info()  

Adding a random 1 MB object to chunks and deleting data every iteration:

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   106     20001       5896.1      0.3      0.0              for _ in range(20 * 1000):
   107     20000    5089738.8    254.5     17.2                  data = bufferedReader.read(ONE_MB)                      
   108     20000      21017.8      1.1      0.1                  recv_size += len(data)                                  
   109     20000   12253592.9    612.7     41.3                  chunks.append(b"b" * ONE_MB)                            
   110                                                           # bufferedReader.flush()  # <<< ===== This line doesn't impact the transfer speeds!
   111                                                           # chunks.append(copy(data))  # <<< ===== This line is causing the above line to slow down !!!
   112     20000      13553.3      0.7      0.0                  del data  # <<< ===== Running this, improves the runtime!
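
This is consistent with allocator reuse: with del data, the previous 1 MB object is freed before the next read allocates its buffer, so the same already-faulted pages can be recycled; without it, the old object is only released after the new one exists, forcing fresh pages from the kernel. A standalone sketch of that interaction (my own, no sockets involved; the retained list needs ~20 GB of RAM):

import time

ONE_MB = 2**20
N = 20 * 1000

def loop(delete_previous: bool) -> float:
    chunks = []
    start = time.perf_counter()
    for _ in range(N):
        data = bytes(ONE_MB)  # stands in for bufferedReader.read(ONE_MB)
        chunks.append(b"b" * ONE_MB)  # unrelated 1 MB allocation, as in EDIT #4
        if delete_previous:
            del data  # free the old block before the next allocation
    return time.perf_counter() - start

print("without del data: {:.2f}s".format(loop(delete_previous=False)))
print("with del data:    {:.2f}s".format(loop(delete_previous=True)))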