How to find _transferSize in a HAR file exported using browsermob-proxy in Python

266 Views Asked by At

I am trying to export a .har file using Firefox, Selenium and browsermob-proxy in Python, using the code below.

# Capture a HAR for one page load by routing Firefox through a local
# BrowserMob Proxy instance.
bmp_loc = "/Users/project/browsermob-proxy-2.1.4/bin/browsermob-proxy"
server = Server(bmp_loc)
server.start()
try:
    proxy = server.create_proxy(params={'trustAllServers': 'true'})

    selenium_proxy = proxy.selenium_proxy()
    # Work on a copy: DesiredCapabilities.FIREFOX is a shared dict, and
    # mutating it in place would leak these settings into every later driver
    # created in the same process.
    caps = webdriver.DesiredCapabilities.FIREFOX.copy()
    # Kept from the original; presumably selects the legacy (non-Marionette)
    # driver — confirm it is still supported by your Firefox/Selenium versions.
    caps['marionette'] = False
    proxy_settings = {
        "proxyType": "MANUAL",
        "httpProxy": selenium_proxy.httpProxy,
        "sslProxy": selenium_proxy.sslProxy,
    }
    caps['proxy'] = proxy_settings

    # NOTE(review): `desired_capabilities` is deprecated in Selenium 4 in
    # favour of `options` — confirm before upgrading further.
    driver = webdriver.Firefox(desired_capabilities=caps)
    try:
        proxy.new_har("generated_har", options={'captureHeaders': True})
        driver.get("someurl")
        # Read the HAR while the proxy is still running; `browser_logs`
        # remains available after the cleanup below.
        browser_logs = proxy.har
    finally:
        # Always close the browser, even if the page load fails.
        driver.quit()
finally:
    # Always shut down the BrowserMob Proxy process so it does not linger.
    server.stop()

I want to get _transferSize from the .har file to perform some analysis, but I am unable to get it; instead, I get a 'comment' field in its place:

"redirectURL": "", "headersSize": 1023, "bodySize": 38, "comment": ""

whereas when I manually download the .har file using Firefox, I do get _transferSize.

Version used:

browsermob_proxy==2.1.4
selenium==4.0.0

Can anybody please help me to resolve this?

1

There is 1 best solution below

2
Abhishek Maheshwari On BEST ANSWER

You can compute _transferSize by adding the response's headersSize and bodySize from the HAR file itself.

 # Load each URL in Chrome with HAR capture enabled, then walk the HAR entries
 # and tally slow requests, response status codes, large responses and
 # repeated URLs.
 # NOTE(review): the indentation of this snippet is inconsistent as pasted
 # (this first line is indented less than the loop below) — re-indent before
 # running.
 urls = ["https://google.com"]
    for ur in urls:
        # NOTE(review): `proxy` is not defined in this snippet; presumably a
        # helper module wrapping the proxy server/client — confirm.
        server = proxy.start_server()
        client = proxy.start_client()
        client.new_har("demo.com")
        # print(client.proxy)
        
        options = webdriver.ChromeOptions()
      
        options.add_argument("--disk-cache-size=0")
        
        # NOTE(review): `options` is rebound to a plain dict here, so the
        # ChromeOptions object built above (including --disk-cache-size=0)
        # is never passed to the driver.
        options = {
        'enable_har': True 
        }
        # NOTE(review): the `seleniumwire_options` keyword suggests `webdriver`
        # is selenium-wire's webdriver rather than plain Selenium — confirm.
        driver = webdriver.Chrome(seleniumwire_options=options)
        driver.request_interceptor = proxy.interceptor

        driver.get(ur)
        # Fixed 40-second pause after the page load; presumably to let
        # outstanding requests finish before the HAR is read — confirm.
        time.sleep(40)
        # Not used in the visible portion of the snippet.
        row_list = []
        # `driver.har` is parsed as a JSON string into a dict.
        json_dictionary = json.loads(driver.har)
        # URLs already seen at least once.
        repeat_url_list = []
        # Only ever assigned the int 1 below, so the list default factory is
        # never relied on.
        repeat_urls = defaultdict(lambda:[])
        # Number of responses counted as "large" (see the size check below).
        resp_size = 0
        # Not used in the visible portion of the snippet.
        count_url = 0
        # Number of entries whose `time` exceeds 2000.
        url_time = 0 
        # Status codes already seen at least once.
        status_list = []
        # Maps status code -> occurrence count; values are ints, and the key
        # is always set before it is read, so the list default is never used.
        status_url = defaultdict(lambda:[])
        # Every request URL, in entry order (duplicates included).
        a_list = []
        
        with open("network_log2.har", "w", encoding="utf-8") as f:
            # f.write(json.dumps(driver.har))
            for i in json_dictionary['log']['entries']:
                # NOTE(review): str(i) writes the Python repr of each entry,
                # one per line, so the output file is not valid JSON/HAR.
                f.write(str(i))
                f.write("\n")
                url = i['request']['url']
                a_list.append(url)
                timing = i['time']
                # Count entries slower than 2000 (presumably milliseconds, per
                # the HAR format — confirm). The rescaled `timing` is not used
                # again in the visible portion of the snippet.
                if timing>2000:
                    timing = round(timing/2000,1)
                    url_time += 1
                # Tally how many times each response status occurs.
                status =  i['response']['status']
                if status in status_list:
                    status_url[status] = status_url[status] + 1
                else:
                    status_url[status] = 1
                    status_list.append(status)
                
                # Approximate the transfer size as response headers + body.
                # NOTE(review): HAR producers may report -1 for an unknown
                # size, which would skew this sum — confirm for your data.
                size = i['response']['headersSize'] + i['response']['bodySize']
                # Count responses whose size, floor-divided by 1000, exceeds
                # 500 (roughly over 500 KB if the sizes are in bytes).
                if size//1000 > 500:
                    resp_size += 1
                # Flag URLs seen more than once. The value is always set to 1,
                # so this marks repeats rather than counting them.
                if url in repeat_url_list:
                    repeat_urls[url] =  1
                else:
                    repeat_url_list.append(url)

            # Number of distinct URLs that were requested more than once.
            rurl_count = len(repeat_urls)