Unpacking a list within a list in Google's PageSpeed API response in R

112 Views Asked by At

I am having some difficulties unpacking what looks like a list within a list in Google's Page Speed API response.

Ideally, I want only the audit results exported as a CSV file, so that I can compare the load times and performance of my client's website over time.

'''

library(httr)
library(tidyverse)
library(tidyr)

# Asker's original attempt, kept exactly as posted (it does not work).

# URL to submit GET request to
# NOTE(review): there is no "?" between "runPagespeed" and "url=", so the
# target page is part of the path rather than a `url` query parameter.
url <- "https://www.googleapis.com/pagespeedonline/v5/runPagespeedurl=https://www.google.com/"


# GET request; httr::content() parses the response body (returned as a list)
raw_list <- url %>% 
httr::GET() %>% 
httr::content()


# Turn the list into a data frame: rbind() stacks the top-level elements of
# the response as rows, and the result is coerced with as.data.frame().
df_pagespeed <- as.data.frame(do.call(rbind, raw_list))

# Attempted to unpack the nested list in the `audits` column (with no luck)
df_pagespeed <- tidyr::unnest(df_pagespeed, cols = audits)

# Select only the audit results.
# NOTE(review): `audits` is unquoted, so c(audits) looks up an object named
# `audits`; no such object is defined in this snippet.
df_pagespeed_final <- df_pagespeed[c(audits)]

# Export to CSV file (without row names)
write.csv(df_pagespeed_final,"test-pagespeed.csv", row.names = FALSE)

'''

Ideally, I want the second data frame (df_pagespeed_final) to contain the information from the PageSpeed audit results, i.e. meaningful insights such as first-contentful-paint.

Hopefully that is clear enough for someone to understand. If not, please let me know and I will revise the question.

Thanks for your help.

1

There is 1 best solution below

0
AudioBubble On

I managed to figure it out. Probably not the best solution or cleanest, but it works.

Hopefully this helps someone else out, who is working with the Google Pagespeed API and R.


library(httr)
library(tidyverse)
library(tidyr)
library(purrr)
library(magrittr)
library(ggplot2)
library(reshape)

# URL to submit the GET request to: the PageSpeed Insights v5 endpoint, with
# the page to analyse passed in the `url` query parameter.
url <- "https://www.googleapis.com/pagespeedonline/v5/runPagespeed?url=https://www.google.com/"


# Perform the GET request and parse the response body into a nested list.
# stop_for_status() turns an HTTP error status into an R error right here,
# instead of letting an error payload flow into the reshaping code below.
raw_list <- url %>%
  httr::GET() %>%
  httr::stop_for_status() %>%
  httr::content()

# Fail early, with a clear message, when there are no audits to unpack.
if (!is.list(raw_list) ||
    is.null(raw_list[["lighthouseResult"]][["audits"]])) {
  stop(
    "PageSpeed response does not contain lighthouseResult$audits",
    call. = FALSE
  )
}


# Turn the list into a data frame: rbind() stacks the top-level elements of
# the response as rows, so the Lighthouse data sits in row "lighthouseResult".
df_pagespeed <- as.data.frame(do.call(rbind, raw_list))

# Pull out the nested audits list from that row ...
df_all_audit <- df_pagespeed["lighthouseResult", "audits"]

# ... and spread it into a data frame whose columns are the individual audits
# (each cell still holds that audit's nested list).
df_all_audit <- as.data.frame(do.call(rbind, df_all_audit))


# Extract the `numericValue` of a single audit.
#
# `audits` is the audits data frame built earlier in this script
# (`df_all_audit`); it is assumed to hold one row, with one list column per
# audit id -- TODO confirm if the reshaping step changes.
# `audit_id` is the audit's id string, e.g. "speed-index".
#
# Returns a length-one numeric. When the audit, or its `numericValue`, is
# absent the result is NA_real_ rather than an error or NULL, so the row
# assembled from these values always has the same number of columns.
get_audit_value <- function(audits, audit_id) {
  audit_cell <- audits[[audit_id]]
  if (length(audit_cell) == 0L) {
    return(NA_real_)
  }

  audit <- audit_cell[[1L]]
  if (!is.list(audit) || is.null(audit[["numericValue"]])) {
    return(NA_real_)
  }

  as.numeric(audit[["numericValue"]])
}

# The variable names below are kept from the original script because the
# row-building step further down refers to them.

# first meaningful paint
df_first_meaningful_paint <-
  get_audit_value(df_all_audit, "first-meaningful-paint")

# largest contentful paint
df_largest_content_paint <-
  get_audit_value(df_all_audit, "largest-contentful-paint")

# total blocking time
df_total_blocking_time <-
  get_audit_value(df_all_audit, "total-blocking-time")

# speed index
df_speed_index <-
  get_audit_value(df_all_audit, "speed-index")

# first contentful paint
df_first_content_paint <-
  get_audit_value(df_all_audit, "first-contentful-paint")

# cumulative layout shift
df_cumulative_shift <-
  get_audit_value(df_all_audit, "cumulative-layout-shift")

# server response time
df_server_response_time <-
  get_audit_value(df_all_audit, "server-response-time")



# Timestamp for this measurement, wrapped in a one-column data frame so it
# can be column-bound with the metric values.
now <- Sys.time()
time <- data.frame(now)

# Column names of the result, in the same order as the values bound below.
metric_columns <- c(
  "time",
  "first_content_paint",
  "first_meaningful_paint",
  "largest_content_paint",
  "total_blocking_time",
  "speed_index",
  "cumulative_shift",
  "server_response_time"
)

# Bind the timestamp and the seven metric values into a single row.
df_list <- bind_cols(
  time,
  df_first_content_paint,
  df_first_meaningful_paint,
  df_largest_content_paint,
  df_total_blocking_time,
  df_speed_index,
  df_cumulative_shift,
  df_server_response_time
)

# Rename all columns in one step.
names(df_list) <- metric_columns

# This run's measurements, ready to be appended to the stored history.
df_pagespeed_new <- df_list

# CSV file that accumulates one row per run of this script.
history_file <- "pagespeed.csv"

if (file.exists(history_file)) {
  # Load the previous measurements and append this run as an additional row.
  df_pagespeed_old <- read_csv(history_file)
  total <- rbind(df_pagespeed_old, df_pagespeed_new)
} else {
  # First run: there is no history yet, so start it with this run's row
  # instead of failing on the missing file.
  total <- df_pagespeed_new
}

# NOTE(review): the time column is written as text without a time zone and
# parsed again on the next run -- confirm it round-trips as intended.

# Write the (now one row longer) history back to disk.
write.csv(total, history_file, row.names = FALSE)


# Plot the stored metrics over time.

# Line colour per plotted metric. The names double as the columns of `total`
# to draw and as the legend keys. (first_meaningful_paint is stored in the
# CSV but, as in the original plot, is not drawn.)
metric_colours <- c(
  "first_content_paint" = "#008080",
  "largest_content_paint" = "#58508d",
  "total_blocking_time" = "#bc5090",
  "speed_index" = "#ff6361",
  "cumulative_shift" = "#ffa600",
  "server_response_time" = "#003f5c"
)

# Reshape to one row per (time, metric) pair so a single geom_line() layer
# draws every series instead of one hand-written layer per metric.
plot_data <- total %>%
  tidyr::pivot_longer(
    cols = dplyr::all_of(names(metric_colours)),
    names_to = "metric",
    values_to = "value"
  )

p <- ggplot(plot_data, aes(x = time, y = value, colour = metric)) +
  geom_line(size = 1) +
  scale_color_manual(name = "Speed Metrics", values = metric_colours) +
  xlab("Time & Date") +
  scale_y_continuous("Loadtime (milliseconds)") +
  labs(title = "www.google.com Page Speed Metrics") +
  # theme_classic() is a complete theme: adding it after theme() would throw
  # away the title centring, so it goes first and the tweak is layered on top.
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))

# Explicit print() so the plot is also drawn when the script is source()d.
print(p)