Get percentage of months with positive return and average return per month

59 Views Asked by At

I'm running the following code to download stock's data with monthly periodicity.

Also I added a couple of columns with monthly percentage return and a positive or negative return ( 0 or 1) per month.

library(quantmod)

Symbols<-c  ("AA","AAL","AAOI","ABBV","ABC","ABNB","AAPL","TSLA","AMZN","AMD","NKE",
             "NVDA","AMC","META")

start_date=as.Date("2013-06-01")


getSymbols(Symbols,from=start_date, src = 'yahoo', periodicity = 'monthly')

stock_data = sapply(.GlobalEnv, is.xts)

all_stocks <- do.call(list, mget(names(stock_data)[stock_data]))




# Calculate return function
Return <- function(x) {
  stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
  stock_name <- paste0(stock_name, ".Return")
  column_names <- c(names(x), stock_name)
  
  # Calculate the range (difference between closing price and opening price)
  range <- quantmod::Cl(x) - quantmod::Op(x)
  
  # Merge the range with the original x object
  x <- merge.xts(x, range)
  colnames(x) <- column_names
  
  return(x)
}

# Function to determine if monthly return is positive
Positive <- function(x) {
  stock_name <- stringi::stri_extract(names(x)[1], regex = "^[A-Z]+")
  stock_name <- paste0(stock_name, ".Return")
  positive_name <- paste0(stock_name, ".Positive")
  column_names <- c(names(x), positive_name)
  
  # Calculate the positive indicator
  x$positive <- ifelse(x[, stock_name] > 0, TRUE, FALSE)
  
  # Update the column names
  colnames(x) <- column_names
  
  return(x)
}
all_stocks <- lapply(all_stocks, Return)

all_stocks <- lapply(all_stocks, Positive)


Now, I want to get a new list to get each month performance details overview per stock:

For example, January performance. That will include average Return of all January months and Percentage of positive return over the years)

i.e stock XYZ January Performance,Average return= 11%, % Positive = 80%

Same for February,March,April...

I'm trying this to calculate percentage of positive return but getting error. Not sure if this is the best approach or what I'm doing wrong.

# Filter stocks with 100% positive return
positive_stocks <- list()

for (i in seq_along(all_stocks)) {
  stock <- all_stocks[[i]]
  positive_name <- paste0(names(stock)[1], ".Return.Positive")
  
  if (!is.null(stock[, positive_name]) && all(stock[, positive_name])) {
    positive_stocks[[names(stock)[1]]] <- stock
  }
}

# Extract stock names with 100% positive return
positive_stock_names <- names(positive_stocks)

# Print the names of stocks with 100% positive return
print(positive_stock_names)
 
2

There are 2 best solutions below

0
NicChr On BEST ANSWER

This is how I might attempt the problem using dplyr and lubridate with your initial setup.

Helper to convert xts to tibble

xts_as_tibble <- function(x, name = "time", value = "value", group = "group"){
  time <- attr(x, "index")
  class_match <- match(attr(time, "tclass"), c("POSIXt", "Date"))
  is_datetime <- isTRUE(1L %in% class_match)
  is_date <- isTRUE(2L %in% class_match)
  if (is_date || is_datetime){
    if (!is.null(attr(time, "tzone"))){
      time <- lubridate::as_datetime(
        `attributes<-`(time, NULL), tz = attr(time, "tzone")
        )
    } else {
      time <- lubridate::as_datetime(
        `attributes<-`(time, NULL)
        )
    }
    if (is_date){
      time <- lubridate::as_date(time)
    }
  } else {
    time <- as.numeric(time)
  }
  ncol <- ncol(x)
  groups <- rep(colnames(x), each = length(time))
  if (is.null(groups)){
    groups <- rep(seq_len(ncol), each = length(time))
  }
  time <- rep(time, times = ncol)
  dplyr::tibble(!!group := groups,
                !!name := time,
                !!value := as.vector(x))
}

Calculation

library(dplyr)
library(lubridate)
library(tidyr)
for (i in seq_along(all_stocks)) {
  stock <- xts_as_tibble(all_stocks[[i]])
  positive_name <- paste0(names(all_stocks)[i], ".Return.Positive")
  return_name <- paste0(names(all_stocks)[i], ".Return")
  
  positive_stocks[[i]] <- stock %>%
    filter(group %in% c(return_name, positive_name)) %>%
    pivot_wider(names_from = group, values_from = value) %>%
    mutate(month = month(time, label = TRUE)) %>%
    group_by(month) %>%
    summarise(mean_return = mean(.data[[return_name]]),
              proportion_pos = sum(.data[[positive_name]])/n())
}
names(positive_stocks) <- names(all_stocks)
positive_stocks[1]
$AA
# A tibble: 12 × 3
   month mean_return proportion_pos
   <ord>       <dbl>          <dbl>
 1 Jan       -0.459           0.4  
 2 Feb        1.33            0.4  
 3 Mar       -0.187           0.4  
 4 Apr       -1.26            0.6  
 5 May       -2.35            0.3  
 6 Jun       -1.50            0.455
 7 Jul        1.38            0.727
 8 Aug        0.370           0.417
 9 Sep       -0.963           0.6  
10 Oct       -0.0321          0.6  
11 Nov        1.46            0.7  
12 Dec        1.52            0.6 
2
G. Grothendieck On

Normally the return is based on the close to close value but we assume that you really did intend the open to close value.

First we read the stock data into environment e, calculate and append Return and Positive, remove the symbol names from the column names, convert to data.frame and bind the components together as a data.table. Then calculate the means by month and the mean by symbol.

library(data.table)
library(quantmod)

Symbols <- c("AA","AAL","AAOI")
start_date <- as.Date("2013-06-01")
symbols <- getSymbols(Symbols, from = start_date, periodicity = 'monthly',
  env = e <- new.env())

# remove chars before dot in each colname of input, x,
# and return the modified object
rmPrefix <- function(x) setNames(x, sub(".*\\.", "", names(x)))

# data.table with Return and Positive columns added
stock_data <- Map(function(nm, x = e[[nm]], d = Cl(x) - Op(x)) x |>
      merge(setNames(d, paste(nm, "Return", sep = "."))) |>
      merge(setNames(d > 0, paste(nm, "Positive", sep = "."))) |>
      rmPrefix() |>
      fortify.zoo(), symbols) |>
    rbindlist(idcol = "Symbol")
stock_data[, Month := cycle(as.yearmon(Index))]     # append Month column

# avg Return and avg Positive by month - result is 12x3
stock_month <- stock_data[, lapply(.(Return = Return, Positive = Positive), mean),
  by = Month][order(Month)]

# avg Return and avg Positive by Symbol - result is 3x3
stock_sym <- stock_data[, lapply(.(Return = Return, Positive = Positive), mean),
  by = Symbol][order(Symbol)]

# avg Return and avg Positive by Symbol and month - result is 36x3
stock_sym_month <- 
  stock_data[, lapply(.(Return = Return, Positive = Positive), mean),
  by = .(Symbol, Month)][order(Symbol, Month)]