Annex. 4 Codes – Final Master’s Project

The code used to carry out the procedure described in the development of the work is presented below.

Data

Data Collection

The first thing that was done was to load the table of companies.

empresas <- read_excel("data/000_empresas.xlsx")

Then the ticks of the companies were extracted.

ticks <- empresas |> 
  select(TICKERS) |> 
  pull()

Once the ticks of the companies were stored in the ticks variable, we proceeded to download the data corresponding to said companies from Yahoo Finance using the quantmod package from Ryan and Ulrich (2023).

nombres_colum <- c("Date","Open","High","Low","Close","Volume","Adjusted")
qmd_data <- list()
for (i in 1:length(ticks)) {
  tick <- ticks[i]
  value <- getSymbols(
    tick,
    from = "2000-01-02",
    to = "2023-03-01",
    auto.assign = F,
    periodicity = "monthly") |>
    as.data.frame()
  dates <- row.names(value)
  row.names(value) <- NULL
  value <- cbind(dates,value)
  names(value) <- nombres_colum
  qmd_data[[tick]] <-  value
}

With the objective of carrying out an exploratory analysis of the data, it was decided to carry out a visual evaluation of the historical data of the adjusted price for what was executed:

lapply(qmd_data, function(x){
  x |>
    ggplot(aes(x=as.Date(Date), y=Adjusted))+
             geom_line(color="#065AD8")
})

After the visual analysis executed with the previous code fragment, the existence of constant prices was detected, as well as erroneous calculations in the adjusted price corresponding to the first years of some series. In order to eliminate these irregularities, only those observations after January 2005 were selected.

returns_emps <- qmd_data |>
  lapply(function(x){
    emps <- x |>
      filter(Date >= "2005-01-31")
  })

In order to determine if the data that had been imported had missing values, the following code was executed:

na_values <- returns_emps |>
  sapply(function(x){
    na <- length(which(is.na(x)))
  })
emp_con_na <- which(na_values > 0)

In order to solve the problem regarding the incorrect recording of the data, it was decided to eliminate those that did not present price variations in more than 10 observations. For which, the returns were first computed by executing the following code, through which the series with missing values were also eliminated.

returns_emps2 <- returns_emps[-emp_con_na] |>
  lapply(function(x){
    returns <- x |>
      select(Date, Adjusted) |>
      mutate(Return_Ad = Delt(Adjusted)[,1]) |>
      na.omit() |>
      select(Date, Return_Ad)
  })

Once the returns were computed, those series that presented 0 returns in more than 10 observations were eliminated, for which the following code was executed.

zero_values <- returns_emps2 |>
  sapply(function(x){
    zeros <- length(which(x[,2]==0))
  })
returns_emps3 <- returns_emps2[zero_values<10]

Indicators

Below is the code used during the process described in the indicators sub-heading of chapter 2.

First, the IBEX data were downloaded, the returns of the adjusted price of the same were computed and the values after January 2005 were selected.

# Importing IBEX
IBEXsel <- getSymbols(
  "^IBEX",
  from = "1990-01-01",
  to = "2023-03-01",
  auto.assign = F,
  periodicity = "monthly") |>
  as.data.frame()
dates <- row.names(IBEXsel)
row.names(IBEXsel) <- NULL
IBEXsel <- cbind(dates,IBEXsel)
names(IBEXsel) <- nombres_colum

# Calculating profitability and selecting observations after January 2005.

IBEXsel <- IBEXsel |>
  mutate(Return_I = Delt(Adjusted)[,1]) |>
  na.omit() |>
  filter(Date >= "2005-01-31") |>
  select(Date, Return_I)

Then the values of the IBEX returns were added to the tables of the returns of the shares of the selected companies, and the variables listed below were computed and added to each of the tables:

Company volatility
Index volatility
Correlation between the profitability of the company and the index
The Beta between the company and the index

returns_indc <- returns_emps3 |>
  lapply(function(x, ind = IBEXsel){
    emp <- x |>
      left_join(ind) |>
      mutate(
        VE = sqrt(cumsum((Return_Ad - cummean(Return_Ad))^2)/1:length(Return_Ad)),
        VI = sqrt(cumsum((Return_I - cummean(Return_I))^2)/1:length(Return_I)),
        Cor = cumsum((Return_Ad-cummean(Return_Ad))*(Return_I-cummean(Return_I)))/(sqrt(cumsum((Return_Ad-cummean(Return_Ad))^2))*sqrt(cumsum((Return_I-cummean(Return_I))^2)))
      )|>
      na.omit() |>
      mutate(
        Beta = (Cor*VE)/VI
      )
  })

Vectors

Below is the code used during the process described in the sub-heading vectors of the heading modelling in Chapter 2.

The first step carried out for the execution of the process explained in the sub-section in question was to create a function that allowed obtaining the consecutive samples for each series used. The function exposed below, as already mentioned, allows obtaining the consecutive samples of a series, for which the parameters mentioned in the sub-heading are used, number of input observations and number of output observations, as well as a parameter conditional with which it is indicated if the vector to be created is input or output.

vector2dmaker <- function(vec, ent, sal, eos=T){
  if(eos==T){
    emp <- 1
    term <- (length(vec) - (ent+sal-1))
    ob <- ent
  }else{
    emp <- ent + 1
    term <- (length(vec)-sal+1)
    ob <- sal
  }
  
  vec2d <- sapply(emp:term,
               function(x) vec[x:(x + ob-1)]) |>
    matrix(nrow = ob) |>
    t()
  
  return(vec2d)
}

Below is the code used to create the input vectors corresponding to each of the series. For which two functions were first created, one for the inputs and the other for the outputs.

# Function that will be used to create the three-dimensional inputs

input3dmaker <- function(x,inp,out){
  empre <- x
  series <- 2:dim(x)[2]
  for (i in series) {
    if(i==series[1]){
      vec3d <- vector2dmaker(empre[[i]],ent=inp,sal=out)
    }else{
      vec3d <- abind(vec3d,vector2dmaker(empre[[i]],ent=inp,sal=out), along = 3)
    }
  }
  return(vec3d)
}

# Function to be used to create the three-dimensional outputs

output3dmaker <- function(x,inp,out){
  empre <- x[["Return_Ad"]]
  vec3d <- vector2dmaker(empre,ent=inp,sal=out,F)
  dim(vec3d) <- c(dim(vec3d),1)
  return(vec3d)
}

Then the lists of three-dimensional vectors of inputs and outputs per company were created, executing the following code another two times with the aim of creating the lists vecs3d2e and vecs3d3e that correspond to those cases in which 2 and 3 inputs were selected.

# The time horizon is defined
ht <- 1

# The input observations are defined
oe <- 1

# 3d input vectors and 2d output vectors are created for input size 1
vecs3d1e <- list()
for(i in 1:length(returns_indc)){
  emp <- returns_indc[[i]]
  inps <- input3dmaker(emp, oe, ht)
  outs <- output3dmaker(emp, oe, ht)
  dates <- emp[(oe + ht):dim(emp)[1],1]
  id <- rep(names(returns_indc)[i],length(dates))
  tibblex <- tibble(
    Date = dates,
    ID = id,
    inputs = inps,
    outputs = outs
  )
  vecs3d1e[[names(returns_indc)[i]]] <- tibblex
}

Modelling and training

The code used during the process described in the different sub-sections of the Modelling and training section is presented below.

Modelling

For the creation of the models, the first step to execute is to obtain the information of the vectors for which the model is going to be built, which was done by executing the following code:

data <- bind_rows(vecs3d1e)
data <- data  |>
  arrange(Date)
inputsinfo <- data|>
  select(inputs) |>
  pull() |>
  dim()
outputsinfo <- data|>
  select(outputs) |>
  pull() |>
  dim()

# Define parameters
n_ob_pas <- inputsinfo[2]
n_variables <- inputsinfo[3]
n_ob_fut <- outputsinfo[2]

Then the structure of the models was constituted with the aspects described in 2.5.1 Modelling.

# Input layer
inp <- layer_input(
  shape = c(NULL,n_ob_pas,n_variables))

# Hidden layers
# - CNN
cnn <- inp |>
  layer_conv_1d(
    filters = 64,
    kernel_size = 1,
    activation = layer_activation_leaky_relu())
# - LSTM
lstm <- cnn |>
  layer_lstm(64)

# Output layer
out <- lstm |> 
  layer_dense(
    n_ob_fut*1)

# Join the layers to constitute the model 
model <- keras_model(inp, out)
# Setting learning parameters
model |> 
  compile(loss = "mse", optimizer = optimizer_sgd(0.0005))

Note

You can find untrained models in the data folder of the repository where this work is located. The models were saved using the hdf5 extension and under the names model1e, model2e and model3e.

Training

The first step is to define the function to use for training the models. This function was built with the goal of using the training method described in 2.5.2 Training. As a result, this function will return a list that will contain the predictions obtained and the model after having been trained and will take as main inputs the tibble called data constituted in the first step that is exposed in this annex’s section Modelling and the model as well. of other arguments that allows the use of the function with some main inputs that are not used in the present work.

wfv_train <- function(x, modelo, seq_var_name, inp_var_name = "inputs", out_var_name = "outputs", progress_bar=T){
  
  predictions <- c()
  seq_val <- unique(x[[seq_var_name]])
  
  if(progress_bar){
    pb <- txtProgressBar(min = 0, max = length(seq_val), initial = 0, style = 3)
  }
  
  
  # Iteration that will be executed for each unique value in the variable that defines the data sequence. For this reason it is of vital importance that the data in tibble x be ordered by the sequence variable whose name is passed to seq_var_name
  
  for (i in 1:length(seq_val)) {
    val_seq <- seq_val[i]
    
    # Extract inputs and outputs corresponding to the period in the current sequence variable
    inputs <- x |>
      filter(!!sym(seq_var_name) == val_seq) |>
      select(!!sym(inp_var_name)) |>
      pull()
    outputs <- x |>
      filter(!!sym(seq_var_name) == val_seq) |>
      select(!!sym(out_var_name)) |>
      pull()
    outputs <- outputs[,,1]
    
    # Use inputs to get forecasts for all periods in the sequence variable except for the first
    if(i > 1){
      pred <- modelo |>
        predict(inputs, verbose = 3)
      predictions <- rbind(predictions, pred)
    }
    
    # Train the model
    modelo |>
      fit(
        inputs,
        outputs,
        epochs = 1,
        batch_size = 10,
        shuffle = F,
        verbose = 0)
    
    if(progress_bar){
      setTxtProgressBar(pb,i)
      }
    
  }
  
  if(progress_bar){
    close(pb)
  }
  
  results <- list()
  results[['predicciones']] <- predictions
  results[['modelo']] <- modelo
  return(results)
}

Once the function was created, the predictions were obtained using the following code:

resultados <- wfv_train(data,model,'Date')
predicciones1e <- resultados$predicciones

Note

You can find trained models in the data folder of the repository where this work is located. The models were saved using the hdf5 extension and under the names model1etd, model2etd and model3etd.

As explained in 2.6.1 Predictions, in addition to the predictions obtained by the models, predictions obtained from the use of the arithmetic mean were computed, to compare with those obtained with the models. To compute these predictions, the following function was created:

wfv_means <- function(x, seq_var_name, inp_var_name = "inputs", out_var_name = "outputs", id_var_name, progress_bar=T){
  
  means <- c()
  seq_val <- unique(x[[seq_var_name]])
  
  if(progress_bar){
    pb <- txtProgressBar(min = 0, max = length(seq_val), initial = 0, style = 3)
  }
  
  for (i in 1:length(seq_val)) {
    val_seq <- seq_val[i]
    inputs <- x |>
      filter(!!sym(seq_var_name) == val_seq) |>
      select(!!sym(inp_var_name)) |>
      pull()
    inputspred <- x |>
      filter(!!sym(seq_var_name) == val_seq) |>
      select(!!sym(inp_var_name)) |>
      pull()
    outputs <- x |>
      filter(!!sym(seq_var_name) == val_seq) |>
      select(!!sym(out_var_name)) |>
      pull()
    outputs <- outputs[,,1]
    
    ids <- x |>
      filter(!!sym(seq_var_name) == val_seq) |>
      select(!!sym(id_var_name)) |>
      pull()
    
    if(i==1){
      dfmeans <- inputs[,,1] |>
        as.data.frame() |>
        cbind(ID = ids)
    }else{
      dfmeansupd <- inputs[,dim(inputs)[2],1] |>
        as.data.frame() |>
        cbind(ID = ids)
      names(dfmeansupd)[1] <- paste0("V",(dim(dfmeans)[2]))
      idsdf <- unique(c(ids, dfmeans[[id_var_name]]))
      idsdf <- data.frame(ID = idsdf)
      dfmeansupd <- dplyr::left_join(idsdf, dfmeansupd, by = "ID")
      ifelse(
        dim(dfmeansupd)[1] > dim(dfmeans)[1],
        dfmeans <- dplyr::left_join(dfmeansupd, dfmeans, by = "ID"),
        dfmeans <- dplyr::left_join(dfmeans, dfmeansupd, by = "ID")
        )
    }
    
    if(i > 1){
      MEANS <-  dfmeans |>
        rowwise() |>
        mutate(
          means = mean(c_across(-!!sym(id_var_name)), na.rm = T)) |>
        slice(match(ids,!!sym(id_var_name))) |>
        pull(means) |>
        as.matrix()
      means <- rbind(means, MEANS)
    }
    
    if(progress_bar){
      setTxtProgressBar(pb,i)
    }
    
  }
  
  if(progress_bar){
    close(pb)
  }
  
  return(means)
}

Once the function was created, the predictions were obtained using the following code:

meanse1 <- wfv_means(data,'Date',id_var_name = "ID")

Note

In addition to what was previously stated, two functions getconfig and plot_modelk were created, in the .Rprofile file of the repository in which this work is found, which allow graphing the structure of the models by using the Iannone (2023) package, as seen in the ?fig-estructuras. The code to use would be:

# The functions are created to graph the structures used in this work.

model |>
  getconfig() |>
  plot_modelk() |>
  grViz()

The procedure exposed in the sections Modelling and Training of this annex was repeated to build the 10 models made from each group of three-dimensional vectors, replacing the call to vecs3d1e with in the first code exposed. vecs3d2e and vecs3d3e, depending on the group of three-dimensional vectors used.

Result

The code used during the process described in the different sub-sections of the Result section is presented below.

Predictions

The analysis exposed in 2.6.1 Predictions was carried out from graphs (see Figure 8, Figure 9 and Figure 10), which show the values of the \(MSE\) and \(R^2\) indicators. for each of the structures tested.

The first step to obtain these graphs was to compute the indicators, for each period of time, for each of the predictions obtained from the different models built with each structure. This is done using the following code.

# Extract actual outputs
salidas <- data |>
  filter(
    Date > data$Date[1]
  ) |>
  select(outputs) |>
  pull()
salidas <- salidas[,,1]

# Compute MSE and R2 indicators
indicadores <- data |>
  filter(Date > data$Date[1]) |>
  cbind(predicciones = predicciones1e[,1]) |>
  cbind(means = meanse1) |>
  mutate(salidas = salidas) |>
  select(Date, predicciones, means, salidas) |>
  group_by(Date) |>
  summarise(
    r2 = 1 - (sum((salidas - predicciones)^2)/sum((salidas - means)^2)),
    mse = mse(predicciones, salidas),
  )

The different indicators computed for each of the 10 models trained with each of the structures were stored in a list called list_indicadores. This is done using the following code:

list_indicadores[["indicadores1"]] <-  indicadores

Once this is done, a list is obtained that contains 10 data frames (indicadores1,…,indicadores10), which in turn contain the values of those of \(MSE\) and \(R^2\) of the predictions obtained by RNA models for each of the companies grouped by date. So, then the graph was built by using the following code.

# Group the information of the different constructions in a single data frame
indi_graf_data <- do.call(cbind,list_indicadores)

# Obtain the average results, for each period of time, using the different constructions
indi_graf_data |>
  rowwise() |>
  mutate(
    Date = `indicadores1.Date`,
    meanmse = mean(c_across(contains("mse"))),
    meanr2 = mean(c_across(contains("r2")))
    ) |>
  select(
    Date, meanmse,meanr2
  )|>
  # Graph
  mutate(
    Date = as.Date(Date)) |>
  ggplot(aes(x = Date, group = 1)) +
  geom_line(aes(y = meanmse, color = "MSE")) +
  geom_line(aes(y = meanr2, color = "R2")) +
  scale_color_manual(values = c("blue", "green")) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "Date", y = "Indicators", color = "Indicators")

In addition to the graphs, the Table 4 was also used in the analysis of the results, which contains the companies that obtained the best and worst indicators for each structure. To obtain these data, the following code was used:

indicadores_X_emp <- data |>
  filter(Date > data$Date[1]) |>
  cbind(predicciones = predicciones1e[,1]) |>
  cbind(means = meanse1) |>
  mutate(salidas = salidas) |>
  select(Date, predicciones, means, salidas, ID) |>
  group_by(ID) |>
  summarise(
    r2 = 1 - (sum((salidas - predicciones)^2)/sum((salidas - means)^2)),
    mse = mse(predicciones, salidas)
  ) |>
  select(ID, r2, mse)

Like the indicators computed by date, to save the indicators computed by company, a list called list_indic_emp was created. After having stored the 10 indicator data frames per company in the list, the companies with the best and worst results were extracted using the following code:

# Group the information of the different constructions in a single data frame
ind_emp_t <- do.call(rbind, list_indic_emp)

# Compute the average R2 and MSE by company
ind_emp_t <- ind_emp_t |>
  group_by(ID) |>
  summarize(
    r2 = mean(r2),
    mse= mean(mse)) |>
  ungroup() |>
  arrange(desc(r2))

# Obtain the 10 companies with the best and worst indicators
mejores10 <- head(ind_emp_t,10)
peores10 <- tail(ind_emp_t,10)

And using the above variables and using the rbind() and cbind functions was how the Table 4 were created.

Portfolio composition

This section explains how the analysis of the comparison of the results obtained by the different portfolios was carried out (see Figure 11, Figure 12 and Figure 13). For this, it is first necessary to obtain the composition of the portfolios, by date, from the predictions obtained by using the arithmetic means and the RNA models.

To calculate the composition of the portfolios, the R package Berwin A. Turlach R port by Andreas Weingessel <Andreas.Weingessel@ci.tuwien.ac.at> Fortran contributions from Cleve Moler dpodi/LINPACK) (2019) was used. Below is the code used to find the composition of the portfolios from the predictions of the mean:

# A data frame was created in which all the information was stored:
#   - IBEX values, as a reference index
#   - Values of the predictions, both those obtained by the RNA model and by the arithmetic means

DATA <- data |>
  left_join(IBEXsel, by ="Date") |>
  mutate(IBEX = Return_I) |>
  arrange(Date) |>
  filter(
    Date > data$Date[1]
  ) |>
  mutate(predicciones = predicciones1e[,1]) |>
  mutate(
    Real = salidas,
    RNA = predicciones,
    Means = meanse1
  ) |>
  select(Date, Real, IBEX, RNA, Means, ID)

# From the data frame DATA were created:
#    - A data frame whose columns are the actual data of each of the companies for each of the time periods for which predictions were obtained.
#    - A data frame whose columns are the data obtained by using the arithmetic means of each of the companies for each of the time periods for which predictions were obtained.

pvtReal <- DATA |>
  select(Date, Real, ID) |>
  pivot_wider(
    names_from = ID,
    values_from = Real
  )

pvtMeans <- DATA |>
  select(Date, Means, ID) |>
  pivot_wider(
    names_from = ID,
    values_from = Means
  )

# The data frame was created in which the composition of the portfolios was stored for each of the periods for which the prediction was obtained.
weightsm <- data.frame()

# Iteration by which the composition of the portfolios is found

pb <- txtProgressBar(min = 0, max = length(unique(data$Date)[-1]), initial = 0, style = 3)

for (i in 1:length(unique(data$Date)[-1])) {
  if(i>1){
    
    # The data frame is created that includes the data to be used to find the composition of the portfolio, this is created by the actual data to date and the forecast for the next period.
    datamQP <- pvtReal |>
      filter(Date < unique(data$Date)[-1][i]) |>
      rbind(pvtMeans |>
              filter(Date == unique(data$Date)[-1][i])
      )
    
    # Eliminate those companies that do not have actual or forecast data
    nare <- which(is.na(datamQP[dim(datamQP)[1],]))
    naremo <- which(is.na(datamQP[(dim(datamQP)[1]-1),]))
    nare <- c(nare,naremo)
    nare <- unique(nare)
    if(length(nare) != 0){
      carteram <- datamQP[, - nare]
    }else{
      carteram <- datamQP
    }
    
    # Extract the forecasts
    returnm <- carteram[dim(carteram)[1], -1] |>
      as.matrix() |>
      t()
    
    # Calculate the covariance matrix
    covmm <- cov(carteram[, -1], use = "complete.obs")
    npcovmm <- nearPD(covmm)$mat |> 
      as.matrix()
    # Extract the number of companies
    n <- ncol(npcovmm)
    
    # Find the composition of the portfolio
    qp_outm <- solve.QP(
      Dmat = 2*npcovmm,
      dvec = rep(0,n),
      Amat = cbind(-1, diag(n)),
      bvec = c(-1, rep(0,n)),
      meq = 1)
    qp_outm <- qp_outm$solution
    qp_outm <- floor(qp_outm*100)/100
    for(j in 1:length(qp_outm)){
      if(qp_outm[j] < 0.001){
        qp_outm[j] <- 0
      }else{}
    }
    
    # Save portfolio composition
    names(qp_outm) <- names(carteram[, -1])
    weightsm <- bind_rows(weightsm, qp_outm)
  }
  
  setTxtProgressBar(pb,i)
}

close(pb)

# Replace actual weights and observations with missing values with zero
pvtReal[is.na(pvtReal)] <- 0
weightsm[is.na(weightsm)] <- 0

Then, to find the profitability of the portfolio, the compositions were multiplied by the real returns, it was assumed that one was invested in the first period and a cumulative sum was made throughout the values to obtain the behaviour of the profitability throughout the period. time.

# Finding the returns of the portfolios formed from the predictions of the arithmetic mean

return_CM <-  weightsm * pvtReal[-1,-1]
return_CM <- rowSums(return_CM)
return_CM <- c(1,return_CM)
return_CM <- data.frame(
  Date = pvtReal[,1],
  Mean = return_CM
)

The same steps that were carried out to find the behaviour of the profitability of the portfolios from the arithmetic means were carried out to find the behaviour from the predictions obtained by the RNA model as seen in the code below.

# From the DATA data frame, a data frame was created whose columns are the data obtained by using the RNA model of each of the companies for each of the time periods for which predictions were obtained.

pvtRNA <- DATA |>
  select(Date, RNA, ID) |>
  pivot_wider(
    names_from = ID,
    values_from = RNA
  )

# The data frame was created in which the composition of the portfolios was stored for each of the periods for which the prediction was obtained.
weightse <- data.frame()

# Iteration by which the composition of the portfolios is found

pb <- txtProgressBar(min = 0, max = length(unique(data$Date)[-1]), initial = 0, style = 3)

for (i in 1:length(unique(data$Date)[-1])) {
  if(i>1){
    # The data frame is created that includes the data to be used to find the composition of the portfolio, this is created by the actual data to date and the forecast for the next period.
    dataeQP <- pvtReal |>
      filter(Date < unique(data$Date)[-1][i]) |>
      rbind(pvtRNA |>
              filter(Date == unique(data$Date)[-1][-1][i])
            )
    # Eliminate those companies that do not have actual or forecast data
    nare <- which(is.na(dataeQP[dim(dataeQP)[1],]))
    naremo <- which(is.na(dataeQP[(dim(dataeQP)[1]-1),]))
    nare <- c(nare,naremo)
    nare <- unique(nare)
    if(length(nare) != 0){
      carterae <- dataeQP[, - nare]
    }else{
      carterae <- dataeQP
    }
    
    # Extract forecasts
    returne <- carterae[dim(carterae)[1], -1] |>
      as.matrix() |>
      t()
    
    # Calculate the covariance matrix
    covme <- cov(carterae[, -1], use = "complete.obs")
    npcovme <- nearPD(covme)$mat |> 
      as.matrix()
    # Extract the number of companies
    n <- ncol(npcovme)
    
    # Find the composition of the portfolio
    qp_oute <- solve.QP(
      Dmat = 2*npcovme,
      dvec = rep(0,n),
      Amat = cbind(-1, diag(n)),
      bvec = c(-1, rep(0,n)),
      meq = 1)
    qp_oute <- qp_oute$solution
    qp_oute <- floor(qp_oute*100)/100
    for(j in 1:length(qp_oute)){
      if(qp_oute[j] < 0.001){
        qp_oute[j] <- 0
      }else{}
    }
    
    # Save portfolio composition
    names(qp_oute) <- names(carterae[, -1])
    weightse <- bind_rows(weightse, qp_oute)
  }
  
  setTxtProgressBar(pb,i)
}

close(pb)

# Replace weights with missing values with zero
weightse[is.na(weightse)] <- 0

# Finding the returns of the portfolios formed from the predictions of the RNA model

return_CRNA <-  weightse * pvtReal[-1,-1]
return_CRNA <- rowSums(return_CRNA)
return_CRNA <- c(1,return_CRNA)
return_CRNA <- data.frame(
  Date = pvtReal[,1],
  RNA = return_CRNA
)

Then, as with the indicators, a list_ret_RNA list was created in which the data frames of the different models built with each of the structures were stored. Then the following code was executed to obtain the graph.

# Finding the behaviour of the returns of the IBEX for the period

IBEXvals <- IBEXsel |>
    filter(Date > unique(data$Date)[2]) |>
    select(2) |>
    pull()
IBEXvals <- c(1, IBEXvals)

data_rent_RNA <- do.call(cbind,list_ret_RNA)
data_rent_RNA <- data_rent_RNA |>
  mutate(
    Date = RNA1.Date,
    IBEX = IBEXvals,
    Means = return_CM$Mean) |>
  mutate_at(vars(contains(".RNA")), ~ cumsum(.)) |>
  mutate(
    IBEX = cumsum(IBEX),
    Means = cumsum(Means)) |>
  group_by(Date) |>
  summarize(
    meanRNA = mean(c_across(contains(".RNA"))),
    max_y = max(c_across(contains(".RNA"))),
    min_y = min(c_across(contains(".RNA"))),
    min_5 = unname(quantile(c_across(contains(".RNA")),0.05)),
    max_95 = unname(quantile(c_across(contains(".RNA")),0.95)),
    IBEX = IBEX,
    Means = Means)
data_rent_RNA |>
  mutate(
    Date = as.Date(Date)) |>
ggplot(aes(x = Date)) +
  geom_ribbon(aes(ymin = min_y, ymax = min_5), fill = "blue", alpha = 0.3) +
  geom_ribbon(aes(ymin = max_y, ymax = max_95), fill = "blue", alpha = 0.3) +
  geom_ribbon(aes(ymin = min_5, ymax = max_95), fill = "blue", alpha = 0.6) +
  geom_line(
    aes(y = meanRNA, color = "Media RNA1"),
    linetype = "dashed") +
  geom_line(aes(y = max_y), color = "blue") +
  geom_line(aes(y = min_y), color = "blue") +
  geom_line(aes(y = max_95), color = "blue") +
  geom_line(aes(y = min_5, color = "RNA1")) +
  geom_line(aes(y = IBEX, color = "IBEX")) +
  geom_line(aes(y = Means, color = "Medias")) +
  scale_color_manual(
    values = c(
      "Media RNA1"="blue",
      "RNA1" = "blue",
      "IBEX" = "red",
      "Medias" = "green")) +
  guides(
    color = guide_legend(
      override.aes = list(
        linetype = c("solid","dashed","solid","solid"))))+
  labs(x = "Date",
       y = "Returns",
       color = "Legenda")+
  theme_minimal()