# Load the table of companies from the Excel workbook.
empresas <- read_excel("data/000_empresas.xlsx")
The code used to carry out the procedure described in the development of the work is presented below.
Data
Data Collection
The first thing that was done was to load the table of companies.
Then the ticks of the companies were extracted.
# Extract the ticker symbols of the companies as a plain vector.
ticks <- empresas |>
  pull(TICKERS)
Once the tickers of the companies were stored in the ticks variable, the data corresponding to those companies were downloaded from Yahoo Finance using the quantmod package (Ryan and Ulrich, 2023).
# Column names applied to every table downloaded from Yahoo Finance.
nombres_colum <- c("Date", "Open", "High", "Low", "Close", "Volume", "Adjusted")

# Download the monthly history of each ticker and store it, keyed by ticker,
# as a data frame whose dates live in a regular column instead of row names.
qmd_data <- list()
for (i in seq_along(ticks)) {
  tick <- ticks[i]
  value <- getSymbols(
    tick,
    from = "2000-01-02",
    to = "2023-03-01",
    auto.assign = FALSE,
    periodicity = "monthly"
  ) |>
    as.data.frame()
  # Move the dates out of the row names so they survive later joins/filters.
  dates <- row.names(value)
  row.names(value) <- NULL
  value <- cbind(dates, value)
  names(value) <- nombres_colum
  qmd_data[[tick]] <- value
}
To carry out an exploratory analysis of the data, the historical adjusted prices were evaluated visually by executing the following code:
# Draw one line chart of the adjusted price over time for every company.
lapply(qmd_data, function(x) {
  x |>
    ggplot(aes(x = as.Date(Date), y = Adjusted)) +
    geom_line(color = "#065AD8")
})
After the visual analysis executed with the previous code fragment, the existence of constant prices was detected, as well as erroneous calculations in the adjusted price corresponding to the first years of some series. In order to eliminate these irregularities, only those observations after January 2005 were selected.
# Keep, for every company, only the observations dated on or after
# 2005-01-31. Date holds the former row names, so the comparison is made on
# text in "YYYY-MM-DD" form.
returns_emps <- qmd_data |>
  lapply(function(x) {
    x |>
      filter(Date >= "2005-01-31")
  })
In order to determine if the data that had been imported had missing values, the following code was executed:
# Count the missing cells of every company table ...
na_values <- returns_emps |>
  vapply(function(x) sum(is.na(x)), integer(1))
# ... and record the positions of the companies that have at least one.
emp_con_na <- which(na_values > 0)
To solve the problem of incorrectly recorded data, it was decided to eliminate those series that did not present price variations in more than 10 observations. To do so, the returns were first computed by executing the following code, which also eliminates the series with missing values.
# Drop the companies flagged in emp_con_na. Positive indexing is used because
# `x[-integer(0)]` selects nothing, which would discard every series whenever
# no company has missing values.
series_sin_na <- setdiff(seq_along(returns_emps), emp_con_na)

# Compute the period-over-period return of the adjusted price for each
# remaining company and keep only the date and the return.
returns_emps2 <- returns_emps[series_sin_na] |>
  lapply(function(x) {
    x |>
      select(Date, Adjusted) |>
      mutate(Return_Ad = Delt(Adjusted)[, 1]) |>
      na.omit() |> # removes the first row, whose return is undefined
      select(Date, Return_Ad)
  })
Once the returns were computed, those series that presented 0 returns in more than 10 observations were eliminated, for which the following code was executed.
# Count, for every company, the observations whose return is exactly zero
# (second column of each table).
zero_values <- returns_emps2 |>
  vapply(function(x) sum(x[[2]] == 0, na.rm = TRUE), integer(1))
# Keep the companies with fewer than 10 zero returns.
# NOTE(review): the accompanying text says series with *more than 10* zero
# returns are removed, which would be `<= 10`; confirm the intended threshold.
returns_emps3 <- returns_emps2[zero_values < 10]
Indicators
Below is the code used during the process described in the indicators sub-heading of chapter 2.
First, the IBEX data were downloaded, the returns of the adjusted price of the same were computed and the values after January 2005 were selected.
# Importing IBEX
IBEXsel <- getSymbols(
  "^IBEX",
  from = "1990-01-01",
  to = "2023-03-01",
  auto.assign = FALSE,
  periodicity = "monthly"
) |>
  as.data.frame()
# Move the dates out of the row names into a regular column.
dates <- row.names(IBEXsel)
row.names(IBEXsel) <- NULL
IBEXsel <- cbind(dates, IBEXsel)
names(IBEXsel) <- nombres_colum

# Calculating profitability and selecting observations after January 2005.
# The return is computed before filtering so that the first retained
# observation already has a return.
IBEXsel <- IBEXsel |>
  mutate(Return_I = Delt(Adjusted)[, 1]) |>
  na.omit() |>
  filter(Date >= "2005-01-31") |>
  select(Date, Return_I)
Then the values of the IBEX returns were added to the tables of the returns of the shares of the selected companies, and the variables listed below were computed and added to each of the tables:
- Company volatility
- Index volatility
- Correlation between the profitability of the company and the index
- The Beta between the company and the index
# For every company: attach the IBEX return of the same date and add the
# expanding-window indicators
#   VE   - volatility of the company return,
#   VI   - volatility of the index return,
#   Cor  - correlation between company and index returns,
#   Beta - Cor * VE / VI.
# All of them are built from cumulative sums, so row k only uses rows 1..k.
returns_indc <- returns_emps3 |>
  lapply(function(x, ind = IBEXsel) {
    x |>
      left_join(ind, by = "Date") |>
      mutate(
        VE = sqrt(
          cumsum((Return_Ad - cummean(Return_Ad))^2) / seq_along(Return_Ad)
        ),
        VI = sqrt(
          cumsum((Return_I - cummean(Return_I))^2) / seq_along(Return_I)
        ),
        Cor = cumsum(
          (Return_Ad - cummean(Return_Ad)) * (Return_I - cummean(Return_I))
        ) / (
          sqrt(cumsum((Return_Ad - cummean(Return_Ad))^2)) *
            sqrt(cumsum((Return_I - cummean(Return_I))^2))
        )
      ) |>
      # The first row yields 0 / 0 for Cor; drop it and any other NA rows.
      na.omit() |>
      mutate(Beta = (Cor * VE) / VI)
  })
Vectors
Below is the code used during the process described in the sub-heading vectors of the heading modelling in Chapter 2.
The first step of the process explained in that sub-section was to create a function that obtains the consecutive samples of each series used. The function shown below takes the parameters mentioned in the sub-heading, namely the number of input observations and the number of output observations, as well as a conditional parameter that indicates whether the vector to be created is an input or an output.
#' Build the matrix of consecutive (sliding) samples of a series
#'
#' @param vec Vector holding the series.
#' @param ent Number of input observations per sample.
#' @param sal Number of output observations per sample.
#' @param eos If `TRUE` (default) build the input samples: windows of width
#'   `ent` starting at positions `1 .. length(vec) - (ent + sal - 1)`.
#'   If `FALSE` build the output samples: windows of width `sal` starting at
#'   positions `ent + 1 .. length(vec) - sal + 1`.
#' @return A matrix with one row per sample and one column per observation in
#'   the window. When the series is too short to hold a single sample, a
#'   matrix with zero rows is returned.
vector2dmaker <- function(vec, ent, sal, eos = TRUE) {
  if (eos) {
    first <- 1
    last <- length(vec) - (ent + sal - 1)
    width <- ent
  } else {
    first <- ent + 1
    last <- length(vec) - sal + 1
    width <- sal
  }

  # `first:last` would count downwards on a short series and index garbage.
  if (last < first) {
    return(matrix(vec[0], nrow = 0, ncol = width))
  }

  windows <- lapply(first:last, function(s) vec[s:(s + width - 1)])
  matrix(unlist(windows), ncol = width, byrow = TRUE)
}
Below is the code used to create the input vectors corresponding to each of the series. For which two functions were first created, one for the inputs and the other for the outputs.
# Function that will be used to create the three-dimensional inputs
#
# x   : table whose first column is skipped and whose remaining columns are
#       the series to be turned into samples.
# inp : number of input observations per sample.
# out : number of output observations per sample.
#
# Returns an array (samples x inp x series): the input samples of every
# series, produced by vector2dmaker(), stacked along the third dimension.
input3dmaker <- function(x, inp, out) {
  stopifnot(ncol(x) >= 2)
  series <- seq_len(ncol(x))[-1]
  planos <- lapply(series, function(j) {
    vector2dmaker(x[[j]], ent = inp, sal = out)
  })
  # Bind all matrices at once instead of growing the array inside a loop.
  abind(planos, along = 3)
}
# Function to be used to create the three-dimensional outputs
#
# x   : table with a "Return_Ad" column.
# inp : number of input observations per sample.
# out : number of output observations per sample.
#
# Returns the output samples of Return_Ad, produced by vector2dmaker() with
# eos = FALSE, as an array (samples x out x 1).
output3dmaker <- function(x, inp, out) {
  salidas2d <- vector2dmaker(x[["Return_Ad"]], ent = inp, sal = out, eos = FALSE)
  array(salidas2d, dim = c(dim(salidas2d), 1))
}
Then the lists of three-dimensional input and output vectors per company were created. The following code was executed two more times to create the lists vecs3d2e and vecs3d3e, which correspond to the cases in which 2 and 3 input observations were selected.
# The time horizon is defined
ht <- 1

# The input observations are defined
oe <- 1

# 3d input vectors and 2d output vectors are created for input size 1
vecs3d1e <- list()
for (i in seq_along(returns_indc)) {
  emp <- returns_indc[[i]]
  inps <- input3dmaker(emp, oe, ht)
  outs <- output3dmaker(emp, oe, ht)
  # Each sample is labelled with the date of its first output observation:
  # the first column of emp without its first (oe + ht - 1) entries.
  dates <- tail(emp[[1]], -(oe + ht - 1))
  id <- rep(names(returns_indc)[i], length(dates))
  tibblex <- tibble(
    Date = dates,
    ID = id,
    inputs = inps,
    outputs = outs
  )
  vecs3d1e[[names(returns_indc)[i]]] <- tibblex
}
Modelling and training
The code used during the process described in the different sub-sections of the Modelling and training section is presented below.
Modelling
For the creation of the models, the first step to execute is to obtain the information of the vectors for which the model is going to be built, which was done by executing the following code:
# Stack the samples of every company and order them chronologically.
data <- bind_rows(vecs3d1e) |>
  arrange(Date)

# Dimensions of the input and output arrays.
inputsinfo <- dim(data[["inputs"]])
outputsinfo <- dim(data[["outputs"]])

# Define parameters
n_ob_pas <- inputsinfo[2] # second dimension of the inputs
n_variables <- inputsinfo[3] # third dimension of the inputs
n_ob_fut <- outputsinfo[2] # second dimension of the outputs
Then the structure of the models was constituted with the aspects described in 2.5.1 Modelling.
# Input layer
inp <- layer_input(
  shape = c(NULL, n_ob_pas, n_variables)
)

# Hidden layers
# - CNN
cnn <- inp |>
  layer_conv_1d(
    filters = 64,
    kernel_size = 1,
    activation = layer_activation_leaky_relu()
  )

# - LSTM
lstm <- cnn |>
  layer_lstm(64)

# Output layer: one unit per future observation
out <- lstm |>
  layer_dense(n_ob_fut * 1)

# Join the layers to constitute the model
model <- keras_model(inp, out)

# Setting learning parameters
model |>
  compile(loss = "mse", optimizer = optimizer_sgd(0.0005))
You can find untrained models in the data
folder of the repository where this work is located. The models were saved using the hdf5
extension and under the names model1e
, model2e
and model3e
.
Training
The first step is to define the function used for training the models. This function was built to apply the training method described in 2.5.2 Training. It returns a list containing the predictions obtained and the model after training. Its main inputs are the tibble called data, built in the first step of this annex's Modelling section, and the model; it also accepts other arguments that allow the function to be used with inputs other than those used in the present work.
#' Walk-forward training and prediction
#'
#' For every unique value of the sequence variable, in order of appearance,
#' the model first predicts that period's inputs (every period except the
#' first) and is then fitted for one epoch on the same period's data.
#'
#' @param x Table holding the samples. It must already be ordered by the
#'   sequence variable, because periods are processed in order of appearance.
#' @param modelo Model passed to `predict()` and `fit()`.
#' @param seq_var_name Name of the column that defines the sequence.
#' @param inp_var_name Name of the column holding the input array.
#' @param out_var_name Name of the column holding the output array.
#' @param progress_bar Show a text progress bar?
#' @return A list with `predicciones` (the predictions of every period but
#'   the first, row-bound in period order) and `modelo` (the model that was
#'   passed in, after the fitting calls).
wfv_train <- function(x, modelo, seq_var_name, inp_var_name = "inputs",
                      out_var_name = "outputs", progress_bar = TRUE) {
  seq_val <- unique(x[[seq_var_name]])
  predictions <- vector("list", length(seq_val))

  if (progress_bar) {
    pb <- txtProgressBar(min = 0, max = length(seq_val), initial = 0, style = 3)
    # Close the bar even if predict()/fit() fails half-way.
    on.exit(close(pb), add = TRUE)
  }

  for (i in seq_along(seq_val)) {
    # Extract inputs and outputs corresponding to the current period
    periodo <- x |>
      filter(.data[[seq_var_name]] == seq_val[i])
    inputs <- periodo[[inp_var_name]]
    outputs <- periodo[[out_var_name]]
    # Keep the first slice of the third dimension as a (samples x steps)
    # matrix; plain `outputs[, , 1]` collapses to a vector when a period
    # holds a single sample.
    outputs <- outputs[, , 1, drop = FALSE]
    dim(outputs) <- dim(outputs)[1:2]

    # Use inputs to get forecasts for all periods except the first
    if (i > 1) {
      predictions[[i]] <- modelo |>
        predict(inputs, verbose = 3)
    }

    # Train the model
    modelo |>
      fit(
        inputs,
        outputs,
        epochs = 1,
        batch_size = 10,
        shuffle = FALSE,
        verbose = 0
      )

    if (progress_bar) {
      setTxtProgressBar(pb, i)
    }
  }

  results <- list()
  results[["predicciones"]] <- do.call(rbind, predictions)
  results[["modelo"]] <- modelo
  results
}
Once the function was created, the predictions were obtained using the following code:
# Train the model walk-forward over the dates and keep its predictions.
resultados <- wfv_train(data, model, "Date")
predicciones1e <- resultados$predicciones
You can find trained models in the data
folder of the repository where this work is located. The models were saved using the hdf5
extension and under the names model1etd
, model2etd
and model3etd
.
As explained in 2.6.1 Predictions, in addition to the predictions obtained by the models, predictions obtained from the use of the arithmetic mean were computed, to compare with those obtained with the models. To compute these predictions, the following function was created:
#' Walk-forward predictions based on the arithmetic mean
#'
#' Keeps, per identifier, a table of observed values of the first input
#' variable: all input steps of the first period, then the last input step of
#' every later period. For every period except the first, the prediction for
#' an identifier is the mean of the values stored for it so far (missing
#' values ignored).
#'
#' @param x Table holding the samples, ordered by the sequence variable.
#' @param seq_var_name Name of the column that defines the sequence.
#' @param inp_var_name Name of the column holding the input array.
#' @param out_var_name Unused; kept so existing calls keep working.
#' @param id_var_name Name of the column holding the identifier.
#' @param progress_bar Show a text progress bar?
#' @return A one-column matrix with the predictions of every period but the
#'   first, row-bound in period order and, within a period, in the order of
#'   that period's identifiers. `NULL` when there is at most one period.
wfv_means <- function(x, seq_var_name, inp_var_name = "inputs",
                      out_var_name = "outputs", id_var_name,
                      progress_bar = TRUE) {
  seq_val <- unique(x[[seq_var_name]])
  means <- vector("list", length(seq_val))

  if (progress_bar) {
    pb <- txtProgressBar(min = 0, max = length(seq_val), initial = 0, style = 3)
    on.exit(close(pb), add = TRUE)
  }

  for (i in seq_along(seq_val)) {
    periodo <- x |>
      filter(.data[[seq_var_name]] == seq_val[i])
    inputs <- periodo[[inp_var_name]]
    ids <- periodo[[id_var_name]]

    if (i == 1) {
      # First period: store every input step of the first variable.
      primeras <- inputs[, , 1, drop = FALSE]
      dim(primeras) <- dim(primeras)[1:2]
      dfmeans <- as.data.frame(primeras)
      dfmeans[[id_var_name]] <- ids
    } else {
      # Later periods: append the last input step of the first variable as a
      # new column whose name cannot clash with the existing ones.
      dfmeansupd <- data.frame(inputs[, dim(inputs)[2], 1])
      names(dfmeansupd) <- paste0("V", ncol(dfmeans))
      dfmeansupd[[id_var_name]] <- ids

      # Make sure identifiers seen now or earlier all have a row.
      idsdf <- data.frame(unique(c(ids, dfmeans[[id_var_name]])))
      names(idsdf) <- id_var_name
      dfmeansupd <- dplyr::left_join(idsdf, dfmeansupd, by = id_var_name)

      # Join from the longer table so newly seen identifiers are kept.
      if (nrow(dfmeansupd) > nrow(dfmeans)) {
        dfmeans <- dplyr::left_join(dfmeansupd, dfmeans, by = id_var_name)
      } else {
        dfmeans <- dplyr::left_join(dfmeans, dfmeansupd, by = id_var_name)
      }

      # Mean of everything stored per identifier, reported in the order of
      # this period's identifiers.
      valores <- dfmeans[, names(dfmeans) != id_var_name, drop = FALSE]
      medias <- unname(rowMeans(as.matrix(valores), na.rm = TRUE))
      means[[i]] <- as.matrix(medias[match(ids, dfmeans[[id_var_name]])])
    }

    if (progress_bar) {
      setTxtProgressBar(pb, i)
    }
  }

  do.call(rbind, means)
}
Once the function was created, the predictions were obtained using the following code:
# Mean-based benchmark predictions for the same samples.
meanse1 <- wfv_means(data, "Date", id_var_name = "ID")
In addition to the above, two functions, getconfig and plot_modelk, were created in the .Rprofile file of the repository in which this work is found. They allow the structure of the models to be graphed using the package by Iannone (2023), as seen in Figure 17. The code to use would be:
# Graph the structure of the model with the helper functions getconfig() and
# plot_modelk().
model |>
  getconfig() |>
  plot_modelk() |>
  grViz()
The procedure shown in the Modelling and Training sections of this annex was repeated to build the 10 models made from each group of three-dimensional vectors, replacing the call to vecs3d1e in the first code fragment with vecs3d2e or vecs3d3e, depending on the group of three-dimensional vectors used.
Result
The code used during the process described in the different sub-sections of the Result section is presented below.
Predictions
The analysis presented in 2.6.1 Predictions was carried out from graphs (see Figure 8, Figure 9 and Figure 10), which show the values of the \(MSE\) and \(R^2\) indicators for each of the structures tested.
The first step to obtain these graphs was to compute the indicators, for each period of time, for each of the predictions obtained from the different models built with each structure. This is done using the following code.
# Extract actual outputs (every date after the first, which has no prediction)
salidas <- data |>
  filter(Date > data$Date[1]) |>
  pull(outputs)
salidas <- salidas[, , 1]

# Compute MSE and R2 indicators per date. R2 is measured against the
# mean-based predictions: 1 - SSE(model) / SSE(means).
indicadores <- data |>
  filter(Date > data$Date[1]) |>
  cbind(predicciones = predicciones1e[, 1]) |>
  cbind(means = meanse1) |>
  mutate(salidas = salidas) |>
  select(Date, predicciones, means, salidas) |>
  group_by(Date) |>
  summarise(
    r2 = 1 - (sum((salidas - predicciones)^2) / sum((salidas - means)^2)),
    mse = mse(predicciones, salidas)
  )
The different indicators computed for each of the 10 models trained with each of the structures were stored in a list called list_indicadores
. This is done using the following code:
# Store the indicators of this model under its own name in the list.
list_indicadores[["indicadores1"]] <- indicadores
Once this is done, a list is obtained that contains 10 data frames (indicadores1, …, indicadores10), which in turn contain the values of the \(MSE\) and \(R^2\) of the predictions obtained by the RNA models for each of the companies, grouped by date. The graph was then built using the following code.
# Group the information of the different constructions in a single data frame
indi_graf_data <- do.call(cbind, list_indicadores)

# Obtain the average results, for each period of time, using the different
# constructions, and graph them
indi_graf_data |>
  rowwise() |>
  mutate(
    Date = `indicadores1.Date`,
    meanmse = mean(c_across(contains("mse"))),
    meanr2 = mean(c_across(contains("r2")))
  ) |>
  ungroup() |>
  select(Date, meanmse, meanr2) |>
  mutate(Date = as.Date(Date)) |>
  ggplot(aes(x = Date, group = 1)) +
  geom_line(aes(y = meanmse, color = "MSE")) +
  geom_line(aes(y = meanr2, color = "R2")) +
  scale_color_manual(values = c("blue", "green")) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "Date", y = "Indicators", color = "Indicators")
In addition to the graphs, the Table 4 was also used in the analysis of the results, which contains the companies that obtained the best and worst indicators for each structure. To obtain these data, the following code was used:
# Same indicators as above, computed per company instead of per date.
indicadores_X_emp <- data |>
  filter(Date > data$Date[1]) |>
  cbind(predicciones = predicciones1e[, 1]) |>
  cbind(means = meanse1) |>
  mutate(salidas = salidas) |>
  select(Date, predicciones, means, salidas, ID) |>
  group_by(ID) |>
  summarise(
    r2 = 1 - (sum((salidas - predicciones)^2) / sum((salidas - means)^2)),
    mse = mse(predicciones, salidas)
  ) |>
  select(ID, r2, mse)
Like the indicators computed by date, to save the indicators computed by company, a list called list_indic_emp
was created. After having stored the 10 indicator data frames per company in the list, the companies with the best and worst results were extracted using the following code:
# Group the information of the different constructions in a single data frame
ind_emp_t <- do.call(rbind, list_indic_emp)

# Compute the average R2 and MSE by company, best R2 first
ind_emp_t <- ind_emp_t |>
  group_by(ID) |>
  summarize(
    r2 = mean(r2),
    mse = mean(mse)
  ) |>
  ungroup() |>
  arrange(desc(r2))

# Obtain the 10 companies with the best and worst indicators
mejores10 <- head(ind_emp_t, 10)
peores10 <- tail(ind_emp_t, 10)
Table 4 was then created from the above variables using the rbind() and cbind() functions.
Portfolio composition
This section explains how the analysis of the comparison of the results obtained by the different portfolios was carried out (see Figure 11, Figure 12 and Figure 13). For this, it is first necessary to obtain the composition of the portfolios, by date, from the predictions obtained by using the arithmetic means and the RNA models.
To calculate the composition of the portfolios, the R package quadprog (S original by Berwin A. Turlach; R port by Andreas Weingessel <Andreas.Weingessel@ci.tuwien.ac.at>; Fortran contributions from Cleve Moler (dpodi/LINPACK), 2019) was used. Below is the code used to find the composition of the portfolios from the predictions of the arithmetic mean:
# A data frame was created in which all the information was stored:
# - IBEX values, as a reference index
# - Values of the predictions, both those obtained by the RNA model and by the arithmetic means
DATA <- data |>
  left_join(IBEXsel, by = "Date") |>
  mutate(IBEX = Return_I) |>
  arrange(Date) |>
  filter(Date > data$Date[1]) |>
  mutate(predicciones = predicciones1e[, 1]) |>
  mutate(
    Real = salidas,
    RNA = predicciones,
    Means = meanse1
  ) |>
  select(Date, Real, IBEX, RNA, Means, ID)

# From the data frame DATA were created:
# - A data frame whose columns are the actual data of each of the companies for each of the time periods for which predictions were obtained.
# - A data frame whose columns are the data obtained by using the arithmetic means of each of the companies for each of the time periods for which predictions were obtained.
pvtReal <- DATA |>
  select(Date, Real, ID) |>
  pivot_wider(
    names_from = ID,
    values_from = Real
  )
pvtMeans <- DATA |>
  select(Date, Means, ID) |>
  pivot_wider(
    names_from = ID,
    values_from = Means
  )

# The data frame was created in which the composition of the portfolios was stored for each of the periods for which the prediction was obtained.
weightsm <- data.frame()

# Dates for which predictions exist (every date but the first); computed once
# instead of on every iteration.
fechas_m <- unique(data$Date)[-1]

# Iteration by which the composition of the portfolios is found
pb <- txtProgressBar(min = 0, max = length(fechas_m), initial = 0, style = 3)
for (i in seq_along(fechas_m)) {
  if (i > 1) {
    # The data frame is created that includes the data to be used to find the composition of the portfolio, this is created by the actual data to date and the forecast for the next period.
    datamQP <- pvtReal |>
      filter(Date < fechas_m[i]) |>
      rbind(
        pvtMeans |>
          filter(Date == fechas_m[i])
      )

    # Eliminate those companies that do not have actual or forecast data:
    # columns that are missing in the last row or in the row before it
    ultima <- nrow(datamQP)
    nare <- unique(c(
      which(is.na(datamQP[ultima, ])),
      which(is.na(datamQP[ultima - 1, ]))
    ))
    if (length(nare) != 0) {
      carteram <- datamQP[, -nare]
    } else {
      carteram <- datamQP
    }

    # Extract the forecasts (not used by the optimisation below, which sets
    # the linear term to zero)
    returnm <- carteram[nrow(carteram), -1] |>
      as.matrix() |>
      t()

    # Calculate the covariance matrix and replace it with the nearest
    # positive-definite matrix
    covmm <- cov(carteram[, -1], use = "complete.obs")
    npcovmm <- nearPD(covmm)$mat |>
      as.matrix()

    # Extract the number of companies
    n <- ncol(npcovmm)

    # Find the composition of the portfolio: minimise w' Dmat w / 2 subject to
    # sum(w) = 1 (the single equality constraint) and w >= 0
    qp_outm <- solve.QP(
      Dmat = 2 * npcovmm,
      dvec = rep(0, n),
      Amat = cbind(-1, diag(n)),
      bvec = c(-1, rep(0, n)),
      meq = 1
    )
    # Truncate the weights to two decimals and clear anything below 0.001
    qp_outm <- floor(qp_outm$solution * 100) / 100
    qp_outm[qp_outm < 0.001] <- 0

    # Save portfolio composition
    names(qp_outm) <- names(carteram[, -1])
    weightsm <- bind_rows(weightsm, qp_outm)
  }
  setTxtProgressBar(pb, i)
}
close(pb)

# Replace actual weights and observations with missing values with zero
pvtReal[is.na(pvtReal)] <- 0
weightsm[is.na(weightsm)] <- 0
Then, to find the profitability of the portfolio, the compositions were multiplied by the real returns. It was assumed that one unit was invested in the first period, and a cumulative sum was computed over the values to obtain the behaviour of the profitability throughout the time period.
# Finding the returns of the portfolios formed from the predictions of the arithmetic mean
# Weights and real returns are matched by company name rather than by column
# position; a company that never received a weight counts as weight zero.
empresas_cm <- names(pvtReal)[-1]
pesos_cm <- weightsm
faltan_cm <- setdiff(empresas_cm, names(pesos_cm))
if (length(faltan_cm) > 0) {
  pesos_cm[faltan_cm] <- 0
}
# The first row of pvtReal has no portfolio, hence the `-1`.
return_CM <- pesos_cm[empresas_cm] * pvtReal[-1, empresas_cm]
return_CM <- rowSums(return_CM)
# One unit is assumed to be invested in the first period.
return_CM <- c(1, return_CM)
return_CM <- data.frame(
  Date = pvtReal[[1]],
  Mean = return_CM
)
The same steps that were carried out to find the behaviour of the profitability of the portfolios from the arithmetic means were carried out to find the behaviour from the predictions obtained by the RNA model as seen in the code below.
# From the DATA data frame, a data frame was created whose columns are the data obtained by using the RNA model of each of the companies for each of the time periods for which predictions were obtained.
pvtRNA <- DATA |>
  select(Date, RNA, ID) |>
  pivot_wider(
    names_from = ID,
    values_from = RNA
  )

# The data frame was created in which the composition of the portfolios was stored for each of the periods for which the prediction was obtained.
weightse <- data.frame()

# Dates for which predictions exist (every date but the first); computed once
# instead of on every iteration.
fechas_e <- unique(data$Date)[-1]

# Iteration by which the composition of the portfolios is found
pb <- txtProgressBar(min = 0, max = length(fechas_e), initial = 0, style = 3)
for (i in seq_along(fechas_e)) {
  if (i > 1) {
    # The data frame is created that includes the data to be used to find the composition of the portfolio, this is created by the actual data to date and the forecast for the next period.
    # The forecast row is the one dated fechas_e[i], as in the mean-based
    # portfolio. The earlier `[-1][-1][i]` index pointed one period ahead
    # and was NA on the last iteration.
    dataeQP <- pvtReal |>
      filter(Date < fechas_e[i]) |>
      rbind(
        pvtRNA |>
          filter(Date == fechas_e[i])
      )

    # Eliminate those companies that do not have actual or forecast data:
    # columns that are missing in the last row or in the row before it
    ultima <- nrow(dataeQP)
    nare <- unique(c(
      which(is.na(dataeQP[ultima, ])),
      which(is.na(dataeQP[ultima - 1, ]))
    ))
    if (length(nare) != 0) {
      carterae <- dataeQP[, -nare]
    } else {
      carterae <- dataeQP
    }

    # Extract forecasts (not used by the optimisation below, which sets the
    # linear term to zero)
    returne <- carterae[nrow(carterae), -1] |>
      as.matrix() |>
      t()

    # Calculate the covariance matrix and replace it with the nearest
    # positive-definite matrix
    covme <- cov(carterae[, -1], use = "complete.obs")
    npcovme <- nearPD(covme)$mat |>
      as.matrix()

    # Extract the number of companies
    n <- ncol(npcovme)

    # Find the composition of the portfolio: minimise w' Dmat w / 2 subject to
    # sum(w) = 1 (the single equality constraint) and w >= 0
    qp_oute <- solve.QP(
      Dmat = 2 * npcovme,
      dvec = rep(0, n),
      Amat = cbind(-1, diag(n)),
      bvec = c(-1, rep(0, n)),
      meq = 1
    )
    # Truncate the weights to two decimals and clear anything below 0.001
    qp_oute <- floor(qp_oute$solution * 100) / 100
    qp_oute[qp_oute < 0.001] <- 0

    # Save portfolio composition
    names(qp_oute) <- names(carterae[, -1])
    weightse <- bind_rows(weightse, qp_oute)
  }
  setTxtProgressBar(pb, i)
}
close(pb)

# Replace weights with missing values with zero
weightse[is.na(weightse)] <- 0
Then, to find the profitability of the portfolio, the compositions were multiplied by the real returns. It was assumed that one unit was invested in the first period, and a cumulative sum was computed over the values to obtain the behaviour of the profitability throughout the time period.
# Finding the returns of the portfolios formed from the predictions of the RNA model
# Weights and real returns are matched by company name rather than by column
# position; a company that never received a weight counts as weight zero.
empresas_crna <- names(pvtReal)[-1]
pesos_crna <- weightse
faltan_crna <- setdiff(empresas_crna, names(pesos_crna))
if (length(faltan_crna) > 0) {
  pesos_crna[faltan_crna] <- 0
}
# The first row of pvtReal has no portfolio, hence the `-1`.
return_CRNA <- pesos_crna[empresas_crna] * pvtReal[-1, empresas_crna]
return_CRNA <- rowSums(return_CRNA)
# One unit is assumed to be invested in the first period.
return_CRNA <- c(1, return_CRNA)
return_CRNA <- data.frame(
  Date = pvtReal[[1]],
  RNA = return_CRNA
)
Then, as with the indicators, a list_ret_RNA
list was created in which the data frames of the different models built with each of the structures were stored. Then the following code was executed to obtain the graph.
# Finding the behaviour of the returns of the IBEX for the period
IBEXvals <- IBEXsel |>
  filter(Date > unique(data$Date)[2]) |>
  pull(2)
# One unit is assumed to be invested in the first period.
IBEXvals <- c(1, IBEXvals)

# Put the portfolio returns of every RNA model side by side, add the IBEX and
# the mean-based portfolio, accumulate every series over time and summarise
# the spread of the RNA models for each date.
data_rent_RNA <- do.call(cbind, list_ret_RNA)
data_rent_RNA <- data_rent_RNA |>
  mutate(
    Date = RNA1.Date,
    IBEX = IBEXvals,
    Means = return_CM$Mean
  ) |>
  mutate(across(contains(".RNA"), cumsum)) |>
  mutate(
    IBEX = cumsum(IBEX),
    Means = cumsum(Means)
  ) |>
  group_by(Date) |>
  summarize(
    meanRNA = mean(c_across(contains(".RNA"))),
    max_y = max(c_across(contains(".RNA"))),
    min_y = min(c_across(contains(".RNA"))),
    min_5 = unname(quantile(c_across(contains(".RNA")), 0.05)),
    max_95 = unname(quantile(c_across(contains(".RNA")), 0.95)),
    IBEX = IBEX,
    Means = Means
  )

# Graph: the lighter ribbons cover the full range of the RNA models, the
# darker one the band between the 5% and 95% quantiles.
data_rent_RNA |>
  mutate(Date = as.Date(Date)) |>
  ggplot(aes(x = Date)) +
  geom_ribbon(aes(ymin = min_y, ymax = min_5), fill = "blue", alpha = 0.3) +
  geom_ribbon(aes(ymin = max_y, ymax = max_95), fill = "blue", alpha = 0.3) +
  geom_ribbon(aes(ymin = min_5, ymax = max_95), fill = "blue", alpha = 0.6) +
  geom_line(
    aes(y = meanRNA, color = "Media RNA1"),
    linetype = "dashed"
  ) +
  geom_line(aes(y = max_y), color = "blue") +
  geom_line(aes(y = min_y), color = "blue") +
  geom_line(aes(y = max_95), color = "blue") +
  geom_line(aes(y = min_5, color = "RNA1")) +
  geom_line(aes(y = IBEX, color = "IBEX")) +
  geom_line(aes(y = Means, color = "Medias")) +
  scale_color_manual(
    values = c(
      "Media RNA1" = "blue",
      "RNA1" = "blue",
      "IBEX" = "red",
      "Medias" = "green"
    )
  ) +
  guides(
    color = guide_legend(
      override.aes = list(
        linetype = c("solid", "dashed", "solid", "solid")
      )
    )
  ) +
  labs(
    x = "Date",
    y = "Returns",
    color = "Legenda"
  ) +
  theme_minimal()