###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###
### CRC: DEMAND FOR OPEN TV                     ###*###*###*###*###*###*###*###*###
### DISCRETE CHOICE MODEL                       ###*###*###*###*###*###*###*###*###
### BUILD PRICE AND DEMAND DATA                 ###*###*###*###*###*###*###*###*###
###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###

# ____________________________________________________________________________ ####
# START ####

rm(list = ls())
library(readxl)
library(plyr)
library(lubridate)
library(xtable)
library(Matrix)
library(reshape2)

# ____________________________________________________________________________ ####
# INPUTS ####

## Paths
inpath  <- '~/Data/RAW/'
outpath <- '~/Data/Processed/'

# ____________________________________________________________________________ ####
# TV DATA: RATINGS ####

## Load data
file <- paste0(inpath, 'Audiencia/IBOPE_Programas_2022_2024.csv')
d    <- read.csv(file, sep = ';')
d    <- d[d$Género != 'NO APLICA', ]
vn   <- c("Canal", "Título.Programa", "Fecha", "Género", "Código.programa", 
          "Duración.Prg", "Día.Semana", "Timeband..Prg.", "Rat.", "Rat..1", 
          "Shr.", "TVR.")
d    <- d[, vn]

## Fix names
vn <- c('chann', 'prog', 'dates', 'genre', 'codp', 'dur0', 
        'wday', 'tb0', 'rat', 'rat1', 'shr', 'tvr')
names(d) <- vn

## Fix dates and time brand
tb <- t(sapply(d$tb0, function(x) unlist(strsplit(x, split = ' - '))))
tb <- apply(tb, 2, function(x) {
  y <- sapply(x, function(x1) as.numeric(unlist(strsplit(x1, split = ':')))[1:2])
  return(round(y[1, ] + y[2, ]/60, 1))
})

## Fix date and time
d$date    <- as.Date(d$dates, format = '%d/%m/%Y')
i         <- which(tb[, 1] >= 24)
j         <- which(tb[, 2] >= 24)
d$date[i] <- d$date[i] + 1
tb[i, 1]  <- tb[i, 1] - 24
tb[j, 2]  <- tb[j, 2] - 24
row.names(tb) <- NULL

## Fix timebrand
wd   <- wday(d$date)
d$tb <- '' 
i <- which(wd %in% 2:6 & tb[, 1] >= 6 & tb[, 2] < 12)
j <- which(wd %in% c(1, 7) & tb[, 1] >= 6 & tb[, 2] < 12)
r <- which(wd %in% 2:6 & tb[, 1] >= 6 & tb[, 1] < 12 & tb[, 2] >= 12 & (tb[, 2] + tb[, 1])/2 < 12)
n <- which(wd %in% c(1, 7) & tb[, 1] >= 6 & tb[, 1] < 12 & tb[, 2] >= 12 & (tb[, 2] + tb[, 1])/2 < 12)
d$tb[unique(c(i, j, r, n))] <- 'A'
i <- which(wd %in% 2:6 & tb[, 1] >= 12 & tb[, 2] < 19)
j <- which(wd %in% c(1, 7) & tb[, 1] >= 12 & tb[, 2] < 18)
r <- which(wd %in% 2:6 & tb[, 1] >= 12 & tb[, 1] < 19 & tb[, 2] >= 19 & (tb[, 2] + tb[, 1])/2 < 19)
n <- which(wd %in% c(1, 7) & tb[, 1] >= 12 & tb[, 1] < 18 & tb[, 2] >= 18 & (tb[, 2] + tb[, 1])/2 < 18)
d$tb[unique(c(i, j, r, n))] <- 'B'
i <- which(wd %in% 2:6 & tb[, 1] >= 19 & tb[, 2] < 22.5)
j <- which(wd %in% c(1, 7) & tb[, 1] >= 18 & tb[, 2] < 22)
r <- which(wd %in% 2:6 & tb[, 1] >= 19 & tb[, 1] < 22.5 & tb[, 2] >= 22.5 & (tb[, 2] + tb[, 1])/2 < 22.5)
n <- which(wd %in% c(1, 7) & tb[, 1] >= 18 & tb[, 1] < 22 & tb[, 2] >= 22 & (tb[, 2] + tb[, 1])/2 < 22)
d$tb[unique(c(i, j, r, n))] <- 'C'
i <- which(wd %in% 2:6 & tb[, 1] >= 22.5 & tb[, 2] < 24)
j <- which(wd %in% c(1, 7) & tb[, 1] >= 22 & tb[, 2] < 24)
r <- which(wd %in% 2:6 & tb[, 1] >= 22.5 & tb[, 1] < 24 & tb[, 2] >= 0 & (tb[, 2] + tb[, 1])/2 < 24)
n <- which(wd %in% c(1, 7) & tb[, 1] >= 22 & tb[, 1] < 24 & tb[, 2] >= 0 & (tb[, 2] + tb[, 1])/2 < 24)
d$tb[unique(c(i, j, r, n))] <- 'D'
i <- which(wd %in% 2:6 & tb[, 1] >= 0 & tb[, 2] < 6)
j <- which(wd %in% c(1, 7) & tb[, 1] >= 0 & tb[, 2] < 6)
r <- which(wd %in% 2:6 & tb[, 1] >= 0 & tb[, 1] < 6 & tb[, 2] >= 6 & (tb[, 2] + tb[, 1])/2 < 6)
n <- which(wd %in% c(1, 7) & tb[, 1] >= 0 & tb[, 1] < 6 & tb[, 2] >= 6 & (tb[, 2] + tb[, 1])/2 < 6)
d$tb[unique(c(i, j, r, n))] <- 'E'
# Final corrections
i  <- which(d$tb == '')
av <- (tb[i, 1] + tb[i, 2])/2 
d$tb[i][av >=  6 & av < 12] <- 'A'
d$tb[i][av >= 12 & av < 19] <- 'B'
d$tb[i][av >= 19 & av < 22] <- 'C'
d$tb[i][av >= 22 & av < 24] <- 'D'
d$tb[i][av >=  0 & av < 6]  <- 'E'

## Fix genre
d$gen <- toupper(d$genre)
d$gen <- gsub('NO COMERCIALIZABLE', 'NC', d$gen)
d$gen <- gsub('CINE-PELICULA', 'CINE', d$gen)
d$gen <- gsub('OPINION/PERIODISTICO', 'OPINION', d$gen)
d$gen <- gsub('LOTERIAS/SORTEOS', 'SORTEOS', d$gen)

## Products, firms & markets
d$y     <- year(d$date)
d$t     <- paste0(d$y, '-', month(d$date))
d$chann <- gsub('1', 'UNO', gsub('CANAL ', '', toupper(d$chann)))
d$j     <- paste0(d$chann, '-', d$tb)

## Duration
dur   <- t(sapply(d$dur0, function(x) as.numeric(unlist(strsplit(x, split = ':')))))
d$dur <- dur[, 1] + dur[, 2]/60

## Aggregate ratings, shares, shows
d$qa  <- as.numeric(gsub(',', '.', d$rat)) * d$dur
d2    <- ddply(d, .(t), mutate, M = sum(qa))
d2    <- d2[d2$chann %in% c('UNO', 'RCN', 'CARACOL'), ]
d2    <- d2[d2$tb != '', ]
d2    <- d2[d2$y < 2025, ]
d2    <- ddply(d2, .(y, t, chann, j, tb), summarize, 
               Not = sum(dur[gen == 'NOTICIERO'])/sum(dur),
               Tvn = sum(dur[gen == 'TELENOVELA'])/sum(dur),
               Mgz = sum(dur[gen == 'MAGAZIN'])/sum(dur),
               Cin = sum(dur[gen == 'CINE'])/sum(dur),
               Hum = sum(dur[gen == 'HUMOR'])/sum(dur),
               Ser = sum(dur[gen == 'SERIES'])/sum(dur),
               NC  = sum(dur[gen == 'NC'])/sum(dur),
               Ift = sum(dur[gen == 'INFANTIL'])/sum(dur),
               Dep = sum(dur[gen == 'DEPORTIVO'])/sum(dur),
               Qa = sum(qa), Ma = mean(M))
d2$Sa <- d2$Qa / d2$Ma
d2    <- ddply(d2, .(t), mutate, Sao = 1 - sum(Sa))
dsa   <- d2

## Time share
x <- ddply(d2, .(y, tb), summarize, v = sum(Qa, na.rm = T))
x <- dcast(x, tb ~ y, value.var = 'v')
x[, -1] <- x[, -1]/matrix(colSums(x[, -1], na.rm = T), 3, 3, byrow = T)*100
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Shares by year
x <- ddply(d2, .(y, chann), summarize, v = sum(Qa))
x <- dcast(x, chann ~ y, value.var = 'v')
x[, -1] <- x[, -1]/matrix(colSums(x[, -1]), 3, 3, byrow = T)*100
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Shares by time brand
x <- ddply(d2, .(tb, chann), summarize, v = sum(Qa))
x <- dcast(x, chann ~ tb, value.var = 'v')
x[, -1] <- x[, -1]/matrix(colSums(x[, -1]), 3, 5, byrow = T)*100
x <- xtable(x)
print.xtable(x, include.rownames = F)

# ____________________________________________________________________________ ####
# CONTENT QUALITY DATA ####

## Content quality
file <- paste0(inpath, 'Pauta/Insumos/Req particular/Consolidación cuadros.xlsx')
dq   <- as.data.frame(read_excel(file, sheet = 2))

## Fix channel
dq$chann <- dq$`NOMBRE DEL CANAL`
dq$chann[dq$chann == 'CARACOL TELEVISION S.A.'] <- 'CARACOL'
dq$chann <- gsub('CANAL ', '', dq$chann)
dq$chann <- gsub('1', 'UNO', dq$chann)

## Fix genre
dq$gen <- dq$`GÉNERO/FORMATO DEL CONTENIDO`
dq$gen <- toupper(dq$gen)
dq$gen[dq$gen == 'CINE-PELÍCULA-DOCUMENTAL'] <- 'CINE-PELICULA'
dq$gen[dq$gen == 'CONCURSO-REALITY'] <- 'CR'
dq$gen[dq$gen == 'LOTERÍAS/SORTEOS'] <- 'LOTERIAS/SORTEOS'
dq$gen[dq$gen == 'MAGAZÍN-OPINIÓN-INVESTIGACIÓN - ANÁLISIS DEPORTIVO'] <- 'MOD'
dq$gen[dq$gen == 'SERIES-TELENOVELAS-DRAMATIZADOS'] <- 'STD'
dq$gen[dq$gen == 'OTRO ¿CUÁL?: ____________'] <- 'OTROS'
dq$gen[dq$gen == 'TRANSMISIONES EN DIRECTO'] <- 'OTROS'

## Fix the rest
dq$y <- dq$AÑO + 1
dq$q <- rowSums(apply(dq[, 6:9], 2, as.numeric), na.rm = T)/1e+9
dq   <- ddply(dq[dq$y >= 2022, ], .(y, chann, gen), summarize, q = sum(q, na.rm = T))

## Weights
wq     <- d[d$chann %in% unique(dq$chann) & d$y < 2025, ]
wq$gen <- wq$genre
wq$gen[wq$gen == 'CONCURSOS']   <- 'CR'
wq$gen[wq$gen == 'REALITY']     <- 'CR'
wq$gen[wq$gen == 'MAGAZIN']     <- 'MOD'
wq$gen[wq$gen == 'OPINION/PERIODISTICO'] <- 'MOD'
wq$gen[wq$gen == 'DEPORTIVO']   <- 'MOD'
wq$gen[wq$gen == 'SERIES']      <- 'STD'
wq$gen[wq$gen == 'TELENOVELA']  <- 'STD'
wq$gen[wq$gen == 'DRAMATIZADO'] <- 'STD'
wq <- wq[wq$gen %in% dq$gen, ]
wq <- ddply(wq, .(y, chann, tb, gen), summarize, w = sum(dur))

## Compute aggregate q
vn <- c('y', 'chann', 'gen')
sort(unique(dq$gen))
sort(unique(wq$gen))
dwq <- merge(wq[, c(vn, 'tb', 'w')], dq[, c(vn, 'q')], all.x = T)
dwq <- ddply(dwq, .(y, chann, tb), summarize, 
             tq = sum(q), q = weighted.mean(q, w = w))

## Quality by year
x <- ddply(dwq, .(y, chann), summarize, v = sum(q))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Quality by time
x <- ddply(dwq, .(tb, chann), summarize, v = sum(q))
x <- dcast(x, chann ~ tb, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

# ____________________________________________________________________________ ####
# ADVERTISEMENT DATA ####

## Price data
file <- paste0(inpath, 'Pauta/Insumos/IBOPE simple (desde 2021)/PAUTA IBOPE COMPLETA.csv')
d    <- read.csv(file, sep = ';')
d$chann <- gsub('CANAL ', '', d$CANAL)
d$chann <- gsub('1', 'UNO', d$chann)
d$tb <- ''
i <- grep('Day', d$Franja)
d$tb[i] <- 'A'
i <- grep('Early', d$Franja)
d$tb[i] <- 'B'
i <- grep('Prime', d$Franja)
d$tb[i] <- 'C'
i <- grep('Late', d$Franja)
d$tb[i] <- 'D'
i <- grep('Over', d$Franja)
d$tb[i] <- 'E'
d       <- d[d$tb != '', ]
dt      <- t(sapply(d$PERIODO, function(x) as.numeric(unlist(strsplit(x, split = '-')))))
d$y     <- dt[, 1]
d$t     <- paste(dt[, 1], dt[, 2], sep = '-')
d       <- d[d$y >= 2022, ]
d$q     <- as.numeric(d$Total_Inversion)
d$dur   <- d$Total_Duracion
d1      <- ddply(d, .(y, t, chann, tb),  summarize, Q1 = sum(q), Dur = sum(dur))

## Shares data
file <- paste0(inpath, 'Pauta/Insumos/Req particular/Consolidación cuadros.xlsx')
d    <- as.data.frame(read_excel(file, sheet = 3))
d$ty <- toupper(d$`TIPO DE PUBLICIDAD CONTRATADA EN EL CANAL`)
d    <- d[d$ty %in% c("ANUNCIO PUBLICITARIO EN ESPACIOS DEL CANAL",
                      "ANUNCIO PUBLICITARIO DENTRO DEL PROGRAMA",
                      "ANUNCIO PUBLICITARIO EN LOS ESPACIOS COMERCIALES DEL CANAL;MENCIÓN;SUPERIMPOSICIÓN;RECUADRO;PATROCINIO DE UN PROGRAMA O SEGMENTO DE PROGRAMA.",
                      "ANUNCIO PUBLICITARIO EN LOS ESPACIOS COMERCIALES DEL CANAL",
                      "ANUNCIO PUBLICITARIO DENTRO DE UN PROGRAMA"), ]
d$chann <- d$`NOMBRE DEL CANAL`
d$chann[d$chann == 'CARACOL TELEVISIÓN S.A.'] <- 'CARACOL'
d$chann <- gsub('CANAL ', '', d$chann)
d$chann <- gsub('1', 'UNO', d$chann)
d$y     <- d$AÑO
d$tb    <- d$`FRANJA HORARIA`
d       <- d[d$tb %in% LETTERS[1:5] & d$y >= 2022, ]
d$q     <- d$`VALOR TOTAL DE LA PUBLICIDAD PAUTADA EN EL CANAL TELEVISIVO`
d$nc    <- d$`NÚMERO DE ANUNCIOS CONTRATADOS`
d$t     <- paste0(d$y, '-', as.numeric(d$MES_NUM))
d3      <- ddply(d, .(y, t, chann, tb),  summarize, Qp = sum(q), Nc = sum(nc))

## Time share
x <- ddply(d3, .(y, tb), summarize, v = sum(Qp, na.rm = T))
x <- dcast(x, tb ~ y, value.var = 'v')
x[, -1] <- x[, -1]/matrix(colSums(x[, -1], na.rm = T), 3, 3, byrow = T)*100
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Shares by year
x <- ddply(d3, .(y, chann), summarize, v = sum(Qp, na.rm = T))
x <- dcast(x, chann ~ y, value.var = 'v')
x[, -1] <- x[, -1]/matrix(colSums(x[, -1], na.rm = T), 3, 3, byrow = T)*100
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Shares by time brand
x <- ddply(d3, .(tb, chann), summarize, v = sum(Qp, na.rm = T))
x <- dcast(x, chann ~ tb, value.var = 'v')
x[, -1] <- x[, -1]/matrix(colSums(x[, -1], na.rm = T), 3, 5, byrow = T)*100
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Merge
v1 <- c('y', 't', 'chann', 'tb', 'Q1', 'Dur')
v2 <- c('t', 'chann', 'tb', 'Qp', 'Nc')
d2 <- merge(d1[, v1], d3[, v2])

## Prices
d2$p  <- d2$Q1/d2$Dur
d2$dp <- d2$Qp/d2$Dur/1000
d2$D  <- 1 - d2$dp/d2$p
summary(d2)

## Shares
d2$M  <- 1.963e+12/12 # CRC
d2$M[d2$y == 2023] <- 2.2e+12/12 # CRC
d2$M[d2$y == 2024] <- 2.260e+12/12 # IAB colombia
d2$Sp <- d2$Qp/d2$M
dsp   <- ddply(d2, .(t), mutate, Spo = 1 - sum(Sp))
dsp$j <- paste0(dsp$chann, '-', dsp$tb)

# ____________________________________________________________________________ ####
# INSTRUMENTS ####

exmean <- function(x) (sum(x) - x)/(length(x) - 1)

## Audience instruments
dz  <- ddply(dsa, .(j), mutate, zs1 = exmean(Qa/1e+6))
dz  <- ddply(dz, .(y, j), mutate, zs2 = exmean(Qa/1e+6))
dz  <- ddply(dz, .(chann), mutate, zs3 = exmean(Qa/1e+6))
dz  <- ddply(dz, .(y, chann), mutate, zs4 = exmean(Qa/1e+6))
dz  <- ddply(dz, .(t, chann), mutate, zs5 = exmean(Qa/1e+6))
dz  <- ddply(dz, .(t), mutate, zs6 = exmean(Qa/1e+6))
dzs <- dz

vz <- paste0('zs', 1:6)
cor(dz[, c(vz, 'Sa')])
Z <- dz[, vz]; rankMatrix(Z)[1]

## Quality instruments
dz  <- ddply(dwq, .(chann, tb), mutate, zq1 = exmean(q))
dz  <- ddply(dz, .(y, tb), mutate, zq2 = exmean(q))
dz  <- ddply(dz, .(chann), mutate, zq3 = exmean(q))
dz  <- ddply(dz, .(y, chann), mutate, zq4 = exmean(q))
dzq <- dz

vz <- paste0('zq', 1:4)
cor(dz[, c(vz, 'q')])
Z <- dz[, vz]; rankMatrix(Z)[1]

## Publicity instruments
dz    <- ddply(dsp, .(j), mutate, zsp1 = exmean(Qp/1e+9), zp1 = exmean(p), zD1 = exmean(D))
dz    <- ddply(dz, .(y, j), mutate, zsp2 = exmean(Qp/1e+9), zp2 = exmean(p), zD2 = exmean(D))
dz    <- ddply(dz, .(chann), mutate, zsp3 = exmean(Qp/1e+9), zp3 = exmean(p), zD3 = exmean(D))
dz    <- ddply(dz, .(y, chann), mutate, zsp4 = exmean(Qp/1e+9), zp4 = exmean(p), zD4 = exmean(D))
dz    <- ddply(dz, .(t, chann), mutate, zsp5 = exmean(Qp/1e+9), zp5 = exmean(p), zD5 = exmean(D))
dz    <- ddply(dz, .(t), mutate, zsp6 = exmean(Qp/1e+9), zp6 = exmean(p), zD6 = exmean(D))
dzsp  <- dz

vz <- paste0('zsp', 1:6)
cor(dz[, c(vz, 'Sp')])
Z <- dz[, vz]; rankMatrix(Z)[1]

vz <- paste0('zp', 1:6)
cor(dz[, c(vz, 'p')])
Z  <- dz[, vz]; rankMatrix(Z)[1]

vz <- paste0('zD', 1:6)
cor(dz[, c(vz, 'D')])
Z  <- dz[, vz]; rankMatrix(Z)[1]

# ____________________________________________________________________________ ####
# FINAL DATASET & SAVING ####

## Merge
d <- merge(dzs, dzsp)
d <- merge(d, dzq)
d <- d[d$t != '2024-8', ]

x <- ddply(d, .(y, chann), summarize, v = weighted.mean(Sa*100, Qp))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

x <- ddply(d, .(y, chann), summarize, v = weighted.mean(Sp*100, Qp))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Precios IBOPE: anual
x <- ddply(d, .(y, chann), summarize, v = weighted.mean(p, Qp))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Precios IBOPE: banda
x <- ddply(d, .(tb, chann), summarize, v = weighted.mean(p, Qp))
x <- dcast(x, chann ~ tb, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Precios Req. Part.: anual
x <- ddply(d, .(y, chann), summarize, v = weighted.mean(dp, Qp))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Precios Req. Part.: banda
x <- ddply(d, .(tb, chann), summarize, v = weighted.mean(dp, Qp))
x <- dcast(x, chann ~ tb, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Descuento.: anual
x <- ddply(d, .(y, chann), summarize, v = weighted.mean(D*100, Qp))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Descuento: banda
x <- ddply(d, .(tb, chann), summarize, v = weighted.mean(D*100, Qp))
x <- dcast(x, chann ~ tb, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Calidad: anual
x <- ddply(d, .(y, chann), summarize, v = weighted.mean(q, Qa))
x <- dcast(x, chann ~ y, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Calidad: franja
x <- ddply(d, .(tb, chann), summarize, v = weighted.mean(q, Qa))
x <- dcast(x, chann ~ tb, value.var = 'v')
x <- xtable(x)
print.xtable(x, include.rownames = F)

## Save data
DATA <- d
save(DATA, file = paste0(outpath, 'MDATA6.RData'))

# ____________________________________________________________________________ ####
# END ####