###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###
### CRC: DEMAND FOR MOBILE SERVICES             ###*###*###*###*###*###*###*###*###
### DISCRETE CHOICE MODEL                       ###*###*###*###*###*###*###*###*###
### BUILD PRICE AND DEMAND DATA                 ###*###*###*###*###*###*###*###*###
###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###

# ____________________________________________________________________________ ####
# START ####

# Start from an empty workspace: removes every (non-hidden) object from the
# current environment. Attached packages and options are left untouched.
rm(list = ls())
# Package dependencies. plyr supplies ddply()/summarize/mutate and ggplot2 the
# plotting functions used below; readxl, lubridate and xtable are attached but
# not referenced by the code in this file.
library(readxl)
library(plyr)
library(lubridate)
library(xtable)
library(ggplot2)

# ____________________________________________________________________________ ####
# INPUTS ####

## Directories: raw inputs are read from `inpath`, results are written to `outpath`
inpath  <- '~/Data/RAW/'
outpath <- '~/Data/Processed/'

## Firm rename table: operator name as it appears in the raw data (EMPRESA)
## and the short brand label used in the analysis (firm)
fnms <- data.frame(
  EMPRESA = c('ALMACENES EXITO INVERSIONES S.A.S.',
              'COLOMBIA MOVIL  S.A ESP',
              'COLOMBIA TELECOMUNICACIONES S.A. E.S.P.',
              'COMUNICACION CELULAR S A COMCEL S A',
              'PARTNERS TELECOM COLOMBIA SAS',
              'VIRGIN MOBILE COLOMBIA S.A.S.'),
  firm    = c('Exito',
              'Tigo-Une',
              'Movistar',
              'Claro',
              'Wom',
              'Virgin')
)

# ____________________________________________________________________________ ####
# LOAD & ORGANIZE DATA ####

## Read the raw CSV (fields are separated by ';')
file <- paste0(inpath, 'EMPAQUETAMIENTO_MOVIL_10.csv')
d    <- read.csv(file = file, sep = ';')

## Drop the 'Prepago sin compra' modality and derive the analysis variables
keep   <- d$MODALIDAD_PAGO != 'Prepago sin compra'
d      <- d[keep, ]
# Service / bundle identifier
d$serv <- d$ID_SERVICIO_PAQUETE
# Postpaid dummy: equals 1 when ID_MODALIDAD_PAGO is 1 and 0 when it is 2
d$posp <- -(d$ID_MODALIDAD_PAGO - 2)
# Year-month id 'YYYY-MM'; month number = 3 * (quarter - 1) + month within
# the quarter, zero-padded to two digits
mon    <- 3*(d$TRIMESTRE - 1) + d$MES_DEL_TRIMESTRE
d$t    <- paste(d$ANNO, sprintf('%02d', mon), sep = '-')
# Firm id: short brand label where the operator is listed in `fnms`,
# otherwise the original EMPRESA string is kept
hit    <- match(d$EMPRESA, fnms$EMPRESA)
d$firm <- d$EMPRESA
d$firm[!is.na(hit)] <- fnms$firm[hit[!is.na(hit)]]

# ____________________________________________________________________________ ####
# DEFINE MARKETS & AGGREGATE ####

## Aggregate to one row per (year, month, firm, service, payment type) cell.
## Both the number of lines (q) and the billed value (vp) are summed within the
## cell, so that vp/q is the cell's average billed value per line. summarize
## needs exactly one value per cell for every output column, hence sum() on vp.
d   <- ddply(d, .(ANNO, t, firm, serv, posp), summarize,
             q  = sum(CANTIDAD_LINEAS),
             vp = sum(VALOR_FACTURADO_O_COBRADO))
## Market size M = total lines in month t; s = the cell's share of that total
d   <- ddply(d, .(t), mutate, M = sum(q))
d$s <- d$q/d$M
## Snapshot taken before small firms are dropped (used for pre-filter stats)
do  <- d

## Filter small brands as outside option:
## v  = firm's share (in %) in each month it appears,
## m  = mean of v across those months, cv = its coefficient of variation.
## Only firms with a mean monthly share of at least 1% are kept in d.
x  <- ddply(d, .(t, firm), summarize, v = sum(s)*100)
x  <- ddply(x, .(firm), summarize, m = mean(v), cv = sd(v)/mean(v))
f  <- x$firm[x$m >= 1]
d  <- d[d$firm %in% f, ]

# ____________________________________________________________________________ ####
# MARKET STATS (BEFORE FILTERS) ####

## Market share by firm over the whole (pre-filter) sample.
## Firms that did not pass the 1% threshold (not in `f`) are pooled into a
## single "Outside option" category.
subject <- 'Shares0'
x    <- do
x$firm[!(x$firm %in% f)] <- "Outside option"
x    <- ddply(x, .(firm), summarize, Q = sum(q))
x$v  <- x$Q/sum(x$Q)
# Percentage label, shown only for segments above 10%
x$pc <- ""
x$pc[x$v > 0.10] <- paste0(round(x$v[x$v > 0.10]*100), '%')
# Single x-axis category so that all firms stack into one bar
x$x <- "Total"
p  <- ggplot(x, aes(x = x, y = v, fill = firm)) + 
  geom_bar(position = "stack", stat = "identity") + #coord_flip() + 
  ylab('') + xlab('') + scale_fill_brewer(palette = "Set1", name = '') +
  geom_text(aes(label = pc), vjust = 3, position = "stack", size = 2.8, 
            fontface = "bold", colour = 'white') + theme_bw() 
# Write the figure to the output directory. `outpath` is the only output
# location defined in this script (no separate figure path exists).
pdf(file = paste0(outpath, subject, '.pdf'), height = 5, width = 7)
print(p)
graphics.off()

# ____________________________________________________________________________ ####
# FINAL FILTERS ####

## Admissible range for the average price p. Both vectors hold two values,
## but only the first element of each is applied in this section.
lb <- c(0.3, 1)
ub <- c(170, 70)

## Keep cells with strictly positive lines and billed value, then compute the
## average price per line (billed value / lines, scaled by 1e-3)
has_sales <- d$q > 0 & d$vp > 0
d   <- d[has_sales, ]
d$p <- d$vp/d$q * 1e-3
d1  <- d                       # copy taken before the price bounds are applied
in_range <- d$p >= lb[1] & d$p <= ub[1]
d   <- d[in_range, ]

# ____________________________________________________________________________ ####
# INSTRUMENTS ####

## Brand id: one numeric code per firm x service x payment-type combination
prod_key <- paste(d$firm, d$serv, d$posp, sep = '-')
d$j      <- as.numeric(factor(prod_key))

## Instrumental prices. Each instrument is a leave-one-out mean of p: the
## average price over all other rows of the same group (the row's own price is
## excluded). A group with a single row yields 0/0 = NaN.
##   z1: same product and year, other market
##   z2: same firm and year, other market and products
##   z3: same product, other market
##   z4: same firm and market, other product
iv_groups <- list(z1 = c('ANNO', 'j'),
                  z2 = c('ANNO', 'firm'),
                  z3 = 'j',
                  z4 = c('t', 'firm'))
for (z in names(iv_groups)) {
  d <- ddply(d, iv_groups[[z]], function(g) {
    price  <- g$p
    g[[z]] <- (sum(price) - price) / (length(price) - 1)
    g
  })
}

# ____________________________________________________________________________ ####
# SAVE ####

## Store the final dataset on disk under the object name DATA
DATA <- d
save(list = 'DATA', file = paste0(outpath, 'MDATA4.RData'))

# ____________________________________________________________________________ ####
# END ####