###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###
### CRC: DEMAND FOR MOBILE SERVICES             ###*###*###*###*###*###*###*###*###
### DISCRETE CHOICE MODEL                       ###*###*###*###*###*###*###*###*###
### BUILD PORTABILITY & WINBACK DATA            ###*###*###*###*###*###*###*###*###
###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###*###

# ____________________________________________________________________________ ####
# START ####

rm(list = ls())
library(readxl)
library(plyr)
library(lubridate)
library(ggplot2)

# ____________________________________________________________________________ ####
# INPUTS ####

## Paths
inpath  <- '~/Data/RAW/PORT/'
outpath <- '~/Data/Processed/'

# ____________________________________________________________________________ ####
# LOAD DATA ####

## Files and variable
vn <- c("NUMERO", "TIPO_USUARIO...5", "TIPO_SERVICIO...7", 
        "DONANTE", "RECEPTOR", "FECHA_VENTANA")
fn    <- 'SolicitudesPortabilidad_exito_'
files <- 202000 + rep(1:4, each = 12)*100 + 1:12
files <- files[files < 202401]

## Load and join data
dw <- data.frame()
for(i in files) {
  print(i)
  file <- paste0(fn, i, '.xlsx')
  d    <- as.data.frame(read_excel(paste0(inpath, file)))
  d    <- d[, vn]
  dw   <- rbind(dw, d)
}
save(dw, file = paste0(outpath, 'WBDATA.RDATA'))

## Load 2024-2025 data
dw2 <- read.csv(paste0(inpath, 'Portaciones_2024_2025.csv'))
dw2 <- dw2[dw2$FECHA_VENTANA != 'NULL', ]

# ____________________________________________________________________________ ####
# PROCESS DATA ####

## Portability windows
pwinds <- c(1:4)

## Create portability & winback variables
vn <- c('NUMERO', 'DONANTE', 'RECEPTOR', 'FECHA_VENTANA')
dw$FECHA_VENTANA  <- as.Date(dw$FECHA_VENTANA)
dw2$FECHA_VENTANA <- as.Date(dw2$FECHA_VENTANA)
d  <- rbind(dw[, vn], dw2[, vn])
names(d) <- gsub('FECHA_VENTANA', 'FECHA', names(d))
dw <- d

# Identify winback: more than 2 providers, first donor is the last receiver 
x       <- table(d$NUMERO)
repn    <- as.numeric(names(x[which(x >= 2)]))
wbks    <- d[which(d$NUMERO %in% repn), ]
wbks    <- wbks[order(wbks$NUMERO, wbks$FECHA), ]
i       <- 1:(nrow(wbks) - 1)
wbks$pt <- c(0, 1*(wbks$NUMERO[i] == wbks$NUMERO[i + 1]))
wbks$wb <- c(0, 1*(wbks$DONANTE[i] == wbks$RECEPTOR[i + 1]))*wbks$pt

# Define portability speed
speed    <- c(0, wbks$FECHA[i + 1] - wbks$FECHA[i])
wbks$pt1 <- wbks$pt*(speed <= 30)
wbks$pt2 <- wbks$pt*(speed > 30 & speed <= 60)
wbks$pt3 <- wbks$pt*(speed > 60 & speed <= 90)
wbks$pt4 <- wbks$pt*(speed > 90)

# Interact with winback
wbks$wb1 <- wbks$pt1*wbks$wb
wbks$wb2 <- wbks$pt2*wbks$wb
wbks$wb3 <- wbks$pt3*wbks$wb
wbks$wb4 <- wbks$pt4*wbks$wb

## Portability dataset
PT    <- wbks[, c('FECHA', 'RECEPTOR', paste0('pt', pwinds))]
PT$ym <- PT$FECHA
day(PT$ym)  <- 1
PT$RECEPTOR <- gsub('VIRGIN MOBILE', 'VIRGIN', PT$RECEPTOR)

## Winback dataset
WB    <- wbks[wbks$wb == 1, c('FECHA', 'RECEPTOR', paste0('wb', pwinds))]
WB$ym <- WB$FECHA
day(WB$ym)  <- 1
WB$RECEPTOR <- gsub('VIRGIN MOBILE', 'VIRGIN', WB$RECEPTOR)

## Save datasets
save(list = c('WB', 'PT'), file = paste0(outpath, 'WBDATA4.RDATA'))

# ____________________________________________________________________________ ####
# SIMULATED SAMPLE DATA ####

load(paste0(outpath, 'WBDATA4.RDATA'))
load(paste0(outpath, 'MDATA4.RDATA'))

## For random samples
ns    <- 5000
firms <- unique(DATA$firm)

## Portability & Winback sampling
pt1 <- pt2 <- pt3 <- pt4 <- matrix(0, nrow(DATA), ns)
for(t in 2022:2025) {
  m2 <- 0
  for(fn in 1:length(firms)) {
    f  <- firms[fn]
    print(f)
    j  <- which(DATA$firm == f & DATA$ANNO == t)
    f2 <- toupper(gsub('-Une', '', f))
    x1 <- PT[year(PT$FECHA) == t & PT$RECEPTOR == f2, ]
    m1 <- ceiling(2 * ns * nrow(x1) / mean(DATA$M[DATA$ANNO == t]))
    print(m1)
    set.seed(t*fn); n1 <- sample(1:nrow(x1), size = m1, replace = T)
    xx <- which(colSums(pt1[j, ] + pt2[j, ] + pt3[j, ] + pt4[j, ]) == 0)
    set.seed(t*fn); n2 <- sample(xx, size = m1, replace = F)
    print(range(pt1[-j, n2] + pt2[-j, n2] + pt3[-j, n2] + pt4[-j, n2]))
    pt1[j, n2] <- matrix(x1$pt1[n1], length(j), m1, byrow = T)
    pt2[j, n2] <- matrix(x1$pt2[n1], length(j), m1, byrow = T)
    pt3[j, n2] <- matrix(x1$pt3[n1], length(j), m1, byrow = T)
    pt4[j, n2] <- matrix(x1$pt4[n1], length(j), m1, byrow = T)
    print(sum(colMeans(pt1[j, ] + pt2[j, ] + pt3[j, ] + pt4[j, ])))
    m2 <- m2 + m1
  }
  jj <- which(DATA$ANNO == t)
  xx <- pt1[jj, ] + pt2[jj, ] + pt3[jj, ] + pt4[jj, ]
  xx <- aggregate(xx, by = list(f = DATA$firm[jj]), mean)
  xx <- rowSums(xx[,-1])
  print(xx)
  print(m2)
  print(sum(xx))
}

## Winback sample
wb1 <- wb2 <- wb3 <- wb4 <- matrix(0, nrow(DATA), ns)
for(t in 2022:2025) {
  m2 <- 0
  for(fn in 1:length(firms)) {
    f  <- firms[fn]
    print(f)
    j  <- which(DATA$firm == f & DATA$ANNO == t)
    f2 <- toupper(gsub('-Une', '', f))
    x1 <- WB[year(WB$FECHA) == t & WB$RECEPTOR == f2, ]
    m1 <- ceiling(2 * ns * nrow(x1) / mean(DATA$M[DATA$ANNO == t]))
    print(m1)
    set.seed(t*fn); n1 <- sample(1:nrow(x1), size = m1, replace = T)
    xx <- which(colSums(wb1[j, ] + wb2[j, ] + wb3[j, ] + wb4[j, ]) == 0)
    set.seed(t*fn); n2 <- sample(xx, size = m1, replace = F)
    print(range(wb1[-j, n2] + wb2[-j, n2] + wb3[-j, n2] + wb4[-j, n2]))
    wb1[j, n2] <- matrix(x1$wb1[n1], length(j), m1, byrow = T)
    wb2[j, n2] <- matrix(x1$wb2[n1], length(j), m1, byrow = T)
    wb3[j, n2] <- matrix(x1$wb3[n1], length(j), m1, byrow = T)
    wb4[j, n2] <- matrix(x1$wb4[n1], length(j), m1, byrow = T)
    print(sum(colMeans(wb1[j, ] + wb2[j, ] + wb3[j, ] + wb4[j, ])))
    m2 <- m2 + m1
  }
  jj <- which(DATA$ANNO == t)
  xx <- wb1[jj, ] + wb2[jj, ] + wb3[jj, ] + wb4[jj, ]
  xx <- aggregate(xx, by = list(f = DATA$firm[jj]), mean)
  xx <- rowSums(xx[,-1])
  print(xx)
  print(m2)
  print(sum(xx))
}

## Random preference shock
set.seed(123); v <- matrix(rnorm(ns), nrow(DATA), ns, byrow = T)

## Save final sample
objs <- c(paste0('pt', pwinds), paste0('wb', pwinds), 'v')
save(list = objs, file = paste0(outpath, 'WB4.RData'))

# ____________________________________________________________________________ ####
# END ####