Define Libraries

library("stringr")
library("dplyr")
library("reshape2")
library("rmarkdown")

Define Path

dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "dataset/data")
dir.annot <- file.path(dir.wrk, "dataset/annotation")

Define Files

file.dat <- file.path(dir.data, "reconstruction_data.csv")
file.eth <- file.path(dir.annot, "groups_ethnicity.tsv")
file.edu <- file.path(dir.annot, "groups_education.tsv")
file.job <- file.path(dir.annot, "groups_occupation.tsv")
file.income <- file.path(dir.annot, "groups_income.tsv")

Load Data

dat <- read.csv(file.dat, header = TRUE, stringsAsFactors = FALSE)

colnames(dat)
##  [1] "UNIQUE_ID"                "DATE"                    
##  [3] "DISTRICT_CURRENT"         "VDC_MUNICIPALITY"        
##  [5] "WARD_CURRENT"             "CASTE_ETHNICITY"         
##  [7] "SEX_OF_RESPONDENTS"       "AGE_OF_RESPONDENTS"      
##  [9] "EDUCATION_OF_RESPONDENTS" "WALKING_DISTANCE"        
## [11] "Household_Size"           "Highest_Education"       
## [13] "Primary_Occupation"       "Income"                  
## [15] "Damage_Category"          "Months_Recon"            
## [17] "Recon_Completed"          "Amount_Spent"            
## [19] "Govt_Grant"               "Home_Safe"               
## [21] "Satisfaction_Level"       "Freedom_Choice"

Add District & VDC-Municipality

dat$VDC_MUN <- dat$DISTRICT <- NA
dat$DISTRICT[which(dat$DISTRICT_CURRENT == 1)] <- "Sindhupalchowk"
dat$DISTRICT[which(dat$DISTRICT_CURRENT == 2)] <- "Dhading"

dat$VDC_MUN[which(dat$VDC_MUNICIPALITY == 1)] <- "Gumdi"
dat$VDC_MUN[which(dat$VDC_MUNICIPALITY == 2)] <- "Sertung"
dat$VDC_MUN[which(dat$VDC_MUNICIPALITY == 3)] <- "Batase"
dat$VDC_MUN[which(dat$VDC_MUNICIPALITY == 4)] <- "Pangtang"

Add Sex

dat$SEX <- NA
dat$SEX[which(dat$SEX_OF_RESPONDENTS == 1)] <- "Male"
dat$SEX[which(dat$SEX_OF_RESPONDENTS == 2)] <- "Female"
dat$SEX[which(dat$SEX_OF_RESPONDENTS == 3)] <- "Other"

Add Ethnicity

dat.eth <- read.delim(file.eth, header = TRUE, stringsAsFactors = FALSE)

dat$ETHNICITY <- NA
for (i in 1:nrow(dat.eth)) {
    index <- which(dat$CASTE_ETHNICITY == dat.eth$CasteID[i])
    dat$ETHNICITY[index] <- dat.eth$Ethnicity[i]
}

Add EducationLevel

dat.edu <- read.delim(file.edu, header = TRUE, stringsAsFactors = FALSE)

dat$HIGHEST_EDUCATION <- dat$EDUCATION_LEVEL <- NA
for (i in 1:nrow(dat.edu)) {
    index1 <- which(dat$EDUCATION_OF_RESPONDENTS == dat.edu$EduLevelID[i])
    if (length(index1) != 0) {
        dat$EDUCATION_LEVEL[index1] <- dat.edu$EducationLevel[i]
    }

    index2 <- which(dat$Highest_Education == dat.edu$EduLevelID[i])
    if (length(index2) != 0) {
        dat$HIGHEST_EDUCATION[index2] <- dat.edu$EducationLevel[i]
    }
}

Add Occupation

dat.job <- read.delim(file.job, header = TRUE, stringsAsFactors = FALSE)

dat$OCCUPATION <- NA
for (i in 1:nrow(dat.job)) {
    index <- which(dat$Primary_Occupation == dat.job$OccupationID[i])
    dat$OCCUPATION[index] <- dat.job$OccupationGroup[i]
}
table(dat$Income, dat$HIGHEST_EDUCATION)
##    
##     Illiterate Primary Secondary University
##   1          7       3        14          1
##   2         11       8        96          9
##   3          6       9        88         12
##   4          1       4        43         13
##   5          0       0         1          7
##   6          0       0         1          3
##   7          4       5        30          5
d <- subset(dat, dat$Income == 7)
table(d$EDUCATION_LEVEL, d$HIGHEST_EDUCATION)
##             
##              Illiterate Primary Secondary University
##   Illiterate          2       4        19          3
##   Primary             1       1         2          2
##   Secondary           1       0         9          0

Impute Income Level Evaluating Education Level of Household

dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Illiterate") & (dat$EDUCATION_LEVEL == 
    "Illiterate"))] <- 1
dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Illiterate") & (dat$EDUCATION_LEVEL == 
    "Primary"))] <- 2
dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Illiterate") & (dat$EDUCATION_LEVEL == 
    "Secondary"))] <- 3

dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Primary") & (dat$EDUCATION_LEVEL == 
    "Illiterate"))] <- 1
dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Primary") & (dat$EDUCATION_LEVEL == 
    "Primary"))] <- 2

dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Secondary") & (dat$EDUCATION_LEVEL == 
    "Illiterate"))] <- 2
dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Secondary") & (dat$EDUCATION_LEVEL == 
    "Primary"))] <- 3
dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "Secondary") & (dat$EDUCATION_LEVEL == 
    "Secondary"))] <- 4

dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "University") & (dat$EDUCATION_LEVEL == 
    "Illiterate"))] <- 2
dat$Income[which((dat$Income == 7) & (dat$HIGHEST_EDUCATION == "University") & (dat$EDUCATION_LEVEL == 
    "Primary"))] <- 3

# table(dat$Income)

Add Income

dat.income <- read.delim(file.income, header = TRUE, stringsAsFactors = FALSE)

dat$INCOME_LEVEL <- NA
for (i in 1:nrow(dat.income)) {
    index <- which(dat$Income == dat.income$IncomeID[i])
    dat$INCOME_LEVEL[index] <- dat.income$IncomeGroup[i]
}

Trim Data

dat$Amount_Spent[which(dat$Amount_Spent == -1)] <- NA

items1 <- c("UNIQUE_ID", "DATE", "DISTRICT", "VDC_MUN", "WARD_CURRENT", "SEX", "AGE_OF_RESPONDENTS", 
    "ETHNICITY", "EDUCATION_LEVEL", "HIGHEST_EDUCATION", "WALKING_DISTANCE", "Household_Size", 
    "OCCUPATION", "INCOME_LEVEL", "Damage_Category", "Recon_Completed", "Months_Recon", 
    "Govt_Grant", "Amount_Spent", "Home_Safe", "Satisfaction_Level", "Freedom_Choice")

# setdiff(colnames(dat), items)
dat <- subset(dat, select = items1)

items2 <- c("UNIQUE_ID", "DATE", "DISTRICT", "VDC_MUN", "WARD", "SEX", "AGE", "ETHNICITY", 
    "EDUCATION_LEVEL", "HIGHEST_EDUCATION", "WALKING_DISTANCE", "HOUSEHOLD_SIZE", 
    "OCCUPATION", "INCOME_LEVEL", "DAMAGE_CATEGORY", "RECON_COMPLETED", "RECON_MONTHS", 
    "GOVT_GRANT", "AMOUNT_SPENT", "HOME_SAFE", "SATISFACTION_LEVEL", "FREEDOM_CHOICE")

colnames(dat) <- items2

View Data

rmarkdown::paged_table(dat, options = list(rows.print = 20))

Write Output

file.output <- file.path(dir.data, "mastertbl_household_reconstruction.tsv")
write.table(dat, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)