Data Preparation to Generate Choropleth Maps

Define Libraries

library("stringr")
library("dplyr")
library("reshape2")

Define Path

# All paths are resolved relative to the working directory at run time.
dir.wrk <- getwd()
# Sub-directories below the working directory. Of these, only dir.output is
# referenced by the code visible in this document; the others may be used elsewhere.
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.output <- file.path(dir.wrk, "data/data_processed")
dir.maps <- file.path(dir.wrk, "data/data_maps")

Define Files

# Input table; note it is read from the processed-data directory (dir.output).
file.household <- file.path(dir.output, "household_level_data_categorical.tsv")

Load Household Categorical Data

# Read the tab-delimited household table (first row holds the column names),
# keeping text columns as plain strings rather than factors.
dat.household <- read.delim(file = file.household, header = TRUE, stringsAsFactors = FALSE)

# Convert every column to character; assigning through `[]` keeps the
# data.frame structure and the column names.
# NOTE(review): digit-only columns such as household_id are presumably parsed as
# numbers by read.delim before being converted back to text here -- confirm that
# no identifier carries leading zeros or more than ~15 digits.
dat.household[] <- lapply(dat.household, as.character)

# Preview the first rows.
head(dat.household)

##     household_id    District GeoRegion    Ethnicity IncomeGroup EducationLevel
## 1 12010100001101 Okhaldhunga     Hilly    Rai-Limbu     0-10000     Illiterate
## 2 12010100002101 Okhaldhunga     Hilly    Rai-Limbu     0-10000     Illiterate
## 3 12010100003101 Okhaldhunga     Hilly Gurung-Magar     0-10000     Illiterate
## 4 12010100004101 Okhaldhunga     Hilly Gurung-Magar     0-10000     Illiterate
## 5 12010100005101 Okhaldhunga     Hilly Gurung-Magar     0-10000     Illiterate
## 6 12010100006101 Okhaldhunga     Hilly Gurung-Magar     0-10000     Illiterate
##   source_cooking_fuel_post_eq
## 1                        Wood
## 2                        Wood
## 3                        Wood
## 4                        Wood
## 5                        Wood
## 6                        Wood

Prepare Frequency Table by District and FuelType

# Count the rows of dat.household for every (District, cooking fuel) pair and
# store the counts in a column named "Freq". sort = FALSE keeps the grouping
# order instead of ordering by count.
df <- dplyr::count(
    dat.household,
    District, source_cooking_fuel_post_eq,
    name = "Freq",
    sort = FALSE
)

# Preview the first rows.
head(df)

## # A tibble: 6 x 3
##   District source_cooking_fuel_post_eq  Freq
##   <chr>    <chr>                       <int>
## 1 Dhading  Electricity                    16
## 2 Dhading  Gobar Gas                    1806
## 3 Dhading  Kerosene                       12
## 4 Dhading  LP Gas                       8895
## 5 Dhading  Others                         31
## 6 Dhading  Wood                        75585

Compute Ratio Tables (each fuel-type column is divided by its total across all districts)

# Reshape the long count table to wide form: one row per District, one column
# per cooking-fuel category, cells holding the summed "Freq" counts.
dm <- reshape2::dcast(data = df, formula = District ~ source_cooking_fuel_post_eq, 
    fun.aggregate = sum, value.var = "Freq")

# Numeric count matrix without the District column. drop = FALSE keeps a
# matrix even when only a single fuel category is present.
fuel.counts <- as.matrix(dm[, -1, drop = FALSE])

# Divide every fuel-type column by its own total, so each column sums to 1
# across districts (same normalisation as the original apply(..., 2, x/sum(x))).
# Build the result with data.frame() rather than cbind(): cbind() of a character
# vector with a numeric matrix coerces the whole matrix to character, which
# turned every ratio into a string. check.names = FALSE preserves column names
# that contain spaces (e.g. "Gobar Gas").
dm <- data.frame(id = dm$District, sweep(fuel.counts, 2, colSums(fuel.counts), "/"), 
    check.names = FALSE, stringsAsFactors = FALSE)

# Show the id column and the first two ratio columns.
dm[, 1:3]

##                id         Electricity           Gobar Gas
## 1         Dhading  0.0846560846560847    0.20506415351425
## 2         Dolakha   0.142857142857143 0.00510957193141819
## 3          Gorkha  0.0899470899470899   0.174520267968661
## 4  Kavrepalanchok  0.0793650793650794    0.19950039741115
## 5       Makwanpur  0.0899470899470899    0.22754627001249
## 6         Nuwakot   0.111111111111111  0.0453048711252413
## 7     Okhaldhunga 0.00529100529100529 0.00408765754513455
## 8       Ramechhap   0.137566137566138  0.0054502100601794
## 9          Rasuwa  0.0264550264550265 0.00567730214602021
## 10       Sindhuli  0.0634920634920635   0.119904621323947
## 11  Sindhupalchok   0.169312169312169 0.00783467696150789

# WRITE OUTPUT ---
# Save dm as a tab-separated file in dir.output: header row included, no row
# names, no quoting of values.
file.output <- file.path(dir.output, "maps_tbl_district_fueltype_ratio.tsv")
write.table(x = dm, file = file.output, quote = FALSE, sep = "\t", 
    row.names = FALSE, col.names = TRUE)

Prepare Frequency Table for Total Household Population

# Count the rows of dat.household per District (counts stored in "Freq", not
# sorted by count), then rename the District column to "id".
df <- dat.household %>%
    dplyr::count(District, name = "Freq", sort = FALSE) %>%
    dplyr::rename(id = District)

# Print the full table.
df

## # A tibble: 11 x 2
##    id              Freq
##    <chr>          <int>
##  1 Dhading        86345
##  2 Dolakha        70495
##  3 Gorkha         75883
##  4 Kavrepalanchok 91895
##  5 Makwanpur      88365
##  6 Nuwakot        75429
##  7 Okhaldhunga    36112
##  8 Ramechhap      55253
##  9 Rasuwa         12380
## 10 Sindhuli       64908
## 11 Sindhupalchok  90072

# WRITE OUTPUT ---
# Save df as a tab-separated file in dir.output: header row included, no row
# names, no quoting of values.
file.output <- file.path(dir.output, "maps_tbl_district_total_household.tsv")
write.table(x = df, file = file.output, quote = FALSE, sep = "\t", 
    row.names = FALSE, col.names = TRUE)

Prepare Frequency Table by District and Ethnicity

# Count the rows of dat.household for every (District, Ethnicity) pair; the
# counts go into a column named "Freq" and are not sorted by count.
df <- dat.household %>% dplyr::count(District, Ethnicity, sort = FALSE, name = "Freq")

# Reshape to wide form: one row per District, one column per Ethnicity, cells
# holding the summed "Freq" counts.
dm <- reshape2::dcast(data = df, formula = District ~ Ethnicity, fun.aggregate = sum, 
    value.var = "Freq")

# Numeric count matrix without the District column. drop = FALSE keeps a
# matrix even when only a single ethnicity category is present.
ethnicity.counts <- as.matrix(dm[, -1, drop = FALSE])

# Divide every ethnicity column by its own total, so each column sums to 1
# across districts (same normalisation as the original apply(..., 2, x/sum(x))).
# Build the result with data.frame() rather than cbind(): cbind() of a character
# vector with a numeric matrix coerces the whole matrix to character, which
# turned every ratio into a string. check.names = FALSE preserves column names
# that are not syntactic R names (e.g. "Chepang-Thami").
dm <- data.frame(id = dm$District, sweep(ethnicity.counts, 2, colSums(ethnicity.counts), "/"), 
    check.names = FALSE, stringsAsFactors = FALSE)

# Show the id column and the first two ratio columns.
dm[, 1:3]

##                id            Brahman        Chepang-Thami
## 1         Dhading  0.131838687628161    0.199207271236247
## 2         Dolakha 0.0686078833447255    0.357001298435044
## 3          Gorkha  0.118869902027797   0.0522107565092599
## 4  Kavrepalanchok  0.194932786511734 0.000888402924895783
## 5       Makwanpur  0.122469810890864     0.24738604524021
## 6         Nuwakot  0.137899293688767 6.83386865304449e-05
## 7     Okhaldhunga 0.0375757575757576  0.00013667737306089
## 8       Ramechhap 0.0269036226930964   0.0362195038611358
## 9          Rasuwa 0.0166416040100251                    0
## 10       Sindhuli 0.0537252221462748   0.0102508029795667
## 11  Sindhupalchok 0.0905354294827979   0.0966309027540491

# WRITE OUTPUT ---
# Save dm as a tab-separated file in dir.output: header row included, no row
# names, no quoting of values.
file.output <- file.path(dir.output, "maps_tbl_district_ethnicity_ratio.tsv")
write.table(x = dm, file = file.output, quote = FALSE, sep = "\t", 
    row.names = FALSE, col.names = TRUE)