Define Libraries
library("stringr")
library("dplyr")
library("reshape2")
Define Path
dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.output <- file.path(dir.wrk, "data/data_processed")
dir.maps <- file.path(dir.wrk, "data/data_maps")
Define Files
file.household <- file.path(dir.output, "household_level_data_categorical.tsv")
Load Household Categorical Data
dat.household <- read.delim(file.household, header = TRUE, stringsAsFactors = FALSE)
dat.household <- dat.household %>% dplyr::mutate_all(as.character)
head(dat.household)
## household_id District GeoRegion Ethnicity IncomeGroup EducationLevel
## 1 12010100001101 Okhaldhunga Hilly Rai-Limbu 0-10000 Illiterate
## 2 12010100002101 Okhaldhunga Hilly Rai-Limbu 0-10000 Illiterate
## 3 12010100003101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## 4 12010100004101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## 5 12010100005101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## 6 12010100006101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## source_cooking_fuel_post_eq
## 1 Wood
## 2 Wood
## 3 Wood
## 4 Wood
## 5 Wood
## 6 Wood
Prepare Frequency Table by District and FuelType
df <- dat.household %>% dplyr::count(District, source_cooking_fuel_post_eq, sort = FALSE,
name = "Freq")
head(df)
## # A tibble: 6 x 3
## District source_cooking_fuel_post_eq Freq
## <chr> <chr> <int>
## 1 Dhading Electricity 16
## 2 Dhading Gobar Gas 1806
## 3 Dhading Kerosene 12
## 4 Dhading LP Gas 8895
## 5 Dhading Others 31
## 6 Dhading Wood 75585
Compute Ratio Tables
dm <- reshape2::dcast(data = df, formula = District ~ source_cooking_fuel_post_eq,
fun.aggregate = sum, value.var = "Freq")
dm <- as.data.frame(cbind(id = dm$District, apply(dm[, -1], 2, function(x) x/sum(x))))
dm[, 1:3]
## id Electricity Gobar Gas
## 1 Dhading 0.0846560846560847 0.20506415351425
## 2 Dolakha 0.142857142857143 0.00510957193141819
## 3 Gorkha 0.0899470899470899 0.174520267968661
## 4 Kavrepalanchok 0.0793650793650794 0.19950039741115
## 5 Makwanpur 0.0899470899470899 0.22754627001249
## 6 Nuwakot 0.111111111111111 0.0453048711252413
## 7 Okhaldhunga 0.00529100529100529 0.00408765754513455
## 8 Ramechhap 0.137566137566138 0.0054502100601794
## 9 Rasuwa 0.0264550264550265 0.00567730214602021
## 10 Sindhuli 0.0634920634920635 0.119904621323947
## 11 Sindhupalchok 0.169312169312169 0.00783467696150789
# WRITE OUTPUT ---
file.output <- file.path(dir.output, "maps_tbl_district_fueltype_ratio.tsv")
write.table(dm, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
Prepare Frequency Table for Total Household Population
df <- dat.household %>% dplyr::count(District, sort = FALSE, name = "Freq")
colnames(df)[which(colnames(df) == "District")] <- "id"
df
## # A tibble: 11 x 2
## id Freq
## <chr> <int>
## 1 Dhading 86345
## 2 Dolakha 70495
## 3 Gorkha 75883
## 4 Kavrepalanchok 91895
## 5 Makwanpur 88365
## 6 Nuwakot 75429
## 7 Okhaldhunga 36112
## 8 Ramechhap 55253
## 9 Rasuwa 12380
## 10 Sindhuli 64908
## 11 Sindhupalchok 90072
# WRITE OUTPUT ---
file.output <- file.path(dir.output, "maps_tbl_district_total_household.tsv")
write.table(df, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
Prepare Frequency Table by District and Ethnicity
df <- dat.household %>% dplyr::count(District, Ethnicity, sort = FALSE, name = "Freq")
dm <- reshape2::dcast(data = df, formula = District ~ Ethnicity, fun.aggregate = sum,
value.var = "Freq")
dm <- as.data.frame(cbind(id = dm$District, apply(dm[, -1], 2, function(x) x/sum(x))))
dm[, 1:3]
## id Brahman Chepang-Thami
## 1 Dhading 0.131838687628161 0.199207271236247
## 2 Dolakha 0.0686078833447255 0.357001298435044
## 3 Gorkha 0.118869902027797 0.0522107565092599
## 4 Kavrepalanchok 0.194932786511734 0.000888402924895783
## 5 Makwanpur 0.122469810890864 0.24738604524021
## 6 Nuwakot 0.137899293688767 6.83386865304449e-05
## 7 Okhaldhunga 0.0375757575757576 0.00013667737306089
## 8 Ramechhap 0.0269036226930964 0.0362195038611358
## 9 Rasuwa 0.0166416040100251 0
## 10 Sindhuli 0.0537252221462748 0.0102508029795667
## 11 Sindhupalchok 0.0905354294827979 0.0966309027540491
# WRITE OUTPUT ---
file.output <- file.path(dir.output, "maps_tbl_district_ethnicity_ratio.tsv")
write.table(dm, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)