Following the 7.8 Mw Gorkha Earthquake in Nepal on April 25, 2015, the Kathmandu Living Labs in collaboration with the National Planning Commission (the Government of Nepal), carried out the largest household survey ever done in Nepal using mobile technology. Although the primary objective of this survey was to assess damages inflicted by the quake and identify beneficiaries eligible for government’s housing reconstruction grants, the data contain many other kinds of valuable socio-economic information, including the types of fuel used by households for cooking and lighting from 11 of the most earthquake-affected districts of Nepal, excluding the Kathmandu valley.
The data for all 11 districts were downloaded from the 2015 Nepal Earthquake: Open Data Portal (http://eq2015.npc.gov.np/). For this study, we use the following two data files:
We restrict our analysis to four key socio-economic and demographic information of the households (ethnicity, income, education, and location) and their post-earthquake sources of cooking fuel.
Ethnicity: The dataset includes the types of cooking fuel used by 96 different social groups found in the area. Since the relative population sizes of many of these groups are small, we regroup them together to form a larger group with comparable population size; we also merge some of them with the larger major groups to which they are closer culturally or linguistically. With this regrouping, we obtain 10 major ethnic groups:
Education Level: The education level of the household heads range from illiterate to Doctoral degree. We regroup the education attainment into 5 levels:
Geo-climatic Region: Similarly, the data provide information on municipality (and ward-level) location which allows us to divide the entire earthquake-affected 11 contiguous districts into two geo-climatic regions:
Income Group: As for income levels (Rs. 110 = USD 1, approx.), we follow the original dataset’s categorization that breaks the monthly income of the households into 5 levels:
library("stringr")
library("dplyr")
library("reshape2")
dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.output <- file.path(dir.wrk, "data/data_processed")
file.household_demo <- file.path(dir.data, "csv_household_demographics.csv.gz")
file.household_res <- file.path(dir.data, "csv_household_resources.csv.gz")
file.map_loc <- file.path(dir.data, "ward_vdcmun_district_name_mapping.csv.gz")
file.eth_grp <- file.path(dir.annot, "group_ethnicity.tsv")
file.edu_grp <- file.path(dir.annot, "group_education.tsv")
file.geo_grp <- file.path(dir.annot, "group_georegion.tsv")
dat.map <- read.csv(file.map_loc, header = TRUE, stringsAsFactors = FALSE)
dat.map <- dat.map[!duplicated(dat.map), ]
dat.map$ward_num <- unlist(lapply(str_split(dat.map$ward_id, ""), function(x) as.numeric(paste(x[length(x) -
1], x[length(x)], sep = ""))))
lookup_dist <- subset(dat.map, select = c("district_id", "district_name"))
lookup_dist <- lookup_dist[!duplicated(lookup_dist), ]
head(lookup_dist)
## district_id district_name
## 1 12 Okhaldhunga
## 76 20 Sindhuli
## 155 21 Ramechhap
## 219 22 Dolakha
## 293 23 Sindhupalchok
## 398 24 Kavrepalanchok
dat.eth <- read.delim(file.eth_grp, header = TRUE, stringsAsFactors = FALSE)
dat.edu <- read.delim(file.edu_grp, header = TRUE, stringsAsFactors = FALSE)
dat.geo <- read.delim(file.geo_grp, header = TRUE, stringsAsFactors = FALSE)
head(dat.geo)
## district_id district_name vdcmun_id vdcmun_name
## 1 12 Okhaldhunga 1201 Champadevi Rural Municipality
## 2 12 Okhaldhunga 1202 Chishankhu Gadhi Rural Municipality
## 3 12 Okhaldhunga 1203 Khiji Demba Rural Municipality
## 4 12 Okhaldhunga 1204 Likhu Rural Municipality
## 5 12 Okhaldhunga 1205 Manebhanjyang Rural Municipality
## 6 12 Okhaldhunga 1206 Molung Rural Municipality
## Region
## 1 Hilly
## 2 Hilly
## 3 Hilly
## 4 Hilly
## 5 Hilly
## 6 Hilly
dat.household_demo <- read.csv(file.household_demo, header = TRUE, stringsAsFactors = FALSE)
dat.household_res <- read.csv(file.household_res, header = TRUE, stringsAsFactors = FALSE)
### MERGE DATA ---
dat.household <- Reduce(function(x, y) merge(x, y, all = TRUE), list(dat.household_demo,
dat.household_res, by = "household_id"))
### TRIM DATA ---
items <- c("household_id", "district_id", "vdcmun_id", "source_cooking_fuel_post_eq",
"caste_household", "income_level_household", "education_level_household_head")
dat.household <- subset(dat.household, select = items)
dat.household$District <- NA
for (i in 1:nrow(lookup_dist)) {
index <- which(dat.household$district_id == lookup_dist$district_id[i])
dat.household$District[index] <- lookup_dist$district_name[i]
}
dat.household$GeoRegion <- NA
for (i in 1:nrow(dat.geo)) {
index <- which(dat.household$vdcmun_id == dat.geo$vdcmun_id[i])
dat.household$GeoRegion[index] <- dat.geo$Region[i]
}
dat.household$IncomeGroup <- NA
dat.household$IncomeGroup[which(dat.household$income_level_household == "Rs. 10 thousand")] <- "0-10000"
dat.household$IncomeGroup[which(dat.household$income_level_household == "Rs. 10-20 thousand")] <- "10000-20000"
dat.household$IncomeGroup[which(dat.household$income_level_household == "Rs. 20-30 thousand")] <- "20000-30000"
dat.household$IncomeGroup[which(dat.household$income_level_household == "Rs. 30-50 thousand")] <- "30000-50000"
dat.household$IncomeGroup[which(dat.household$income_level_household == "Rs. 50 thousand or more")] <- "50000-ABOVE"
dat.household$Ethnicity <- NA
for (i in 1:nrow(dat.eth)) {
index <- which(dat.household$caste_household == dat.eth$Caste[i])
dat.household$Ethnicity[index] <- dat.eth$Ethnicity[i]
}
dat.household$EducationLevel <- NA
for (i in 1:nrow(dat.edu)) {
index <- which(dat.household$education_level_household_head == dat.edu$EducationLevel[i])
dat.household$EducationLevel[index] <- dat.edu$EducationGroup[i]
}
dat.household <- dat.household[-which(is.na(dat.household$Ethnicity)), ]
### TRIM DATA ---
items <- c("household_id", "District", "GeoRegion", "Ethnicity", "IncomeGroup", "EducationLevel",
"source_cooking_fuel_post_eq")
dat.household <- subset(dat.household, select = items)
dat.household <- dat.household %>% dplyr::mutate_all(as.character)
head(dat.household)
## household_id District GeoRegion Ethnicity IncomeGroup EducationLevel
## 1 12010100001101 Okhaldhunga Hilly Rai-Limbu 0-10000 Illiterate
## 2 12010100002101 Okhaldhunga Hilly Rai-Limbu 0-10000 Illiterate
## 3 12010100003101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## 4 12010100004101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## 5 12010100005101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## 6 12010100006101 Okhaldhunga Hilly Gurung-Magar 0-10000 Illiterate
## source_cooking_fuel_post_eq
## 1 Wood
## 2 Wood
## 3 Wood
## 4 Wood
## 5 Wood
## 6 Wood
# WRITE OUTPUT ---
file.output <- file.path(dir.output, "household_level_data_categorical.tsv")
write.table(dat.household, file.output, sep = "\t", row.names = FALSE, col.names = TRUE,
quote = FALSE)
df <- dat.household %>% dplyr::count(Ethnicity, EducationLevel, GeoRegion, IncomeGroup,
source_cooking_fuel_post_eq, sort = FALSE, name = "Freq")
head(df)
## # A tibble: 6 x 6
## Ethnicity EducationLevel GeoRegion IncomeGroup source_cooking_fuel_post~ Freq
## <chr> <chr> <chr> <chr> <chr> <int>
## 1 Brahman Illiterate Hilly 0-10000 Gobar Gas 159
## 2 Brahman Illiterate Hilly 0-10000 Kerosene 5
## 3 Brahman Illiterate Hilly 0-10000 LP Gas 375
## 4 Brahman Illiterate Hilly 0-10000 Others 3
## 5 Brahman Illiterate Hilly 0-10000 Wood 7932
## 6 Brahman Illiterate Hilly 10000-20000 Gobar Gas 213
dkey <- apply(df, 1, function(x) paste(x[1:5], collapse = ":"))
var1 <- setdiff(unique(df$Ethnicity), NA)
var2 <- setdiff(unique(df$EducationLevel), NA)
var3 <- setdiff(unique(df$GeoRegion), NA)
var4 <- setdiff(unique(df$IncomeGroup), NA)
var5 <- setdiff(unique(df$source_cooking_fuel_post_eq), NA)
dcom <- expand.grid(var1, var2, var3, var4, var5)
dcom$key <- apply(dcom, 1, function(x) paste(x[1:5], collapse = ":"))
var.missed <- setdiff(dcom$key, dkey)
dcom <- subset(dcom, dcom$key %in% var.missed)
dcom <- dcom[, -6]
dcom$Freq <- 0
colnames(dcom) <- colnames(df)
# MERGE DATA WITH RESCUED COMBINATION ---
df <- rbind(df, dcom)
df <- df[order(df$Ethnicity, df$EducationLevel, df$GeoRegion, df$IncomeGroup, df$source_cooking_fuel_post_eq),
]
dim(df)
## [1] 3000 6
tail(df)
## # A tibble: 6 x 6
## Ethnicity EducationLevel GeoRegion IncomeGroup source_cooking_fuel_post~ Freq
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Tamang University Himalayan 50000-ABOVE Electricity 0
## 2 Tamang University Himalayan 50000-ABOVE Gobar Gas 0
## 3 Tamang University Himalayan 50000-ABOVE Kerosene 0
## 4 Tamang University Himalayan 50000-ABOVE LP Gas 12
## 5 Tamang University Himalayan 50000-ABOVE Others 0
## 6 Tamang University Himalayan 50000-ABOVE Wood 11
# WRITE OUTPUT ---
file.output <- file.path(dir.output, "household_level_data_frequency.tsv")
write.table(df, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
df <- subset(df, !(is.na(df$Ethnicity)))
df <- subset(df, !(is.na(df$source_cooking_fuel_post_eq)))
dm <- reshape2::dcast(data = df, formula = Ethnicity + EducationLevel + GeoRegion +
IncomeGroup ~ source_cooking_fuel_post_eq, fun.aggregate = sum, value.var = "Freq")
colnames(dm) <- c("Ethnicity", "EducationLevel", "GeoRegion", "IncomeGroup", "Electricity",
"GobarGas", "Kerosene", "LPGas", "Others", "Wood")
head(dm)
## Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas Kerosene
## 1 Brahman Illiterate Hilly 0-10000 0 159 5
## 2 Brahman Illiterate Hilly 10000-20000 0 213 1
## 3 Brahman Illiterate Hilly 20000-30000 1 95 0
## 4 Brahman Illiterate Hilly 30000-50000 0 27 0
## 5 Brahman Illiterate Hilly 50000-ABOVE 0 9 0
## 6 Brahman Illiterate Himalayan 0-10000 1 24 1
## LPGas Others Wood
## 1 375 3 7932
## 2 666 1 3245
## 3 336 0 1038
## 4 109 0 355
## 5 69 1 156
## 6 118 4 5608
# WRITE OUTPUT ---
file.output <- file.path(dir.output, "household_level_data_frequency_table.tsv")
write.table(dm, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)