Define Libraries

library("stringr")
library("dplyr")
library("ggplot2")
library("gridExtra")
library("knitr")

Define Path

# Project directories, resolved relative to the working directory the script
# is run from.
dir.wrk <- getwd()
# Sub-directory that holds the input data table(s).
dir.data <- file.path(dir.wrk, "dataset/data")
# Sub-directory for annotation files.
dir.annot <- file.path(dir.wrk, "dataset/annotation")

Define Files

# Full path to the household reconstruction master table (tab-separated).
file.dat <- file.path(dir.data, "mastertbl_household_reconstruction.tsv")

Load Data

# Read the tab-delimited master table; text columns stay character so they
# can be converted to factors with explicit levels afterwards.
dat <- read.delim(
  file = file.dat,
  header = TRUE,
  stringsAsFactors = FALSE
)

# FACTORIZE ---
# Convert the categorical columns to factors in a single pass. Columns given
# an explicit `levels` vector use that order (values not listed become NA);
# WARD, RECON_COMPLETED and FREEDOM_CHOICE take their levels from the values
# present in the data.
# NOTE(review): "Goverment Employee" is reproduced exactly because factor()
# matches level strings literally; presumably it mirrors the spelling used in
# the data file -- confirm before correcting it.
dat <- dat %>%
  dplyr::mutate(
    DISTRICT = factor(DISTRICT, levels = c("Dhading", "Sindhupalchowk")),
    VDC_MUN = factor(
      VDC_MUN,
      levels = c("Gumdi", "Sertung", "Batase", "Pangtang")
    ),
    WARD = as.factor(WARD),
    SEX = factor(SEX, levels = c("Male", "Female")),
    ETHNICITY = factor(
      ETHNICITY,
      levels = c(
        "Bahun", "Chhetri", "Dalit-Others",
        "Gurung-Magar", "Newar", "Tamang"
      )
    ),
    EDUCATION_LEVEL = factor(
      EDUCATION_LEVEL,
      levels = c("Illiterate", "Primary", "Secondary", "University")
    ),
    OCCUPATION = factor(
      OCCUPATION,
      levels = c(
        "Agriculture", "Business", "Goverment Employee",
        "Labour", "Teacher"
      )
    ),
    INCOME_LEVEL = factor(
      INCOME_LEVEL,
      levels = c(
        "0-2.5K", "2.5-10K", "10-20K",
        "20-40K", "40-60K", "60K-Above"
      )
    ),
    RECON_COMPLETED = as.factor(RECON_COMPLETED),
    FREEDOM_CHOICE = as.factor(FREEDOM_CHOICE)
  )

# str(dat)



Generate Basic Statistics


Overall Satisfaction Level

# Mean, median and standard deviation of the satisfaction score over all
# households, each rounded to three decimals.
dm <- dplyr::summarize(
  dat,
  Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
  Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
  Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
)

knitr::kable(dm)
Satisfaction_Mean Satisfaction_Median Satisfaction_SD
6.764 7 2.052


Distribution by Location

# Number of households per district.
df11 <- dplyr::count(
  dat,
  DISTRICT,
  sort = FALSE,
  name = "Number_of_Households"
)

# Satisfaction mean / median / SD per district (rounded to three decimals).
df12 <- dat %>%
  dplyr::select(DISTRICT, SATISFACTION_LEVEL) %>%
  dplyr::group_by(DISTRICT) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Combine counts and summary statistics into one table keyed on DISTRICT.
df1 <- merge(x = df11, y = df12, by = "DISTRICT")

knitr::kable(df1)
DISTRICT Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
Dhading 197 6.569 6 2.129
Sindhupalchowk 184 6.973 7 1.951


Distribution by Sex

# Number of households per respondent sex.
df21 <- dplyr::count(
  dat,
  SEX,
  sort = FALSE,
  name = "Number_of_Households"
)

# Satisfaction mean / median / SD per sex (rounded to three decimals).
df22 <- dat %>%
  dplyr::select(SEX, SATISFACTION_LEVEL) %>%
  dplyr::group_by(SEX) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Combine counts and summary statistics into one table keyed on SEX.
# NOTE(review): merge() sorts the rows on the key by default, so the table
# lists Female before Male even though the factor levels are Male, Female.
df2 <- merge(x = df21, y = df22, by = "SEX")

knitr::kable(df2)
SEX Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
Female 162 6.667 6 2.003
Male 219 6.836 7 2.090


Distribution by Ethnicity

# Number of households per ethnic group.
df31 <- dplyr::count(
  dat,
  ETHNICITY,
  sort = FALSE,
  name = "Number_of_Households"
)

# Satisfaction mean / median / SD per ethnic group (rounded to three decimals).
df32 <- dat %>%
  dplyr::select(ETHNICITY, SATISFACTION_LEVEL) %>%
  dplyr::group_by(ETHNICITY) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Combine counts and summary statistics into one table keyed on ETHNICITY.
df3 <- merge(x = df31, y = df32, by = "ETHNICITY")

knitr::kable(df3)
ETHNICITY Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
Bahun 40 7.725 8 1.783
Chhetri 52 7.250 7 1.969
Dalit-Others 47 6.830 6 2.099
Gurung-Magar 81 6.741 7 2.011
Newar 36 7.167 7 1.828
Tamang 125 6.128 6 2.067


Distribution by Income Level

# Number of households per income bracket, in factor-level order.
df41 <- dat %>%
  dplyr::count(INCOME_LEVEL, sort = FALSE, name = "Number_of_Households")

# Satisfaction mean / median / SD per income bracket (rounded to three
# decimals).
df42 <- dat %>%
  dplyr::select(INCOME_LEVEL, SATISFACTION_LEVEL) %>%
  dplyr::group_by(INCOME_LEVEL) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Join instead of merge(): merge() re-sorts the rows on the key as text, which
# put "10-20K" ahead of "2.5-10K". inner_join() keeps the row order of df41,
# i.e. the ordinal bracket order declared in the INCOME_LEVEL factor levels.
df4 <- dplyr::inner_join(df41, df42, by = "INCOME_LEVEL")

knitr::kable(df4)
INCOME_LEVEL Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
0-2.5K 31 7.355 8 1.976
2.5-10K 148 6.527 6 2.065
10-20K 120 6.892 7 2.233
20-40K 70 6.829 7 1.668
40-60K 8 6.750 7 1.488
60K-Above 4 6.000 6 3.266


Distribution by Education Level

# Number of households per education level.
df51 <- dplyr::count(
  dat,
  EDUCATION_LEVEL,
  sort = FALSE,
  name = "Number_of_Households"
)

# Satisfaction mean / median / SD per education level (rounded to three
# decimals).
df52 <- dat %>%
  dplyr::select(EDUCATION_LEVEL, SATISFACTION_LEVEL) %>%
  dplyr::group_by(EDUCATION_LEVEL) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Combine counts and summary statistics into one table keyed on
# EDUCATION_LEVEL.
df5 <- merge(x = df51, y = df52, by = "EDUCATION_LEVEL")

knitr::kable(df5)
EDUCATION_LEVEL Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
Illiterate 178 6.539 6 2.072
Primary 80 7.000 7 1.889
Secondary 117 6.949 7 2.133
University 6 6.667 7 1.633


Distribution by Reconstruction Completed Level

# Number of households per reconstruction-completed level, in factor-level
# order.
df61 <- dat %>%
  dplyr::count(RECON_COMPLETED, sort = FALSE, name = "Number_of_Households")

# Satisfaction mean / median / SD per reconstruction-completed level (rounded
# to three decimals).
df62 <- dat %>%
  dplyr::select(RECON_COMPLETED, SATISFACTION_LEVEL) %>%
  dplyr::group_by(RECON_COMPLETED) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Join instead of merge(): merge() re-sorts the rows on the key as text, which
# listed 100 ahead of 20. inner_join() keeps the row order of df61, i.e. the
# order of the RECON_COMPLETED factor levels.
df6 <- dplyr::inner_join(df61, df62, by = "RECON_COMPLETED")

knitr::kable(df6)
RECON_COMPLETED Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
20 24 6.208 5.5 2.146
40 14 6.429 6.5 2.102
60 7 6.000 6.0 1.000
80 44 6.932 7.0 1.848
100 292 6.818 7.0 2.089


Distribution by Freedom of Choice

# Number of households per freedom-of-choice score.
df71 <- dat %>%
  dplyr::count(FREEDOM_CHOICE, sort = FALSE, name = "Number_of_Households")

# Satisfaction mean / median / SD per freedom-of-choice score (rounded to
# three decimals).
df72 <- dat %>%
  dplyr::select(FREEDOM_CHOICE, SATISFACTION_LEVEL) %>%
  dplyr::group_by(FREEDOM_CHOICE) %>%
  dplyr::summarize(
    Satisfaction_Mean = round(mean(SATISFACTION_LEVEL), 3),
    Satisfaction_Median = round(median(SATISFACTION_LEVEL), 3),
    Satisfaction_SD = round(sd(SATISFACTION_LEVEL), 3)
  )

# Combine counts and summary statistics into one table keyed on
# FREEDOM_CHOICE.
df7 <- merge(df71, df72, by = "FREEDOM_CHOICE")

# FREEDOM_CHOICE is a factor at this point. as.numeric() on a factor returns
# the internal level codes rather than the labels, which only coincides with
# the real scores when every score from 1 upward is present. Converting via
# as.character() recovers the actual score regardless of which levels occur.
df7$FREEDOM_CHOICE <- as.numeric(as.character(df7$FREEDOM_CHOICE))

# Present the rows in ascending score order.
df7 <- df7[order(df7$FREEDOM_CHOICE, decreasing = FALSE), ]

knitr::kable(df7)
FREEDOM_CHOICE Number_of_Households Satisfaction_Mean Satisfaction_Median Satisfaction_SD
1 1 36 5.278 5.0 2.146
3 2 8 6.000 6.5 3.423
4 3 20 5.800 6.0 1.642
5 4 40 5.950 5.0 1.709
6 5 74 6.014 5.0 1.716
7 6 55 6.836 7.0 1.330
8 7 57 7.368 7.0 1.447
9 8 21 7.810 8.0 1.721
10 9 21 8.619 9.0 1.564
2 10 49 8.143 10.0 2.398


Distribution of Satisfaction Level

# Histogram of the satisfaction score over all households, drawn on the
# density scale with a plain black-on-white theme (no grid, no legend).
p1 <- ggplot(data = dat, mapping = aes(x = SATISFACTION_LEVEL)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    position = "identity",
    bins = 10,
    fill = "#377eb8",
    color = "#FFFFFF"
  ) +
  scale_x_continuous(breaks = seq(0, 10, by = 1)) +
  labs(
    x = "Satisfaction Level",
    y = "Probability  Density",
    title = ""
  ) +
  theme(
    # Text elements
    plot.title = element_text(size = 10, color = "#000000", hjust = 0.5),
    axis.title.x = element_text(size = 10, color = "#000000"),
    axis.title.y = element_text(size = 10, color = "#000000"),
    axis.text.x = element_text(size = 10, color = "#000000"),
    axis.text.y = element_text(size = 10, color = "#000000"),
    strip.text = element_text(size = 10, color = "#000000"),
    # Panel and strip appearance
    panel.background = element_rect(fill = "#FFFFFF", color = "#000000"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "#FFFFFF", color = "#FFFFFF"),
    axis.ticks = element_line(size = 0.4, color = "#000000"),
    # Legend (suppressed)
    legend.position = "none",
    legend.title = element_blank(),
    legend.text = element_text(size = 10, color = "#000000"),
    legend.key.size = unit(0.5, "cm")
  )
p1

Distribution of Satisfaction Level (by Ethnicity)

# Histogram of the satisfaction score on the density scale, one panel per
# ethnic group (two rows of panels, shared axes).
p2 <- ggplot(data = dat, mapping = aes(x = SATISFACTION_LEVEL)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    position = "identity",
    bins = 10,
    fill = "#377eb8",
    color = "#FFFFFF"
  ) +
  scale_x_continuous(breaks = seq(0, 10, by = 1)) +
  facet_wrap(~ETHNICITY, nrow = 2, scales = "fixed") +
  labs(
    x = "Satisfaction Level",
    y = "Probability  Density",
    title = ""
  ) +
  theme(
    # Text elements
    plot.title = element_text(size = 10, color = "#000000", hjust = 0.5),
    axis.title.x = element_text(size = 10, color = "#000000"),
    axis.title.y = element_text(size = 10, color = "#000000"),
    axis.text.x = element_text(size = 10, color = "#000000"),
    axis.text.y = element_text(size = 10, color = "#000000"),
    strip.text = element_text(size = 10, color = "#000000", hjust = 0.5),
    # Panel and strip appearance
    panel.background = element_rect(fill = "#FFFFFF", color = "#000000"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "#FFFFFF", color = "#FFFFFF"),
    axis.ticks = element_line(size = 0.4, color = "#000000"),
    # Legend (suppressed)
    legend.position = "none",
    legend.title = element_blank(),
    legend.text = element_text(size = 10, color = "#000000"),
    legend.key.size = unit(0.5, "cm")
  )
p2

Distribution of Satisfaction Level (by Sex)

# Histogram of the satisfaction score on the density scale, one panel per
# sex (single row of panels, shared axes).
p3 <- ggplot(data = dat, mapping = aes(x = SATISFACTION_LEVEL)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    position = "identity",
    bins = 10,
    fill = "#377eb8",
    color = "#FFFFFF"
  ) +
  scale_x_continuous(breaks = seq(0, 10, by = 1)) +
  facet_wrap(~SEX, nrow = 1, scales = "fixed") +
  labs(
    x = "Satisfaction Level",
    y = "Probability  Density",
    title = ""
  ) +
  theme(
    # Text elements
    plot.title = element_text(size = 10, color = "#000000", hjust = 0.5),
    axis.title.x = element_text(size = 10, color = "#000000"),
    axis.title.y = element_text(size = 10, color = "#000000"),
    axis.text.x = element_text(size = 10, color = "#000000"),
    axis.text.y = element_text(size = 10, color = "#000000"),
    strip.text = element_text(size = 10, color = "#000000", hjust = 0.5),
    # Panel and strip appearance
    panel.background = element_rect(fill = "#FFFFFF", color = "#000000"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "#FFFFFF", color = "#FFFFFF"),
    axis.ticks = element_line(size = 0.4, color = "#000000"),
    # Legend (suppressed)
    legend.position = "none",
    legend.title = element_blank(),
    legend.text = element_text(size = 10, color = "#000000"),
    legend.key.size = unit(0.5, "cm")
  )
p3

Distribution of Satisfaction Level (by Ethnicity and Sex)

# Histogram of the satisfaction score on the density scale, laid out as a
# grid with sex in rows and ethnic group in columns (shared axes).
p4 <- ggplot(data = dat, mapping = aes(x = SATISFACTION_LEVEL)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    position = "identity",
    bins = 10,
    fill = "#377eb8",
    color = "#FFFFFF"
  ) +
  scale_x_continuous(breaks = seq(0, 10, by = 1)) +
  facet_grid(SEX ~ ETHNICITY, scales = "fixed") +
  labs(
    x = "Satisfaction Level",
    y = "Probability  Density",
    title = ""
  ) +
  theme(
    # Text elements
    plot.title = element_text(size = 10, color = "#000000", hjust = 0.5),
    axis.title.x = element_text(size = 10, color = "#000000"),
    axis.title.y = element_text(size = 10, color = "#000000"),
    axis.text.x = element_text(size = 10, color = "#000000"),
    axis.text.y = element_text(size = 10, color = "#000000"),
    strip.text = element_text(size = 10, color = "#000000", hjust = 0.5),
    # Panel and strip appearance
    panel.background = element_rect(fill = "#FFFFFF", color = "#000000"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_rect(fill = "#FFFFFF", color = "#FFFFFF"),
    axis.ticks = element_line(size = 0.4, color = "#000000"),
    # Legend (suppressed)
    legend.position = "none",
    legend.title = element_blank(),
    legend.text = element_text(size = 10, color = "#000000"),
    legend.key.size = unit(0.5, "cm")
  )
p4