library("stringr")
library("dplyr")
library("reshape2")
library("ggplot2")
library("RColorBrewer")
library("vegan")
library("RVAideMemoire")
dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.process <- file.path(dir.wrk, "data/data_processed")
dir.output <- file.path(dir.wrk, "output")
file.dat <- file.path(dir.process, "household_level_data_frequency_table.tsv")
type_eth <- c("Newar","Brahman","Madhesi","Chettri","Muslim−Others",
"Gurung−Magar","Dalit","Rai−Limbu","Tamang","Chepang−Thami")
type_fuel <- c("Wood","Electricity","BioGas","Keroscene","LPGas","Others")
type_inc <- c("0-10000","10000-20000","20000-30000","30000-50000","50000-ABOVE")
type_edu <- c("Illiterate","NonFormal-Other","Primary","Secondary","University")
type_geo <- c("Himalayan","Hilly")
cpalette.eth <- c("#e31a1c","#a6cee3","#1f78b4","#b2df8a","#33a02c",
"#fb9a99","#fdbf6f","#ff7f00","#cab2d6","#6a3d9a")
cpalette.inc <- c("#fdd49e","#fdbb84","#fc8d59","#e34a33","#b30000")
cpalette.edu <- c("#bfd3e6","#9ebcda","#8c96c6","#8856a7","#810f7c")
cpalette.geo <- c("#35978f","#bf812d")
dat <- read.delim(file.dat, header=TRUE, stringsAsFactors=FALSE)
dat <- dat[-which(rowSums(dat[,5:10]) == 0),]
head(dat)
## Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas Kerosene
## 1 Brahman Illiterate Hilly 0-10000 0 159 5
## 2 Brahman Illiterate Hilly 10000-20000 0 213 1
## 3 Brahman Illiterate Hilly 20000-30000 1 95 0
## 4 Brahman Illiterate Hilly 30000-50000 0 27 0
## 5 Brahman Illiterate Hilly 50000-ABOVE 0 9 0
## 6 Brahman Illiterate Himalayan 0-10000 1 24 1
## LPGas Others Wood
## 1 375 3 7932
## 2 666 1 3245
## 3 336 0 1038
## 4 109 0 355
## 5 69 1 156
## 6 118 4 5608
dml <- vegan::decostand(dat[,5:10], "log")
dml <- cbind(dat[,-c(5:10)], dml)
dml$Ethnicity <- factor(dml$Ethnicity, levels=type_eth)
head(dml)
## Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas Kerosene
## 1 Brahman Illiterate Hilly 0-10000 0 8.312883 3.321928
## 2 Brahman Illiterate Hilly 10000-20000 0 8.734710 1.000000
## 3 Brahman Illiterate Hilly 20000-30000 1 7.569856 0.000000
## 4 Brahman Illiterate Hilly 30000-50000 0 5.754888 0.000000
## 5 Brahman Illiterate Hilly 50000-ABOVE 0 4.169925 0.000000
## 6 Brahman Illiterate Himalayan 0-10000 1 5.584963 1.000000
## LPGas Others Wood
## 1 9.550747 2.584963 13.953469
## 2 10.379378 1.000000 12.664003
## 3 9.392317 0.000000 11.019591
## 4 7.768184 0.000000 9.471675
## 5 7.108524 1.000000 8.285402
## 6 7.882643 3.000000 13.453271
list.kw <- list()
list.kw[[1]] <- kruskal.test(Wood ~ Ethnicity, data=dml)
list.kw[[2]] <- kruskal.test(Electricity ~ Ethnicity, data=dml)
list.kw[[3]] <- kruskal.test(GobarGas ~ Ethnicity, data=dml)
list.kw[[4]] <- kruskal.test(Kerosene ~ Ethnicity, data=dml)
list.kw[[5]] <- kruskal.test(LPGas ~ Ethnicity, data=dml)
list.kw[[6]] <- kruskal.test(Others ~ Ethnicity, data=dml)
# EXtract Data ---
list.df <- list()
for(i in 1:length(list.kw)){
d <- list.kw[[i]]
list.df[[i]] <- data.frame(Item=d$data.name, ChiSq.Statistic=d$statistic, df=d$parameter, pvalue=d$p.value)
}
# Aggregate Data ---
df <- do.call(rbind.data.frame, list.df)
rownames(df) <- c(1:nrow(df))
head(df)
## Item ChiSq.Statistic df pvalue
## 1 Wood by Ethnicity 149.88956 9 9.296684e-28
## 2 Electricity by Ethnicity 90.86661 9 1.090749e-15
## 3 GobarGas by Ethnicity 188.49928 9 8.481833e-36
## 4 Kerosene by Ethnicity 54.79593 9 1.332018e-08
## 5 LPGas by Ethnicity 276.66599 9 2.298195e-54
## 6 Others by Ethnicity 79.81956 9 1.755155e-13
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_krusalwallis_test.tsv")
write.table(df, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
dmt <- vegan::decostand(dat[,5:10], "total")
dmt <- cbind(dat[,-c(5:10)], dmt)
dmt$Ethnicity <- factor(dmt$Ethnicity, levels=type_eth)
head(dmt)
## Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas
## 1 Brahman Illiterate Hilly 0-10000 0.0000000000 0.018763276
## 2 Brahman Illiterate Hilly 10000-20000 0.0000000000 0.051623849
## 3 Brahman Illiterate Hilly 20000-30000 0.0006802721 0.064625850
## 4 Brahman Illiterate Hilly 30000-50000 0.0000000000 0.054989817
## 5 Brahman Illiterate Hilly 50000-ABOVE 0.0000000000 0.038297872
## 6 Brahman Illiterate Himalayan 0-10000 0.0001737318 0.004169562
## Kerosene LPGas Others Wood
## 1 0.0005900401 0.04425301 0.0003540241 0.9360397
## 2 0.0002423655 0.16141541 0.0002423655 0.7864760
## 3 0.0000000000 0.22857143 0.0000000000 0.7061224
## 4 0.0000000000 0.22199593 0.0000000000 0.7230143
## 5 0.0000000000 0.29361702 0.0042553191 0.6638298
## 6 0.0001737318 0.02050035 0.0006949270 0.9742877
list.wrst <- list()
list.wrst[[1]] <- pairwise.wilcox.test(x=dmt$Wood, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[2]] <- pairwise.wilcox.test(x=dmt$Electricity, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[3]] <- pairwise.wilcox.test(x=dmt$GobarGas, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[4]] <- pairwise.wilcox.test(x=dmt$Kerosene, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[5]] <- pairwise.wilcox.test(x=dmt$LPGas, g=dmt$Ethnicity, p.adjust.metho ="none", paired=FALSE)
list.wrst[[6]] <- pairwise.wilcox.test(x=dmt$Others, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
# EXtract Data ---
list.df <- list()
for(i in 1:length(list.wrst)){
d <- melt(list.wrst[[i]]$p.value)
colnames(d) <- c("Ethnicity1", "Ethnicity2", "pvalue")
d <- d[-which(is.na(d$pvalue)),]
d$pvalue.adj <- p.adjust(p=d$pvalue, method="bonferroni", n=length(d$pvalue))
list.df[[i]] <- cbind(FuelType=type_fuel[i], d)
}
# Aggregate Data ---
df <- do.call(rbind.data.frame, list.df)
rownames(df) <- c(1:nrow(df))
head(df)
## FuelType Ethnicity1 Ethnicity2 pvalue pvalue.adj
## 1 Wood Brahman Newar 1.659308e-02 0.746688526
## 2 Wood Madhesi Newar 8.550240e-01 1.000000000
## 3 Wood Chettri Newar 1.396926e-03 0.062861658
## 4 Wood Muslim-Others Newar 4.540765e-03 0.204334445
## 5 Wood Gurung-Magar Newar 1.162618e-05 0.000523178
## 6 Wood Dalit Newar 8.183934e-06 0.000368277
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_Mann–Whitney_test.tsv")
write.table(df, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
# GET DISTANCE MATRIX ---
dist_dml <- vegan::vegdist(x=as.matrix(dml[,5:10]), method="euclidean", binary=FALSE, diag=TRUE, upper=TRUE, na.rm=FALSE)
# PERMANOVA TEST ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dml ~ IncomeGroup+Ethnicity+EducationLevel+GeoRegion,
data=dml, permutations=999, method="euclidean", parallel=4)
y_permanova
##
## Call:
## vegan::adonis(formula = dist_dml ~ IncomeGroup + Ethnicity + EducationLevel + GeoRegion, data = dml, permutations = 999, method = "euclidean", parallel = 4)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## IncomeGroup 4 3765.4 941.35 175.18 0.26303 0.001 ***
## Ethnicity 9 5600.6 622.28 115.81 0.39122 0.001 ***
## EducationLevel 4 1806.2 451.54 84.03 0.12617 0.001 ***
## GeoRegion 1 580.2 580.22 107.98 0.04053 0.001 ***
## Residuals 477 2563.2 5.37 0.17905
## Total 495 14315.5 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova.tsv")
write.table(as.data.frame(y_permanova$aov.tab), file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
# PERMANOVA TEST ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dml ~ IncomeGroup*Ethnicity+EducationLevel+GeoRegion,
data=dml, permutations=999, method="euclidean", parallel=4)
y_permanova
##
## Call:
## vegan::adonis(formula = dist_dml ~ IncomeGroup * Ethnicity + EducationLevel + GeoRegion, data = dml, permutations = 999, method = "euclidean", parallel = 4)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## IncomeGroup 4 3765.4 941.35 191.473 0.26303 0.001 ***
## Ethnicity 9 5600.6 622.28 126.574 0.39122 0.001 ***
## EducationLevel 4 1806.2 451.54 91.844 0.12617 0.001 ***
## GeoRegion 1 580.2 580.22 118.019 0.04053 0.001 ***
## IncomeGroup:Ethnicity 36 395.1 10.97 2.232 0.02760 0.001 ***
## Residuals 441 2168.1 4.92 0.15145
## Total 495 14315.5 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova_interaction.tsv")
write.table(as.data.frame(y_permanova$aov.tab), file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)
permtst <- RVAideMemoire::pairwise.perm.manova(resp = dist_dml, fact = dml$Ethnicity,
test = "Pillai", nperm = 999, progress = TRUE, p.method = "none")
df <- reshape2::melt(permtst$p.value)
colnames(df) <- c("Ethnicity1", "Ethnicity2", "pvalue")
df <- df[-which(is.na(df$pvalue)), ]
df$pvalue.adj <- p.adjust(p = df$pvalue, method = "bonferroni", n = length(df$pvalue))
head(df)
## Ethnicity1 Ethnicity2 pvalue pvalue.adj
## 1 Brahman Newar 0.018 0.810
## 2 Madhesi Newar 0.001 0.045
## 3 Chettri Newar 0.083 1.000
## 4 Muslim-Others Newar 0.001 0.045
## 5 Gurung-Magar Newar 0.010 0.450
## 6 Dalit Newar 0.002 0.090
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova_posthoc.tsv")
write.table(df, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)
# GET DISTANCE MATRIX ---
dist_dml <- vegan::vegdist(x=as.matrix(dml[,5:10]), method="bray", binary=FALSE, diag=TRUE, upper=TRUE, na.rm=FALSE)
# PERMANOVA TEST ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dml ~ IncomeGroup+Ethnicity+EducationLevel+GeoRegion,
data=dml, permutations=999, method="bray", parallel=4)
y_permanova
##
## Call:
## vegan::adonis(formula = dist_dml ~ IncomeGroup + Ethnicity + EducationLevel + GeoRegion, data = dml, permutations = 999, method = "bray", parallel = 4)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## IncomeGroup 4 7.342 1.83547 57.224 0.17942 0.001 ***
## Ethnicity 9 13.789 1.53209 47.766 0.33697 0.001 ***
## EducationLevel 4 3.610 0.90238 28.134 0.08821 0.001 ***
## GeoRegion 1 0.880 0.87998 27.435 0.02150 0.001 ***
## Residuals 477 15.300 0.03208 0.37390
## Total 495 40.920 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova_braycurtis.tsv")
write.table(as.data.frame(y_permanova$aov.tab), file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)