Define Libraries

library("stringr")
library("dplyr")
library("reshape2")
library("ggplot2")
library("RColorBrewer")
library("vegan")
library("RVAideMemoire")

Define Path

dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.process <- file.path(dir.wrk, "data/data_processed")
dir.output <- file.path(dir.wrk, "output")

Define Files

file.dat <- file.path(dir.process, "household_level_data_frequency_table.tsv")

Define Categories

type_eth <- c("Newar","Brahman","Madhesi","Chettri","Muslim−Others",
             "Gurung−Magar","Dalit","Rai−Limbu","Tamang","Chepang−Thami")
type_fuel <- c("Wood","Electricity","BioGas","Keroscene","LPGas","Others")
type_inc <- c("0-10000","10000-20000","20000-30000","30000-50000","50000-ABOVE")
type_edu <- c("Illiterate","NonFormal-Other","Primary","Secondary","University")
type_geo <- c("Himalayan","Hilly")

cpalette.eth <- c("#e31a1c","#a6cee3","#1f78b4","#b2df8a","#33a02c",
                  "#fb9a99","#fdbf6f","#ff7f00","#cab2d6","#6a3d9a")
cpalette.inc <- c("#fdd49e","#fdbb84","#fc8d59","#e34a33","#b30000")
cpalette.edu <- c("#bfd3e6","#9ebcda","#8c96c6","#8856a7","#810f7c")
cpalette.geo <- c("#35978f","#bf812d")

Load Frequency Table Data

dat <- read.delim(file.dat, header=TRUE, stringsAsFactors=FALSE)
dat <- dat[-which(rowSums(dat[,5:10]) == 0),]

head(dat)
##   Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas Kerosene
## 1   Brahman     Illiterate     Hilly     0-10000           0      159        5
## 2   Brahman     Illiterate     Hilly 10000-20000           0      213        1
## 3   Brahman     Illiterate     Hilly 20000-30000           1       95        0
## 4   Brahman     Illiterate     Hilly 30000-50000           0       27        0
## 5   Brahman     Illiterate     Hilly 50000-ABOVE           0        9        0
## 6   Brahman     Illiterate Himalayan     0-10000           1       24        1
##   LPGas Others Wood
## 1   375      3 7932
## 2   666      1 3245
## 3   336      0 1038
## 4   109      0  355
## 5    69      1  156
## 6   118      4 5608

Transform Data to Log Scale

dml <- vegan::decostand(dat[,5:10], "log")
dml <-  cbind(dat[,-c(5:10)], dml)
dml$Ethnicity <- factor(dml$Ethnicity, levels=type_eth)

head(dml)
##   Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas Kerosene
## 1   Brahman     Illiterate     Hilly     0-10000           0 8.312883 3.321928
## 2   Brahman     Illiterate     Hilly 10000-20000           0 8.734710 1.000000
## 3   Brahman     Illiterate     Hilly 20000-30000           1 7.569856 0.000000
## 4   Brahman     Illiterate     Hilly 30000-50000           0 5.754888 0.000000
## 5   Brahman     Illiterate     Hilly 50000-ABOVE           0 4.169925 0.000000
## 6   Brahman     Illiterate Himalayan     0-10000           1 5.584963 1.000000
##       LPGas   Others      Wood
## 1  9.550747 2.584963 13.953469
## 2 10.379378 1.000000 12.664003
## 3  9.392317 0.000000 11.019591
## 4  7.768184 0.000000  9.471675
## 5  7.108524 1.000000  8.285402
## 6  7.882643 3.000000 13.453271

Kruskal-Wallis Test (One-way ANOVA) on Abundance Data

list.kw <- list()
list.kw[[1]] <- kruskal.test(Wood ~ Ethnicity, data=dml)
list.kw[[2]] <- kruskal.test(Electricity ~ Ethnicity, data=dml)
list.kw[[3]] <- kruskal.test(GobarGas ~ Ethnicity, data=dml)
list.kw[[4]] <- kruskal.test(Kerosene ~ Ethnicity, data=dml)
list.kw[[5]] <- kruskal.test(LPGas ~ Ethnicity, data=dml)
list.kw[[6]] <- kruskal.test(Others ~ Ethnicity, data=dml)

# EXtract Data ---
list.df <- list()
for(i in 1:length(list.kw)){
  d <- list.kw[[i]]
  list.df[[i]] <- data.frame(Item=d$data.name, ChiSq.Statistic=d$statistic, df=d$parameter, pvalue=d$p.value)  
}

# Aggregate Data ---
df <- do.call(rbind.data.frame, list.df)
rownames(df) <- c(1:nrow(df))

head(df)
##                       Item ChiSq.Statistic df       pvalue
## 1        Wood by Ethnicity       149.88956  9 9.296684e-28
## 2 Electricity by Ethnicity        90.86661  9 1.090749e-15
## 3    GobarGas by Ethnicity       188.49928  9 8.481833e-36
## 4    Kerosene by Ethnicity        54.79593  9 1.332018e-08
## 5       LPGas by Ethnicity       276.66599  9 2.298195e-54
## 6      Others by Ethnicity        79.81956  9 1.755155e-13
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_krusalwallis_test.tsv")
write.table(df, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)

Transform Data to Relative Scale (Total Transformation)

dmt <- vegan::decostand(dat[,5:10], "total")
dmt <-  cbind(dat[,-c(5:10)], dmt)
dmt$Ethnicity <- factor(dmt$Ethnicity, levels=type_eth)

head(dmt)
##   Ethnicity EducationLevel GeoRegion IncomeGroup  Electricity    GobarGas
## 1   Brahman     Illiterate     Hilly     0-10000 0.0000000000 0.018763276
## 2   Brahman     Illiterate     Hilly 10000-20000 0.0000000000 0.051623849
## 3   Brahman     Illiterate     Hilly 20000-30000 0.0006802721 0.064625850
## 4   Brahman     Illiterate     Hilly 30000-50000 0.0000000000 0.054989817
## 5   Brahman     Illiterate     Hilly 50000-ABOVE 0.0000000000 0.038297872
## 6   Brahman     Illiterate Himalayan     0-10000 0.0001737318 0.004169562
##       Kerosene      LPGas       Others      Wood
## 1 0.0005900401 0.04425301 0.0003540241 0.9360397
## 2 0.0002423655 0.16141541 0.0002423655 0.7864760
## 3 0.0000000000 0.22857143 0.0000000000 0.7061224
## 4 0.0000000000 0.22199593 0.0000000000 0.7230143
## 5 0.0000000000 0.29361702 0.0042553191 0.6638298
## 6 0.0001737318 0.02050035 0.0006949270 0.9742877

Perform Pairwise Wilcoxon Rank Sum Test (Mann-Whitney U Test)

list.wrst <- list()
list.wrst[[1]] <- pairwise.wilcox.test(x=dmt$Wood, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[2]] <- pairwise.wilcox.test(x=dmt$Electricity, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[3]] <- pairwise.wilcox.test(x=dmt$GobarGas, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[4]] <- pairwise.wilcox.test(x=dmt$Kerosene, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)
list.wrst[[5]] <- pairwise.wilcox.test(x=dmt$LPGas, g=dmt$Ethnicity, p.adjust.metho ="none", paired=FALSE)
list.wrst[[6]] <- pairwise.wilcox.test(x=dmt$Others, g=dmt$Ethnicity, p.adjust.method="none", paired=FALSE)

# EXtract Data ---
list.df <- list()
for(i in 1:length(list.wrst)){
  d <- melt(list.wrst[[i]]$p.value)
  colnames(d) <- c("Ethnicity1", "Ethnicity2", "pvalue")
  d <- d[-which(is.na(d$pvalue)),]
  d$pvalue.adj <- p.adjust(p=d$pvalue, method="bonferroni", n=length(d$pvalue))
  list.df[[i]] <- cbind(FuelType=type_fuel[i], d)
}

# Aggregate Data ---
df <- do.call(rbind.data.frame, list.df)
rownames(df) <- c(1:nrow(df))

head(df)
##   FuelType    Ethnicity1 Ethnicity2       pvalue  pvalue.adj
## 1     Wood       Brahman      Newar 1.659308e-02 0.746688526
## 2     Wood       Madhesi      Newar 8.550240e-01 1.000000000
## 3     Wood       Chettri      Newar 1.396926e-03 0.062861658
## 4     Wood Muslim-Others      Newar 4.540765e-03 0.204334445
## 5     Wood  Gurung-Magar      Newar 1.162618e-05 0.000523178
## 6     Wood         Dalit      Newar 8.183934e-06 0.000368277
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_Mann–Whitney_test.tsv")
write.table(df, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)

Perform Permanova Test: Using Euclidean Distance

# GET DISTANCE MATRIX ---
dist_dml <- vegan::vegdist(x=as.matrix(dml[,5:10]), method="euclidean", binary=FALSE, diag=TRUE, upper=TRUE, na.rm=FALSE)

# PERMANOVA TEST ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dml ~ IncomeGroup+Ethnicity+EducationLevel+GeoRegion, 
                          data=dml, permutations=999, method="euclidean", parallel=4)
y_permanova
## 
## Call:
## vegan::adonis(formula = dist_dml ~ IncomeGroup + Ethnicity +      EducationLevel + GeoRegion, data = dml, permutations = 999,      method = "euclidean", parallel = 4) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## IncomeGroup      4    3765.4  941.35  175.18 0.26303  0.001 ***
## Ethnicity        9    5600.6  622.28  115.81 0.39122  0.001 ***
## EducationLevel   4    1806.2  451.54   84.03 0.12617  0.001 ***
## GeoRegion        1     580.2  580.22  107.98 0.04053  0.001 ***
## Residuals      477    2563.2    5.37         0.17905           
## Total          495   14315.5                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova.tsv")
write.table(as.data.frame(y_permanova$aov.tab), file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)

Model with Interaction between Income and Ethnicty

# PERMANOVA TEST ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dml ~ IncomeGroup*Ethnicity+EducationLevel+GeoRegion, 
                          data=dml, permutations=999, method="euclidean", parallel=4)
y_permanova
## 
## Call:
## vegan::adonis(formula = dist_dml ~ IncomeGroup * Ethnicity +      EducationLevel + GeoRegion, data = dml, permutations = 999,      method = "euclidean", parallel = 4) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                        Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## IncomeGroup             4    3765.4  941.35 191.473 0.26303  0.001 ***
## Ethnicity               9    5600.6  622.28 126.574 0.39122  0.001 ***
## EducationLevel          4    1806.2  451.54  91.844 0.12617  0.001 ***
## GeoRegion               1     580.2  580.22 118.019 0.04053  0.001 ***
## IncomeGroup:Ethnicity  36     395.1   10.97   2.232 0.02760  0.001 ***
## Residuals             441    2168.1    4.92         0.15145           
## Total                 495   14315.5                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova_interaction.tsv")
write.table(as.data.frame(y_permanova$aov.tab), file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)

Post-hoc Test for Permanova

permtst <- RVAideMemoire::pairwise.perm.manova(resp = dist_dml, fact = dml$Ethnicity, 
    test = "Pillai", nperm = 999, progress = TRUE, p.method = "none")
df <- reshape2::melt(permtst$p.value)
colnames(df) <- c("Ethnicity1", "Ethnicity2", "pvalue")
df <- df[-which(is.na(df$pvalue)), ]
df$pvalue.adj <- p.adjust(p = df$pvalue, method = "bonferroni", n = length(df$pvalue))

head(df)
##      Ethnicity1 Ethnicity2 pvalue pvalue.adj
## 1       Brahman      Newar  0.018      0.810
## 2       Madhesi      Newar  0.001      0.045
## 3       Chettri      Newar  0.083      1.000
## 4 Muslim-Others      Newar  0.001      0.045
## 5  Gurung-Magar      Newar  0.010      0.450
## 6         Dalit      Newar  0.002      0.090
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova_posthoc.tsv")
write.table(df, file.output, sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE)


Perform Permanova Test: Using Bray-Curtis Distance

# GET DISTANCE MATRIX ---
dist_dml <- vegan::vegdist(x=as.matrix(dml[,5:10]), method="bray", binary=FALSE, diag=TRUE, upper=TRUE, na.rm=FALSE)

# PERMANOVA TEST ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dml ~ IncomeGroup+Ethnicity+EducationLevel+GeoRegion, 
                          data=dml, permutations=999, method="bray", parallel=4)
y_permanova
## 
## Call:
## vegan::adonis(formula = dist_dml ~ IncomeGroup + Ethnicity +      EducationLevel + GeoRegion, data = dml, permutations = 999,      method = "bray", parallel = 4) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##                 Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## IncomeGroup      4     7.342 1.83547  57.224 0.17942  0.001 ***
## Ethnicity        9    13.789 1.53209  47.766 0.33697  0.001 ***
## EducationLevel   4     3.610 0.90238  28.134 0.08821  0.001 ***
## GeoRegion        1     0.880 0.87998  27.435 0.02150  0.001 ***
## Residuals      477    15.300 0.03208         0.37390           
## Total          495    40.920                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Write Output ---
file.output <- file.path(dir.output, "stats_abundance_permanova_braycurtis.tsv")
write.table(as.data.frame(y_permanova$aov.tab), file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)