Define Libraries

library("stringr")
library("dplyr")
library("vegan")

Define Path

dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.process <- file.path(dir.wrk, "data/data_processed")
dir.output <- file.path(dir.wrk, "output")

Define Files

file.dat <- file.path(dir.process, "household_level_data_frequency_table.tsv")

Define Categories

type_eth <- c("Newar","Brahman","Madhesi","Chettri","Muslim−Others",
             "Gurung−Magar","Dalit","Rai−Limbu","Tamang","Chepang−Thami")
type_fuel <- c("Wood","Electricity","BioGas","Keroscene","LPGas","Others")
type_inc <- c("0-10000","10000-20000","20000-30000","30000-50000","50000-ABOVE")
type_edu <- c("Illiterate","NonFormal-Other","Primary","Secondary","University")
type_geo <- c("Himalayan","Hilly")

cpalette.eth <- c("#e31a1c","#a6cee3","#1f78b4","#b2df8a","#33a02c",
                  "#fb9a99","#fdbf6f","#ff7f00","#cab2d6","#6a3d9a")
cpalette.inc <- c("#fdd49e","#fdbb84","#fc8d59","#e34a33","#b30000")
cpalette.edu <- c("#bfd3e6","#9ebcda","#8c96c6","#8856a7","#810f7c")
cpalette.geo <- c("#35978f","#bf812d")

Load Frequency Table Data

dat <- read.delim(file.dat, header=TRUE, stringsAsFactors=FALSE)
dat <- dat[-which(rowSums(dat[,5:10]) == 0),]

head(dat)
##   Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas
## 1   Brahman     Illiterate     Hilly     0-10000           0      159
## 2   Brahman     Illiterate     Hilly 10000-20000           0      213
## 3   Brahman     Illiterate     Hilly 20000-30000           1       95
## 4   Brahman     Illiterate     Hilly 30000-50000           0       27
## 5   Brahman     Illiterate     Hilly 50000-ABOVE           0        9
## 6   Brahman     Illiterate Himalayan     0-10000           1       24
##   Kerosene LPGas Others Wood
## 1        5   375      3 7932
## 2        1   666      1 3245
## 3        0   336      0 1038
## 4        0   109      0  355
## 5        0    69      1  156
## 6        1   118      4 5608

Transform Data to Log Scale

dml <- vegan::decostand(dat[,5:10], "log")
dml <-  cbind(dat[,-c(5:10)], dml)
dml$Ethnicity <- factor(dml$Ethnicity, levels=type_eth)

head(dml)
##   Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas
## 1   Brahman     Illiterate     Hilly     0-10000           0 8.312883
## 2   Brahman     Illiterate     Hilly 10000-20000           0 8.734710
## 3   Brahman     Illiterate     Hilly 20000-30000           1 7.569856
## 4   Brahman     Illiterate     Hilly 30000-50000           0 5.754888
## 5   Brahman     Illiterate     Hilly 50000-ABOVE           0 4.169925
## 6   Brahman     Illiterate Himalayan     0-10000           1 5.584963
##   Kerosene     LPGas   Others      Wood
## 1 3.321928  9.550747 2.584963 13.953469
## 2 1.000000 10.379378 1.000000 12.664003
## 3 0.000000  9.392317 0.000000 11.019591
## 4 0.000000  7.768184 0.000000  9.471675
## 5 0.000000  7.108524 1.000000  8.285402
## 6 1.000000  7.882643 3.000000 13.453271

Function: get_permanova()

get_permanova <- function(dm, control_geo, type_eth, type_inc, type_edu){
     # Retain Data for Control GeoRegion ---
     dm <- subset(dm, dm$GeoRegion == control_geo)  
  
     # Factorize ---
     dm$Ethnicity <- factor(dm$Ethnicity, levels=type_eth)
     dm$IncomeGroup <- factor(dm$IncomeGroup, levels=type_inc)
     dm$EducationLevel <- factor(dm$EducationLevel, levels=type_edu)
  
     # Compute Distance Matrix ---
     dist_dm <- vegan::vegdist(x=as.matrix(dm[,5:10]), method="euclidean", binary=FALSE, diag=TRUE, upper=TRUE, na.rm = FALSE)

     # Adonis test ---
     set.seed(12345)
     y_permanova <- vegan::adonis(dist_dm ~ Ethnicity+IncomeGroup+EducationLevel, 
                                  data=dm, permutations=999, method="euclidean", parallel=4)

     return(y_permanova)
}

Get Permanova by Control GeoRegion

list.permanova <- list()
for(i in 1:length(type_geo)){
  control_geo <- type_geo[i] 
  d <- get_permanova(dm=dml, control_geo, type_eth, type_inc, type_edu)
  list.permanova[[i]] <- cbind(Control_GeoRegion=control_geo,  Factor=rownames(as.data.frame(d$aov.tab)),  as.data.frame(d$aov.tab))
}

# Aggregate Results ---
df <- do.call(rbind.data.frame, list.permanova)
rownames(df) <- c(1:nrow(df))

head(df)
##   Control_GeoRegion         Factor  Df SumsOfSqs   MeanSqs  F.Model
## 1         Himalayan      Ethnicity   9 2502.5584 278.06204 64.04317
## 2         Himalayan    IncomeGroup   4 1660.4860 415.12151 95.61067
## 3         Himalayan EducationLevel   4  846.3357 211.58393 48.73195
## 4         Himalayan      Residuals 230  998.6118   4.34179       NA
## 5         Himalayan          Total 247 6007.9920        NA       NA
## 6             Hilly      Ethnicity   9 3260.7168 362.30187 71.09840
##          R2 Pr(>F)
## 1 0.4165382  0.001
## 2 0.2763795  0.001
## 3 0.1408683  0.001
## 4 0.1662139     NA
## 5 1.0000000     NA
## 6 0.4219129  0.001
# Write Table ---
file.output <- file.path(dir.output, "stats_abundance_permanova_factorcontrol_georegion.tsv")
write.table(df, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)