Define Libraries
library("stringr")
library("dplyr")
library("vegan")
Define Path
dir.wrk <- getwd()
dir.data <- file.path(dir.wrk, "data/data_household")
dir.annot <- file.path(dir.wrk, "data/data_annotations")
dir.process <- file.path(dir.wrk, "data/data_processed")
dir.output <- file.path(dir.wrk, "output")
Define Files
file.dat <- file.path(dir.process, "household_level_data_frequency_table.tsv")
Define Categories
type_eth <- c("Newar","Brahman","Madhesi","Chettri","Muslim−Others",
"Gurung−Magar","Dalit","Rai−Limbu","Tamang","Chepang−Thami")
type_fuel <- c("Wood","Electricity","BioGas","Keroscene","LPGas","Others")
type_inc <- c("0-10000","10000-20000","20000-30000","30000-50000","50000-ABOVE")
type_edu <- c("Illiterate","NonFormal-Other","Primary","Secondary","University")
type_geo <- c("Himalayan","Hilly")
cpalette.eth <- c("#e31a1c","#a6cee3","#1f78b4","#b2df8a","#33a02c",
"#fb9a99","#fdbf6f","#ff7f00","#cab2d6","#6a3d9a")
cpalette.inc <- c("#fdd49e","#fdbb84","#fc8d59","#e34a33","#b30000")
cpalette.edu <- c("#bfd3e6","#9ebcda","#8c96c6","#8856a7","#810f7c")
cpalette.geo <- c("#35978f","#bf812d")
Load Frequency Table Data
dat <- read.delim(file.dat, header=TRUE, stringsAsFactors=FALSE)
dat <- dat[-which(rowSums(dat[,5:10]) == 0),]
head(dat)
## Ethnicity EducationLevel GeoRegion IncomeGroup Electricity GobarGas
## 1 Brahman Illiterate Hilly 0-10000 0 159
## 2 Brahman Illiterate Hilly 10000-20000 0 213
## 3 Brahman Illiterate Hilly 20000-30000 1 95
## 4 Brahman Illiterate Hilly 30000-50000 0 27
## 5 Brahman Illiterate Hilly 50000-ABOVE 0 9
## 6 Brahman Illiterate Himalayan 0-10000 1 24
## Kerosene LPGas Others Wood
## 1 5 375 3 7932
## 2 1 666 1 3245
## 3 0 336 0 1038
## 4 0 109 0 355
## 5 0 69 1 156
## 6 1 118 4 5608
Function: get_permanova()
get_permanova <- function(dm, control_income, type_eth, type_edu, type_geo){
# Retain Data for Control Income Group ---
dm <- subset(dm, dm$IncomeGroup == control_income)
# Factorize ---
dm$Ethnicity <- factor(dm$Ethnicity, levels=type_eth)
dm$EducationLevel <- factor(dm$EducationLevel, levels=type_edu)
dm$GeoRegion <- factor(dm$GeoRegion, levels=type_geo)
# Compute Distance Matrix ---
dist_dm <- vegan::vegdist(x=as.matrix(dm[,5:10]), method="euclidean", binary=FALSE, diag=TRUE, upper=TRUE, na.rm = FALSE)
# Adonis test ---
set.seed(12345)
y_permanova <- vegan::adonis(dist_dm ~ Ethnicity+EducationLevel+GeoRegion,
data=dm, permutations=999, method="euclidean", parallel=4)
return(y_permanova)
}
Get Permanova by Control Income Group
list.permanova <- list()
for(i in 1:length(type_inc)){
control_income <- type_inc[i]
d <- get_permanova(dm=dml, control_income, type_eth, type_edu, type_geo)
list.permanova[[i]] <- cbind(Control_IncomeGroup=control_income, Factor=rownames(as.data.frame(d$aov.tab)), as.data.frame(d$aov.tab))
}
# Aggregate Results ---
df <- do.call(rbind.data.frame, list.permanova)
rownames(df) <- c(1:nrow(df))
head(df)
## Control_IncomeGroup Factor Df SumsOfSqs MeanSqs F.Model
## 1 0-10000 Ethnicity 9 1247.3146 138.590510 31.79899
## 2 0-10000 EducationLevel 4 1154.4441 288.611029 66.22054
## 3 0-10000 GeoRegion 1 177.4671 177.467087 40.71905
## 4 0-10000 Residuals 85 370.4581 4.358331 NA
## 5 0-10000 Total 99 2949.6839 NA NA
## 6 10000-20000 Ethnicity 9 1353.1291 150.347683 29.66447
## R2 Pr(>F)
## 1 0.42286381 0.001
## 2 0.39137892 0.001
## 3 0.06016478 0.001
## 4 0.12559248 NA
## 5 1.00000000 NA
## 6 0.54481852 0.001
# Write Table ---
file.output <- file.path(dir.output, "stats_abundance_permanova_factorcontrol_income.tsv")
write.table(df, file.output, sep="\t", row.names=FALSE, col.names=TRUE, quote=FALSE)