library("argparse")
# ---- Command-line interface ----
# Defines the options of this script and parses them into `opt`.
# Options that carry a default are optional; only --input is mandatory.
parser <- ArgumentParser(
  description = "plot Orthogroups.GeneCount venn and stat "
)
# Input table: read later as a tab-separated file with a header row and the
# orthogroup ids in the first column; its last column is dropped before use.
parser$add_argument(
  "-i", "--input",
  type = "character", required = TRUE,
  help = "tab-separated gene count table (Orthogroups.GeneCount); the last column is ignored [required]",
  metavar = "filepath"
)
parser$add_argument(
  "-x", "--xlab",
  type = "character", required = FALSE, default = "",
  help = "input xlab [default %(default)s]",
  metavar = "xlab"
)
parser$add_argument(
  "-y", "--ylab",
  type = "character", required = FALSE, default = "Gene number",
  help = "input ylab [default %(default)s]",
  metavar = "ylab"
)
# The palette name is appended to "scale_fill_" to look up the fill scale.
parser$add_argument(
  "-P", "--palette",
  type = "character", required = FALSE, default = "lancet",
  help = "fill palette in ggsci : eg npg lancet... for more info:https://nanx.me/ggsci/articles/ggsci.html [default %(default)s]",
  metavar = "palette"
)
# One or more species names; when omitted the column order of the input is used.
parser$add_argument(
  "-s", "--species.order",
  type = "character", required = FALSE, default = NULL, nargs = "+",
  help = "species name order for bar plot [default %(default)s]",
  metavar = "species.order"
)
parser$add_argument(
  "-o", "--outdir",
  type = "character", default = getwd(),
  help = "output file directory [default %(default)s]",
  metavar = "path"
)
parser$add_argument(
  "-n", "--name",
  type = "character", default = "GF",
  help = "out file name prefix [default %(default)s]",
  metavar = "name"
)
parser$add_argument(
  "-H", "--height",
  type = "double", default = 5,
  help = "the height of pic inches [default %(default)s]",
  metavar = "number"
)
parser$add_argument(
  "-W", "--width",
  type = "double", default = 5,
  help = "the width of pic inches [default %(default)s]",
  metavar = "number"
)
opt <- parser$parse_args()
# Make sure the output directory exists before anything is written into it.
# dir.exists() is used instead of file.exists(): the latter is also TRUE for a
# regular file, which would let a bad --outdir slip through and fail much later
# with an unrelated-looking write error.
if (!dir.exists(opt$outdir)) {
  created <- dir.create(opt$outdir, showWarnings = FALSE, recursive = TRUE)
  if (!created) {
    stop(paste0("dir.create failed: outdir=", opt$outdir))
  }
}
require(ggplot2)
require(reshape2)
require(RColorBrewer)
library(tidyr)
library(dplyr)
library(ggsci)
library(getopt)
# Resolve the path of the running script and load the helper file that sits in
# the same directory.
# NOTE(review): venn_util.r presumably provides outersect() and venn2..venn5(),
# which are called further down but not defined in this file -- confirm.
script <- get_Rscript_filename()
source(file.path(dirname(script), "venn_util.r"))
# ---- Read the count table and classify every orthogroup ----
# Rows are orthogroups (ids taken from the first column), columns are species.
df <- read.table(opt$input, sep = "\t", row.names = 1, header = TRUE,
                 check.names = FALSE, comment.char = "")
# Drop the last column. drop = FALSE keeps a data frame even when only one
# species column remains; without it the result collapses to a plain vector
# and rowSums()/ncol() below fail.
df <- df[, -ncol(df), drop = FALSE]

# Per-orthogroup summaries, computed once and reused by the masks below.
n_species <- ncol(df)
species_number <- rowSums(df > 0)   # number of species with at least one gene
gene_total <- rowSums(df)           # total genes in the orthogroup
in_one_species <- rowSums(df == 0) == (n_species - 1)

# Classification masks.
single_copy <- rowSums(df == 1) == n_species
species_specific <- in_one_species & gene_total > 1
all_species <- species_number == n_species & gene_total > n_species
unassigined_genes <- in_one_species & gene_total == 1

stat_df <- df
stat_df$species_number <- species_number
# The nesting order is the precedence: the first matching category wins.
stat_df$type <- ifelse(single_copy, "Single-copy orthologs",
  ifelse(species_specific, "Unique paralogs",
    ifelse(unassigined_genes, "Unclusters genes",
      ifelse(all_species, "Multi-copy orthologs", "Other orthologs"))))

# Per-orthogroup table: counts, species_number and assigned type.
write.table(data.frame(Orthogroup = rownames(stat_df), stat_df),
            file = paste0(opt$outdir, "/", opt$name, ".type_table.tsv"),
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
# Report how many orthogroups fall into each type on stdout.
print(table(stat_df$type))
# ---- Gene number per species and orthogroup type: table + stacked bar plot ----
# Long format: one row per (species, type) with the summed gene count.
stat_bar_df <- stat_df %>%
  select(-species_number) %>%
  pivot_longer(!type, names_to = "species", values_to = "count") %>%
  group_by(species, type) %>%
  summarize(gene_number = sum(count), .groups = "drop")

# Fixed stacking/legend order of the types (reversed list of levels).
stat_bar_df$type <- factor(stat_bar_df$type, ordered = TRUE,
                           levels = rev(c("Single-copy orthologs",
                                          "Multi-copy orthologs",
                                          "Unique paralogs",
                                          "Other orthologs",
                                          "Unclusters genes")))

# Order of the species on the x axis: user supplied, else input column order.
if (!is.null(opt$species.order)) {
  # Species absent from the requested order become NA factor values; warn
  # instead of letting that happen silently.
  not_listed <- setdiff(unique(stat_bar_df$species), unlist(opt$species.order))
  if (length(not_listed) > 0) {
    warning("species missing from --species.order: ",
            paste(not_listed, collapse = ", "), call. = FALSE)
  }
  stat_bar_df$species <- factor(stat_bar_df$species, ordered = TRUE,
                                levels = opt$species.order)
} else {
  stat_bar_df$species <- factor(stat_bar_df$species, levels = colnames(df))
}

# Look up the fill scale named by --palette and fail with a readable message
# when no such scale function is available.
fill_scale_name <- paste0("scale_fill_", opt$palette)
if (!exists(fill_scale_name, mode = "function")) {
  stop("unknown fill palette '", opt$palette, "': no function named ",
       fill_scale_name, call. = FALSE)
}
fill_scale <- get(fill_scale_name, mode = "function")

# Axis titles come from --xlab / --ylab; their defaults ("" and "Gene number")
# are the values that used to be hard-coded here.
p <- ggplot(stat_bar_df, aes(species, as.double(gene_number))) +
  geom_bar(aes(fill = type), stat = "identity") +
  fill_scale() +
  labs(y = opt$ylab, fill = "", x = opt$xlab) +
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  theme_classic() +
  theme(axis.text.x = element_text(colour = "black", angle = 45, hjust = 1),
        axis.text.y = element_text(colour = "black"))

# Same figure as PDF (inches) and PNG (pixels at 300 dpi); 1.5x the width.
pdf(file = paste0(opt$outdir, "/", opt$name, ".gene_number_bar.pdf"),
    height = opt$height, width = opt$width * 1.5)
print(p)
dev.off()
png(filename = paste0(opt$outdir, "/", opt$name, ".gene_number_bar.png"),
    height = opt$height * 300, width = opt$width * 300 * 1.5,
    res = 300, units = "px")
print(p)
dev.off()

# Wide summary: one row per species, one column per orthogroup type.
stat_bar_df_res <- stat_bar_df %>%
  pivot_wider(names_from = type, values_from = gene_number)
write.table(stat_bar_df_res,
            file = paste0(opt$outdir, "/", opt$name, ".type_gene_number_stat.tsv"),
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
# ---- Membership table for the Venn / flower plots ----
# A zero count means the orthogroup is absent from that species.
df[df == 0] <- NA
otu.id <- rownames(df)

# Replace every non-missing entry of one species column by the orthogroup id
# of its row (taken from the global `otu.id`); missing entries stay NA.
# Always returns a character vector of the same length as `x`.
map.id <- function(x) {
  ids <- rep(NA_character_, length(x))
  present <- !is.na(x)
  ids[present] <- otu.id[present]
  ids
}

# Character matrix, one column per species. It is assembled explicitly rather
# than through sapply(): sapply() returns a plain vector when there is a single
# orthogroup row, which would corrupt the table written below.
mydf <- matrix(
  as.character(unlist(lapply(df, map.id), use.names = FALSE)),
  nrow = nrow(df), ncol = ncol(df),
  dimnames = list(NULL, colnames(df))
)
write.table(mydf,
            file = paste0(opt$outdir, "/", opt$name, ".data_for_venn.tsv"),
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
# ---- Per-species orthogroup sets, flower table and Venn diagram ----
# One element per species holding the ids of the orthogroups present in it.
# lapply() is required here: sapply() simplifies the result to a matrix when
# every species happens to have the same number of orthogroups, and then
# length() and Reduce(intersect, ...) would work on cells instead of species.
mydf <- lapply(as.data.frame(mydf), na.omit)
samn <- length(mydf)

# Orthogroups shared by all species.
coreID <- Reduce(intersect, mydf)
# NOTE(review): outersect() is not defined in this file (presumably comes from
# venn_util.r); its result is written out unchanged as the flower table.
flower <- outersect(mydf, length(coreID), unlist(mydf))
write.table(flower,
            file = paste0(opt$outdir, "/", opt$name, ".flower.tsv"),
            quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)

# A Venn diagram is drawn for 2 to 5 species only, by the helper named
# venn<samn> (venn2 ... venn5), which is looked up by name.
if (samn >= 2 && samn <= 5) {
  myvenn <- get(paste0("venn", samn), mode = "function")
  myvenn(mydf, paste0(opt$outdir, "/", opt$name),
         h = opt$height, w = opt$width)
} else if (samn > 5) {
  cat("more than 5 species, venn not plot\n")
} else {
  # Previously this case was reported as "more than 5 species".
  cat("fewer than 2 species, venn not plot\n")
}