# HG changeset patch # User davidvanzessen # Date 1491912137 14400 # Node ID 77a7ac76c7b95f4522c7fb2717abc18ea91788e2 # Parent 1cf60ae234b4e39fed603282f5f53d398a7f42e5 Uploaded diff -r 1cf60ae234b4 -r 77a7ac76c7b9 plot_pdf.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plot_pdf.r Tue Apr 11 08:02:17 2017 -0400 @@ -0,0 +1,17 @@ +library(ggplot2) + +args <- commandArgs(trailingOnly = TRUE) +print(args) + +input = args[1] +outputdir = args[2] +setwd(outputdir) + +load(input) + +print(names(pdfplots)) + +for(n in names(pdfplots)){ + print(paste("n:", n)) + ggsave(pdfplots[[n]], file=n, device="pdf") +} diff -r 1cf60ae234b4 -r 77a7ac76c7b9 shm_csr.py --- a/shm_csr.py Tue Mar 28 08:25:36 2017 -0400 +++ b/shm_csr.py Tue Apr 11 08:02:17 2017 -0400 @@ -80,7 +80,7 @@ IDlist += [ID] -print mutationList, linecount +#print mutationList, linecount AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent if AALength < 60: diff -r 1cf60ae234b4 -r 77a7ac76c7b9 shm_csr.r --- a/shm_csr.r Tue Mar 28 08:25:36 2017 -0400 +++ b/shm_csr.r Tue Apr 11 08:02:17 2017 -0400 @@ -124,6 +124,8 @@ regions = c("CDR2", "FR3") } +pdfplots = list() #save() this later to create the pdf plots in another script (maybe avoids the "address (nil), cause memory not mapped") + sum_by_row = function(x, columns) { sum(as.numeric(x[columns]), na.rm=T) } print("aggregating data into new columns") @@ -308,7 +310,7 @@ print(p) dev.off() - ggsave(paste("transitions_stacked_", name, ".pdf", sep="")) + pdfplots[[paste("transitions_stacked_", name, ".pdf", sep="")]] <<- p png(filename=paste("transitions_heatmap_", name, ".png", sep="")) p = ggplot(transition2, aes(factor(reorder(variable, -order.y)), factor(reorder(id, -order.x)))) + geom_tile(aes(fill = value)) + scale_fill_gradient(low="white", high="steelblue") #heatmap @@ -316,7 +318,7 @@ print(p) dev.off() - ggsave(paste("transitions_heatmap_", name, ".pdf", sep="")) + pdfplots[[paste("transitions_heatmap_", name, ".pdf", sep="")]] <<- p } else { #print("No data to plot") } @@ -400,7 +402,7 @@ print(pc) dev.off() - ggsave("IGA.pdf", pc) + pdfplots[["IGA.pdf"]] <- pc } print("Plotting IGG piechart") @@ -423,7 +425,7 @@ print(pc) dev.off() - ggsave("IGG.pdf", pc) + pdfplots[["IGG.pdf"]] <- pc } print("Plotting scatterplot") @@ -445,7 +447,7 @@ print(p) dev.off() -ggsave("scatter.pdf", p) +pdfplots[["scatter.pdf"]] <- p write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) @@ -471,7 +473,9 @@ print(p) dev.off() -ggsave("frequency_ranges.pdf", p) +pdfplots[["frequency_ranges.pdf"]] <- p + +save(pdfplots, file="pdfplots.RData") frequency_bins_data_by_class = frequency_bins_data diff -r 1cf60ae234b4 -r 77a7ac76c7b9 wrapper.sh --- a/wrapper.sh Tue Mar 28 08:25:36 2017 -0400 +++ b/wrapper.sh Tue Apr 11 08:02:17 2017 -0400 @@ -176,6 +176,13 @@ echo "R mutation analysis" Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1 +echo "---------------- plot_pdfs.r ----------------" +echo "---------------- plot_pdfs.r ----------------
" >> $log + +echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir 2>&1" + +Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1 + echo "---------------- shm_csr.py ----------------" echo "---------------- shm_csr.py ----------------
" >> $log @@ -249,7 +256,7 @@ echo "---------------- pattern_plots.r ----------------" echo "---------------- pattern_plots.r ----------------
" >> $log - Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/abolute_mutations $outdir/shm_overview.txt 2>&1 + Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1 echo "" >> $output echo "" >> $output @@ -300,7 +307,7 @@ echo "
" >> $output echo "
" >> $output -echo "
" >> $output +echo "
" >> $output echo "
" >> $output cat $dir/shm_overview.htm >> $output echo "" >> $output #SHM overview tab end @@ -657,7 +664,7 @@ echo "" >> $output echo "" >> $output echo "" >> $output -echo "" >> $output +echo "" >> $output echo "" >> $output echo "" >> $output
info
Base count for every sequenceView
The data used to generate the percentage of mutations in AID and pol eta motives plotDownload
The data used to generate the relative mutation patterns plotDownload
The data used to generate the absolute mutation patterns plotDownload
The data used to generate the absolute mutation patterns plotDownload
SHM Frequency
The data generate the frequency scatter plotDownload