changeset 2:e85fec274cde draft

Uploaded
author davidvanzessen
date Thu, 27 Oct 2016 07:26:45 -0400
parents faae21ba5c63
children 275ab5175fd6
files merge_and_filter.r shm_csr.r wrapper.sh
diffstat 3 files changed, 47 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Tue Oct 25 07:28:43 2016 -0400
+++ b/merge_and_filter.r	Thu Oct 27 07:26:45 2016 -0400
@@ -114,7 +114,7 @@
 }
 
 print(paste("Number of sequences in result after CDR/FR filtering:", nrow(result)))
-print(paste("Number of matched sequences in result after CDR/FR filtering:", nrow(result[!grepl("unmatched", result$best_match),])))
+print(paste("Number of sequences in result after CDR/FR filtering:", nrow(result[!grepl("unmatched", result$best_match),])))
 
 if(empty.region.filter == "leader"){
 	result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
@@ -123,7 +123,7 @@
 } else if(empty.region.filter == "CDR1"){
 	result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 } else if(empty.region.filter == "FR2"){
-	result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
+	result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
 }
 
 print(paste("Number of sequences in result after n filtering:", nrow(result)))
@@ -183,15 +183,15 @@
 chunk_hit_threshold = as.numeric(splt[1])
 nt_hit_threshold = as.numeric(splt[2])
 
-higher_than=(summ$chunk_hit_percentage >= chunk_hit_threshold & summ$nt_hit_percentage >= nt_hit_threshold)
+higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold)
 
-unmatched=summ[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
+unmatched=result[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
 
-if(!all(higher_than, na.rm=T)){ #check for 'not all' because that would mean the unmatched set is empty
-	unmatched = summ[!higher_than,]
+if(!all(higher_than, na.rm=T)){ #check for no unmatched
+	unmatched = result[!higher_than,]
 	unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
 	unmatched$best_match = paste("unmatched,", unmatched$best_match)
-	summ[!higher_than,"best_match"] = paste("unmatched,", summ[!higher_than,"best_match"])
+	result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"])
 }
 
 if(any(higher_than, na.rm=T)){
--- a/shm_csr.r	Tue Oct 25 07:28:43 2016 -0400
+++ b/shm_csr.r	Thu Oct 27 07:26:45 2016 -0400
@@ -374,7 +374,7 @@
 	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
 	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
 	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")"))
-	write.table(genesForPlot, "IGA.txt", sep="\t",quote=F,row.names=F,col.names=T)
+	write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 	png(filename="IGA.png")
 	print(pc)
@@ -395,7 +395,7 @@
 	pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL)
 	pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black"))
 	pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")"))
-	write.table(genesForPlot, "IGG.txt", sep="\t",quote=F,row.names=F,col.names=T)
+	write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 	png(filename="IGG.png")
 	print(pc)
--- a/wrapper.sh	Tue Oct 25 07:28:43 2016 -0400
+++ b/wrapper.sh	Thu Oct 27 07:26:45 2016 -0400
@@ -478,24 +478,37 @@
 echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> $output
 echo "<tr><td>The complete dataset</td><td><a href='merged.txt' download='merged.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The filtered dataset</td><td><a href='filtered.txt' download='filtered.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt' download='data_sum.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the first SHM Overview plot</td><td><a href='plot1.txt' download='plot1.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the second SHM Overview plot</td><td><a href='plot2.txt' download='plot2.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt' download='plot3.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt' download='unmatched.txt' >Download</a></td></tr>" >> $output
 
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Overview</td></tr>" >> $output
+echo "<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt' download='data_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the RGYW/WRCY and TW/WA plot</td><td><a href='plot1.txt' download='plot1.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the relative transition and transversion plot</td><td><a href='plot2.txt' download='plot2.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data used to generate the absolute transition and transversion plot</td><td><a href='plot3.txt' download='plot3.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>SHM Frequency</td></tr>" >> $output
 echo "<tr><td>The data  generate the frequency scatter plot</td><td><a href='scatter.txt' download='scatter.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data used to generate the frequency by class plot</td><td><a href='frequency_ranges_classes.txt' download='frequency_ranges_classes.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>The data for frequency by subclass</td><td><a href='frequency_ranges_subclasses.txt' download='frequency_ranges_subclasses.txt' >Download</a></td></tr>" >> $output
 
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Transition Tables</td></tr>" >> $output
+echo "<tr><td>The data for the 'all' transition plot</td><td><a href='transition_all_sum.txt' download='transition_all_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA' transition plot</td><td><a href='transition_IGA_sum.txt' download='transition_all_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transition_IGA1_sum.txt' download='transition_IGA1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGA1' transition plot</td><td><a href='transition_IGA1_sum.txt' download='transition_IGA1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG' transition plot</td><td><a href='transition_IGG_sum.txt' download='transition_IGG_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG1' transition plot</td><td><a href='transition_IGG1_sum.txt' download='transition_IGG1_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG2' transition plot</td><td><a href='transition_IGG2_sum.txt' download='transition_IGG2_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG3' transition plot</td><td><a href='transition_IGG3_sum.txt' download='transition_IGG3_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGG4' transition plot</td><td><a href='transition_IGG4_sum.txt' download='transition_IGG4_sum.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the 'IGM' transition plot</td><td><a href='transition_IGM_sum.txt' download='transition_IGM_sum.txt' >Download</a></td></tr>" >> $output
 
-echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt' download='motif_per_seq.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt' download='mutation_by_id.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Antigen Selection</td></tr>" >> $output
 echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt' download='aa_id_mutations.txt' >Download</a></td></tr>" >> $output
 echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt' download='absent_aa_id.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output
-
-echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>View</a></td></tr>" >> $output
 
 echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf' download='baseline.pdf' >Download</a></td></tr>" >> $output
 echo "<tr><td>Baseline data</td><td><a href='baseline.txt' download='baseline.txt' >Download</a></td></tr>" >> $output
@@ -506,6 +519,22 @@
 echo "<tr><td>Baseline IGM PDF</td><td><a href='baseline_IGM.pdf' download='baseline_IGM.pdf' >Download</a></td></tr>" >> $output
 echo "<tr><td>Baseline IGM data</td><td><a href='baseline_IGM.txt' download='baseline_IGM.txt' >Download</a></td></tr>" >> $output
 
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>CSR</td></tr>" >> $output
+echo "<tr><td>The data for the CSR IGA pie plot</td><td><a href='IGA_pie.txt' download='IGA_pie.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The data for the CSR IGG pie plot</td><td><a href='IGG_pie.txt' download='IGG_pie.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonality</td></tr>" >> $output
+echo "<tr><td>Sequence overlap between subclasses</td><td><a href='sequence_overview/index.html'>View</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output
+echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output
+
+echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Filtered IMGT output files</td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz' download='new_IMGT.txz' >Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered IGA sequences</td><td><a href='new_IMGT_IGA.txz' download='new_IMGT_IGA.txz' >Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered IGA1 sequences</td><td><a href='new_IMGT_IGA1.txz' download='new_IMGT_IGA1.txz' >Download</a></td></tr>" >> $output
@@ -517,15 +546,6 @@
 echo "<tr><td>An IMGT archive with just the matched and filtered IGG4 sequences</td><td><a href='new_IMGT_IGG4.txz' download='new_IMGT_IGG4.txz' >Download</a></td></tr>" >> $output
 echo "<tr><td>An IMGT archive with just the matched and filtered IGM sequences</td><td><a href='new_IMGT_IGM.txz' download='new_IMGT_IGM.txz' >Download</a></td></tr>" >> $output
 
-echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt' download='change_o/change-o-db-defined_clones.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt' download='change_o/change-o-defined_clones-summary.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href='change_o/change-o-db-defined_clones-IGA.txt' download='change_o/change-o-db-defined_clones-IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href='change_o/change-o-defined_clones-summary-IGA.txt' download='change_o/change-o-defined_clones-summary-IGA.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB file with defined clones of IGG</td><td><a href='change_o/change-o-db-defined_clones-IGG.txt' download='change_o/change-o-db-defined_clones-IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href='change_o/change-o-defined_clones-summary-IGG.txt' download='change_o/change-o-defined_clones-summary-IGG.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href='change_o/change-o-db-defined_clones-IGM.txt' download='change_o/change-o-db-defined_clones-IGM.txt' >Download</a></td></tr>" >> $output
-echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href='change_o/change-o-defined_clones-summary-IGM.txt' download='change_o/change-o-defined_clones-summary-IGM.txt' >Download</a></td></tr>" >> $output
-
 echo "</table>" >> $output
 
 echo "</div>" >> $output #downloads tab end