# HG changeset patch # User davidvanzessen # Date 1477567605 14400 # Node ID e85fec274cde825eb8858b72b34d0060c8a6dd99 # Parent faae21ba5c6387e11d4ec568f501121294511876 Uploaded diff -r faae21ba5c63 -r e85fec274cde merge_and_filter.r --- a/merge_and_filter.r Tue Oct 25 07:28:43 2016 -0400 +++ b/merge_and_filter.r Thu Oct 27 07:26:45 2016 -0400 @@ -114,7 +114,7 @@ } print(paste("Number of sequences in result after CDR/FR filtering:", nrow(result))) -print(paste("Number of matched sequences in result after CDR/FR filtering:", nrow(result[!grepl("unmatched", result$best_match),]))) +print(paste("Number of sequences in result after CDR/FR filtering:", nrow(result[!grepl("unmatched", result$best_match),]))) if(empty.region.filter == "leader"){ result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] @@ -123,7 +123,7 @@ } else if(empty.region.filter == "CDR1"){ result = result[!(grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } else if(empty.region.filter == "FR2"){ - result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] + result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } print(paste("Number of sequences in result after n filtering:", nrow(result))) @@ -183,15 +183,15 @@ chunk_hit_threshold = as.numeric(splt[1]) nt_hit_threshold = as.numeric(splt[2]) -higher_than=(summ$chunk_hit_percentage >= chunk_hit_threshold & summ$nt_hit_percentage >= nt_hit_threshold) +higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold) -unmatched=summ[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] +unmatched=result[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] -if(!all(higher_than, na.rm=T)){ #check for 'not all' because that would mean the unmatched set is empty - unmatched = summ[!higher_than,] +if(!all(higher_than, na.rm=T)){ #check for no unmatched + unmatched = result[!higher_than,] unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")] unmatched$best_match = paste("unmatched,", unmatched$best_match) - summ[!higher_than,"best_match"] = paste("unmatched,", summ[!higher_than,"best_match"]) + result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"]) } if(any(higher_than, na.rm=T)){ diff -r faae21ba5c63 -r e85fec274cde shm_csr.r --- a/shm_csr.r Tue Oct 25 07:28:43 2016 -0400 +++ b/shm_csr.r Thu Oct 27 07:26:45 2016 -0400 @@ -374,7 +374,7 @@ pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")")) - write.table(genesForPlot, "IGA.txt", sep="\t",quote=F,row.names=F,col.names=T) + write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) png(filename="IGA.png") print(pc) @@ -395,7 +395,7 @@ pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")")) - write.table(genesForPlot, "IGG.txt", sep="\t",quote=F,row.names=F,col.names=T) + write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) png(filename="IGG.png") print(pc) diff -r faae21ba5c63 -r e85fec274cde wrapper.sh --- a/wrapper.sh Tue Oct 25 07:28:43 2016 -0400 +++ b/wrapper.sh Thu Oct 27 07:26:45 2016 -0400 @@ -478,24 +478,37 @@ echo "infolink" >> $output echo "The complete datasetDownload" >> $output echo "The filtered datasetDownload" >> $output -echo "The SHM Overview table as a datasetDownload" >> $output -echo "The data used to generate the first SHM Overview plotDownload" >> $output -echo "The data used to generate the second SHM Overview plotDownload" >> $output -echo "The data used to generate the third SHM Overview plotDownload" >> $output echo "The alignment info on the unmatched sequencesDownload" >> $output +echo "SHM Overview" >> $output +echo "The SHM Overview table as a datasetDownload" >> $output +echo "Motif data per sequence IDDownload" >> $output +echo "Mutation data per sequence IDDownload" >> $output +echo "Base count for every sequenceView" >> $output +echo "The data used to generate the RGYW/WRCY and TW/WA plotDownload" >> $output +echo "The data used to generate the relative transition and transversion plotDownload" >> $output +echo "The data used to generate the absolute transition and transversion plotDownload" >> $output + +echo "SHM Frequency" >> $output echo "The data generate the frequency scatter plotDownload" >> $output echo "The data used to generate the frequency by class plotDownload" >> $output echo "The data for frequency by subclassDownload" >> $output +echo "Transition Tables" >> $output +echo "The data for the 'all' transition plotDownload" >> $output +echo "The data for the 'IGA' transition plotDownload" >> $output +echo "The data for the 'IGA1' transition plotDownload" >> $output +echo "The data for the 'IGA1' transition plotDownload" >> $output +echo "The data for the 'IGG' transition plotDownload" >> $output +echo "The data for the 'IGG1' transition plotDownload" >> $output +echo "The data for the 'IGG2' transition plotDownload" >> $output +echo "The data for the 'IGG3' transition plotDownload" >> $output +echo "The data for the 'IGG4' transition plotDownload" >> $output +echo "The data for the 'IGM' transition plotDownload" >> $output -echo "Motif data per sequence IDDownload" >> $output -echo "Mutation data per sequence IDDownload" >> $output +echo "Antigen Selection" >> $output echo "AA mutation data per sequence IDDownload" >> $output echo "Absent AA location data per sequence IDDownload" >> $output -echo "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than onceView" >> $output - -echo "Base count for every sequenceView" >> $output echo "Baseline PDF (http://selection.med.yale.edu/baseline/)Download" >> $output echo "Baseline dataDownload" >> $output @@ -506,6 +519,22 @@ echo "Baseline IGM PDFDownload" >> $output echo "Baseline IGM dataDownload" >> $output +echo "CSR" >> $output +echo "The data for the CSR IGA pie plotDownload" >> $output +echo "The data for the CSR IGG pie plotDownload" >> $output + +echo "Clonality" >> $output +echo "Sequence overlap between subclassesView" >> $output +echo "The Change-O DB file with defined clones and subclass annotationDownload" >> $output +echo "The Change-O DB defined clones summary fileDownload" >> $output +echo "The Change-O DB file with defined clones of IGADownload" >> $output +echo "The Change-O DB defined clones summary file of IGADownload" >> $output +echo "The Change-O DB file with defined clones of IGGDownload" >> $output +echo "The Change-O DB defined clones summary file of IGGDownload" >> $output +echo "The Change-O DB file with defined clones of IGMDownload" >> $output +echo "The Change-O DB defined clones summary file of IGMDownload" >> $output + +echo "Filtered IMGT output files" >> $output echo "An IMGT archive with just the matched and filtered sequencesDownload" >> $output echo "An IMGT archive with just the matched and filtered IGA sequencesDownload" >> $output echo "An IMGT archive with just the matched and filtered IGA1 sequencesDownload" >> $output @@ -517,15 +546,6 @@ echo "An IMGT archive with just the matched and filtered IGG4 sequencesDownload" >> $output echo "An IMGT archive with just the matched and filtered IGM sequencesDownload" >> $output -echo "The Change-O DB file with defined clones and subclass annotationDownload" >> $output -echo "The Change-O DB defined clones summary fileDownload" >> $output -echo "The Change-O DB file with defined clones of IGADownload" >> $output -echo "The Change-O DB defined clones summary file of IGADownload" >> $output -echo "The Change-O DB file with defined clones of IGGDownload" >> $output -echo "The Change-O DB defined clones summary file of IGGDownload" >> $output -echo "The Change-O DB file with defined clones of IGMDownload" >> $output -echo "The Change-O DB defined clones summary file of IGMDownload" >> $output - echo "" >> $output echo "" >> $output #downloads tab end