# HG changeset patch # User davidvanzessen # Date 1479909940 18000 # Node ID 4e596473c25c16a27999f79090bbf862940be7ef # Parent 61d0a6318711d3b52e248a6f4f787ee033628df0 Uploaded diff -r 61d0a6318711 -r 4e596473c25c merge_and_filter.r --- a/merge_and_filter.r Thu Nov 17 07:33:21 2016 -0500 +++ b/merge_and_filter.r Wed Nov 23 09:05:40 2016 -0500 @@ -96,6 +96,8 @@ result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele) result$JGene = gsub("[*].*", "", result$JGene) +print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) + splt = strsplit(class.filter, "_")[[1]] chunk_hit_threshold = as.numeric(splt[1]) nt_hit_threshold = as.numeric(splt[2]) @@ -110,6 +112,8 @@ result$best_match = "all" } +print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) + write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == ""))) @@ -135,6 +139,8 @@ filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result))) } +print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) + if(empty.region.filter == "leader"){ result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } else if(empty.region.filter == "FR1"){ @@ -145,6 +151,9 @@ result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),] } +print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) +print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")]) + print(paste("Number of sequences in result after n filtering:", nrow(result))) filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result))) @@ -183,6 +192,9 @@ result = result[!duplicated(result$unique.def),] } +print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) +print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")]) + write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) @@ -207,7 +219,7 @@ - +print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")]) result = result[,!(names(result) %in% c("past", "best_match_class"))] diff -r 61d0a6318711 -r 4e596473c25c shm_csr.py --- a/shm_csr.py Thu Nov 17 07:33:21 2016 -0500 +++ b/shm_csr.py Wed Nov 23 09:05:40 2016 -0500 @@ -284,4 +284,5 @@ with open(seq_motif_file, 'w') as o: o.write("ID\tRGYWC\tWRCY\tWA\tTW\n") for ID in IDlist: - o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n") + #o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n") + o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n") diff -r 61d0a6318711 -r 4e596473c25c wrapper.sh --- a/wrapper.sh Thu Nov 17 07:33:21 2016 -0500 +++ b/wrapper.sh Wed Nov 23 09:05:40 2016 -0500 @@ -447,58 +447,58 @@ PWD="$tmp" -echo "
" >> $output #clonality tab + echo "
" >> $output #clonality tab -function clonality_table { - local infile=$1 - local outfile=$2 - - echo "" >> $outfile - echo "" >> $outfile - - first='true' - - while read size clones seqs - do - if [[ "$first" == "true" ]]; then - first="false" - continue - fi - echo "" >> $outfile - done < $infile - - echo "
Clone sizeNr of clonesNr of sequences
$size$clones$seqs
" >> $outfile -} -echo "
" >> $output + function clonality_table { + local infile=$1 + local outfile=$2 + + echo "" >> $outfile + echo "" >> $outfile + + first='true' + + while read size clones seqs + do + if [[ "$first" == "true" ]]; then + first="false" + continue + fi + echo "" >> $outfile + done < $infile + + echo "
Clone sizeNr of clonesNr of sequences
$size$clones$seqs
" >> $outfile + } + echo "
" >> $output -echo "
" >> $output -clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output -echo "
" >> $output + echo "
" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output + echo "
" >> $output -echo "
" >> $output -clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output -echo "
" >> $output + echo "
" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output + echo "
" >> $output -echo "
" >> $output -clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output -echo "
" >> $output + echo "
" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output + echo "
" >> $output -echo "
" >> $output -clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output -echo "
" >> $output + echo "
" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output + echo "
" >> $output -echo "
" >> $output -clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output -echo "
" >> $output + echo "
" >> $output + clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output + echo "
" >> $output -echo "
" >> $output -cat "$outdir/sequence_overview/index.html" >> $output -echo "
" >> $output + echo "
" >> $output + cat "$outdir/sequence_overview/index.html" >> $output + echo "
" >> $output -echo "
" >> $output #clonality tabber end + echo "
" >> $output #clonality tabber end -echo "
" >> $output #clonality tab end + echo "
" >> $output #clonality tab end fi