# HG changeset patch
# User davidvanzessen
# Date 1479909940 18000
# Node ID 4e596473c25c16a27999f79090bbf862940be7ef
# Parent 61d0a6318711d3b52e248a6f4f787ee033628df0
Uploaded
diff -r 61d0a6318711 -r 4e596473c25c merge_and_filter.r
--- a/merge_and_filter.r Thu Nov 17 07:33:21 2016 -0500
+++ b/merge_and_filter.r Wed Nov 23 09:05:40 2016 -0500
@@ -96,6 +96,8 @@
result$JGene = gsub("^Homsap ", "", result$J.GENE.and.allele)
result$JGene = gsub("[*].*", "", result$JGene)
+print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
+
splt = strsplit(class.filter, "_")[[1]]
chunk_hit_threshold = as.numeric(splt[1])
nt_hit_threshold = as.numeric(splt[2])
@@ -110,6 +112,8 @@
result$best_match = "all"
}
+print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
+
write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T)
print(paste("Number of empty CDR1 sequences:", sum(result$CDR1.IMGT.seq == "")))
@@ -135,6 +139,8 @@
filtering.steps = rbind(filtering.steps, c("After empty CDR2, FR3 filter", nrow(result)))
}
+print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
+
if(empty.region.filter == "leader"){
result = result[!(grepl("n|N", result$FR1.IMGT.seq) | grepl("n|N", result$FR2.IMGT.seq) | grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR1.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
} else if(empty.region.filter == "FR1"){
@@ -145,6 +151,9 @@
result = result[!(grepl("n|N", result$FR3.IMGT.seq) | grepl("n|N", result$CDR2.IMGT.seq) | grepl("n|N", result$CDR3.IMGT.seq)),]
}
+print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
+print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")])
+
print(paste("Number of sequences in result after n filtering:", nrow(result)))
filtering.steps = rbind(filtering.steps, c("After N filter", nrow(result)))
@@ -183,6 +192,9 @@
result = result[!duplicated(result$unique.def),]
}
+print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
+print(result[result$Sequence.ID == "JY8QFUQ01BNS72",c("Sequence.ID","best_match")])
+
write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T)
filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result)))
@@ -207,7 +219,7 @@
-
+print(result[result$Sequence.ID == "JY8QFUQ01C310D",c("Sequence.ID","best_match")])
result = result[,!(names(result) %in% c("past", "best_match_class"))]
diff -r 61d0a6318711 -r 4e596473c25c shm_csr.py
--- a/shm_csr.py Thu Nov 17 07:33:21 2016 -0500
+++ b/shm_csr.py Wed Nov 23 09:05:40 2016 -0500
@@ -284,4 +284,5 @@
with open(seq_motif_file, 'w') as o:
o.write("ID\tRGYWC\tWRCY\tWA\tTW\n")
for ID in IDlist:
- o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n")
+ #o.write(ID + "\t" + str(round(RGYWCount[ID], 2)) + "\t" + str(round(WRCYCount[ID], 2)) + "\t" + str(round(WACount[ID], 2)) + "\t" + str(round(TWCount[ID], 2)) + "\n")
+ o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n")
diff -r 61d0a6318711 -r 4e596473c25c wrapper.sh
--- a/wrapper.sh Thu Nov 17 07:33:21 2016 -0500
+++ b/wrapper.sh Wed Nov 23 09:05:40 2016 -0500
@@ -447,58 +447,58 @@
PWD="$tmp"
-echo "
" >> $output #clonality tab
+ echo "
" >> $output #clonality tab
-function clonality_table {
- local infile=$1
- local outfile=$2
-
- echo "
" >> $outfile
- echo "Clone size | Nr of clones | Nr of sequences |
" >> $outfile
-
- first='true'
-
- while read size clones seqs
- do
- if [[ "$first" == "true" ]]; then
- first="false"
- continue
- fi
- echo "$size | $clones | $seqs |
" >> $outfile
- done < $infile
-
- echo "
" >> $outfile
-}
-echo "
" >> $output
+ function clonality_table {
+ local infile=$1
+ local outfile=$2
+
+ echo "
" >> $outfile
+ echo "Clone size | Nr of clones | Nr of sequences |
" >> $outfile
+
+ first='true'
+
+ while read size clones seqs
+ do
+ if [[ "$first" == "true" ]]; then
+ first="false"
+ continue
+ fi
+ echo "$size | $clones | $seqs |
" >> $outfile
+ done < $infile
+
+ echo "
" >> $outfile
+ }
+ echo "
" >> $output
-echo "
" >> $output
-clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output
-echo "
" >> $output
+ echo "
" >> $output
+ clonality_table $outdir/change_o/change-o-defined_clones-summary.txt $output
+ echo "
" >> $output
-echo "
" >> $output
-clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output
-echo "
" >> $output
+ echo "
" >> $output
+ clonality_table $outdir/change_o/change-o-defined_clones-summary-IGA.txt $output
+ echo "
" >> $output
-echo "
" >> $output
-clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output
-echo "
" >> $output
+ echo "
" >> $output
+ clonality_table $outdir/change_o/change-o-defined_clones-summary-IGG.txt $output
+ echo "
" >> $output
-echo "
" >> $output
-clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
-echo "
" >> $output
+ echo "
" >> $output
+ clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
+ echo "
" >> $output
-echo "
" >> $output
-clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
-echo "
" >> $output
+ echo "
" >> $output
+ clonality_table $outdir/change_o/change-o-defined_clones-summary-IGM.txt $output
+ echo "
" >> $output
-echo "
" >> $output
-cat "$outdir/sequence_overview/index.html" >> $output
-echo "
" >> $output
+ echo "
" >> $output
+ cat "$outdir/sequence_overview/index.html" >> $output
+ echo "
" >> $output
-echo "
" >> $output #clonality tabber end
+ echo "
" >> $output #clonality tabber end
-echo "
" >> $output #clonality tab end
+ echo "
" >> $output #clonality tab end
fi