# HG changeset patch
# User davidvanzessen
# Date 1478867508 18000
# Node ID 59765d2c8890bf8a5ae82fe78374052b80c5a36d
# Parent 933fb21568cecfd301573cd6a73ca58c0b7b99f5
Uploaded
diff -r 933fb21568ce -r 59765d2c8890 merge_and_filter.r
--- a/merge_and_filter.r Fri Nov 11 03:49:30 2016 -0500
+++ b/merge_and_filter.r Fri Nov 11 07:31:48 2016 -0500
@@ -6,22 +6,24 @@
mutationanalysisfile = args[3]
mutationstatsfile = args[4]
hotspotsfile = args[5]
-gene_identification_file= args[6]
-output = args[7]
-before.unique.file = args[8]
-unmatchedfile = args[9]
-method=args[10]
-functionality=args[11]
-unique.type=args[12]
-filter.unique=args[13]
-class.filter=args[14]
-empty.region.filter=args[15]
+aafile = args[6]
+gene_identification_file= args[7]
+output = args[8]
+before.unique.file = args[9]
+unmatchedfile = args[10]
+method=args[11]
+functionality=args[12]
+unique.type=args[13]
+filter.unique=args[14]
+class.filter=args[15]
+empty.region.filter=args[16]
summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
+AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
if(method == "blastn"){
@@ -81,6 +83,10 @@
names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
result = merge(result, sequences, by="Sequence.ID", all.x=T)
+AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
+names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
+result = merge(result, AAs, by="Sequence.ID", all.x=T)
+
print(paste("Number of sequences in result after merging with sequences:", nrow(result)))
result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele)
diff -r 933fb21568ce -r 59765d2c8890 shm_csr.xml
--- a/shm_csr.xml Fri Nov 11 03:49:30 2016 -0500
+++ b/shm_csr.xml Fri Nov 11 07:31:48 2016 -0500
@@ -22,10 +22,10 @@
-
-
-
-
+
+
+
+
diff -r 933fb21568ce -r 59765d2c8890 wrapper.sh
--- a/wrapper.sh Fri Nov 11 03:49:30 2016 -0500
+++ b/wrapper.sh Fri Nov 11 07:31:48 2016 -0500
@@ -62,7 +62,7 @@
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------
" >> $log
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $PWD/aa.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
if [[ "$fast" == "no" ]] ; then