changeset 57:cb779a45537b draft

Uploaded
author davidvanzessen
date Tue, 18 Jul 2017 04:28:11 -0400
parents ee807645b224
children 1a8e1dd21b16
files check_unique_id.r merge_and_filter.r new_imgt.r wrapper.sh
diffstat 4 files changed, 21 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/check_unique_id.r	Mon Jul 17 10:44:40 2017 -0400
+++ b/check_unique_id.r	Tue Jul 18 04:28:11 2017 -0400
@@ -8,7 +8,7 @@
 	stop("First argument doesn't contain the 'Sequence number' column")
 }
 
-tbl = table(current$Sequence.ID)
+tbl = table(current[,"Sequence ID"])
 l_tbl = length(tbl)
 check = any(tbl > 1)
 
--- a/merge_and_filter.r	Mon Jul 17 10:44:40 2017 -0400
+++ b/merge_and_filter.r	Tue Jul 18 04:28:11 2017 -0400
@@ -115,12 +115,6 @@
 print("mutation analysis files columns")
 print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
 
-print(head(summ$Sequence.ID))
-
-print("_-------------------------------------")
-
-print(head(mutationanalysis$Sequence.ID))
-
 result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
--- a/new_imgt.r	Mon Jul 17 10:44:40 2017 -0400
+++ b/new_imgt.r	Tue Jul 18 04:28:11 2017 -0400
@@ -14,6 +14,8 @@
 	merged = merged[!grepl("unmatched", merged$best_match),]
 }
 
+nrow_dat = 0
+
 for(f in list.files(imgt.dir, pattern="*.txt$")){
 	#print(paste("filtering", f))
 	path = file.path(imgt.dir, f)
@@ -21,9 +23,13 @@
 	
 	dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,]
 	
+	nrow_dat = nrow(dat)
+	
 	if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file
 		dat[,grepl("^FR1", names(dat))] = 0
 	}
 	
 	write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="")
 }
+
+print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences"))
--- a/wrapper.sh	Mon Jul 17 10:44:40 2017 -0400
+++ b/wrapper.sh	Tue Jul 18 04:28:11 2017 -0400
@@ -48,17 +48,19 @@
 fi
 
 cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt
+cat "`find $PWD/files/ -name "2_*"`" > $PWD/gapped_nt.txt
 cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt
 cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt
 cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt
 cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt
 cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt
 cat "`find $PWD/files/ -name "8_*"`" > $PWD/mutationstats.txt
+cat "`find $PWD/files/ -name "9_*"`" > $PWD/aa_change_stats.txt
 cat "`find $PWD/files/ -name "10_*"`" > $PWD/hotspots.txt
 
 echo "---------------- unique id check ----------------"
 
-Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt
+Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/gapped_nt.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/aa_change_stats.txt $PWD/hotspots.txt
 
 if [[ ${#BLASTN_DIR} -ge 5 ]] ; then
 	echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
@@ -77,23 +79,23 @@
 
 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1
 
-if [[ "${naive_output}" == "yes" ]] ; then
+if [[ "${naive_output}" == "yes" ]] || [[ "$fast" == "no" ]] ; then
 
 	echo "---------------- creating new IMGT zips ----------------"
 	echo "---------------- creating new IMGT zips ----------------<br />" >> $log
 
 	mkdir $outdir/new_IMGT
 
-	cat "`find $PWD/files/ -name "1_*"`" > "$outdir/new_IMGT/1_Summary.txt"
-	cat "`find $PWD/files/ -name "2_*"`" > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
-	cat "`find $PWD/files/ -name "3_*"`" > "$outdir/new_IMGT/3_Nt-sequences.txt"
-	cat "`find $PWD/files/ -name "4_*"`" > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
-	cat "`find $PWD/files/ -name "5_*"`" > "$outdir/new_IMGT/5_AA-sequences.txt"
-	cat "`find $PWD/files/ -name "6_*"`" > "$outdir/new_IMGT/6_Junction.txt"
-	cat "`find $PWD/files/ -name "7_*"`" > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
-	cat "`find $PWD/files/ -name "8_*"`" > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
-	cat "`find $PWD/files/ -name "9_*"`" > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
-	cat "`find $PWD/files/ -name "10_*"`" > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
+	cp $PWD/summary.txt "$outdir/new_IMGT/1_Summary.txt"
+	cp $PWD/gapped_nt.txt "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
+	cp $PWD/sequences.txt "$outdir/new_IMGT/3_Nt-sequences.txt"
+	cp $PWD/gapped_aa.txt "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
+	cp $PWD/aa.txt "$outdir/new_IMGT/5_AA-sequences.txt"
+	cp $PWD/junction.txt "$outdir/new_IMGT/6_Junction.txt"
+	cp $PWD/mutationanalysis.txt "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
+	cp $PWD/mutationstats.txt "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
+	cp $PWD/aa_change_stats.txt "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
+	cp $PWD/hotspots.txt "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
 
 	mkdir $outdir/new_IMGT_IGA
 	cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA