diff wrapper.sh @ 0:c33d93683a09 draft

Uploaded
author davidvanzessen
date Thu, 13 Oct 2016 10:52:24 -0400
parents
children faae21ba5c63
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wrapper.sh	Thu Oct 13 10:52:24 2016 -0400
@@ -0,0 +1,632 @@
+#!/bin/bash
+#set -e
+#
+# Runs the analysis scripts that live next to this file and assembles their
+# results into an HTML report under $outdir.
+#
+# Positional arguments:
+#   $1      input IMGT archive (zip, or xz-compressed tar)
+#   $2      method, forwarded to merge_and_filter.r
+#   $3      log file; becomes the main html page at the end
+#   $4      output directory
+#   $5      report title
+#   $6      include_fr1, forwarded to shm_csr.r and shm_csr.py
+#   $7-$8   functionality and unique, forwarded to merge_and_filter.r
+#   $9-$11  destinations for copies of the IGA / IGG / IGM archives
+#   $12-$14 filter_unique, class_filter and empty_region_filter, forwarded
+#           to merge_and_filter.r
+
+# Absolute directory of this script; the helper scripts are looked up here.
+dir="$(cd "$(dirname "$0")" && pwd)"
+input="$1"
+method="$2"
+log="$3" #becomes the main html page at the end
+outdir="$4"
+output="$outdir/index.html" #copied to $log location at the end
+title="$5"
+include_fr1="$6"
+functionality="$7"
+unique="$8"
+naive_output_ca="$9"
+naive_output_cg="${10}"
+naive_output_cm="${11}"
+filter_unique="${12}"
+class_filter="${13}"
+empty_region_filter="${14}"
+
+# -p: do not fail when the directory is already there.
+mkdir -p "$outdir"
+
+# Unpack the bundled stylesheet/javascript files into the report directory.
+tar -xzf "$dir/style.tar.gz" -C "$outdir"
+
+echo "---------------- read parameters ----------------"
+echo "---------------- read parameters ----------------<br />" > "$log"
+
+echo "unpacking IMGT file"
+
+# Pick the extraction tool from what file(1) reports about the content.
+type="$(file "$input")"
+if [[ "$type" == *"Zip archive"* ]] ; then
+	echo "Zip archive"
+	echo "unzip $input -d $PWD/files/"
+	unzip "$input" -d "$PWD/files/"
+elif [[ "$type" == *"XZ compressed data"* ]] ; then
+	echo "XZ archive"
+	echo "tar -xJf $input -C $PWD/files/$title"
+	mkdir -p "$PWD/files/$title"
+	tar -xJf "$input" -C "$PWD/files/$title"
+else
+	# Nothing gets extracted in this case; say so instead of staying silent.
+	echo "Unrecognised input archive type: $type" >&2
+fi
+
+# Concatenate the extracted files by their numeric name prefix.
+# find -exec keeps paths containing whitespace intact and, when nothing
+# matches, produces an empty file instead of leaving cat reading stdin.
+find "$PWD/files/" -name "1_*" -exec cat {} + > "$PWD/summary.txt"
+find "$PWD/files/" -name "3_*" -exec cat {} + > "$PWD/sequences.txt"
+find "$PWD/files/" -name "5_*" -exec cat {} + > "$PWD/aa.txt"
+find "$PWD/files/" -name "6_*" -exec cat {} + > "$PWD/junction.txt"
+find "$PWD/files/" -name "7_*" -exec cat {} + > "$PWD/mutationanalysis.txt"
+find "$PWD/files/" -name "8_*" -exec cat {} + > "$PWD/mutationstats.txt"
+find "$PWD/files/" -name "10_*" -exec cat {} + > "$PWD/hotspots.txt"
+
+# An inherited BLASTN_DIR is kept when it is at least five characters long;
+# anything shorter (including unset) is replaced by the development path.
+if (( ${#BLASTN_DIR} < 5 )); then
+	BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
+	echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
+else
+	echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
+fi
+
+echo "---------------- class identification ----------------"
+echo "---------------- class identification ----------------<br />" >> $log
+
+# gene_identification.py is given the concatenated summary as input and
+# identified_genes.txt as its output path.
+python $dir/gene_identification.py \
+	--input $PWD/summary.txt \
+	--output $outdir/identified_genes.txt
+
+echo "---------------- merge_and_filter.r ----------------"
+echo "---------------- merge_and_filter.r ----------------<br />" >> $log
+
+# Argument order: the five concatenated IMGT files, the identified genes,
+# three result paths (merged, before_unique_filter, unmatched), then the
+# filter settings taken from the command line.
+Rscript $dir/merge_and_filter.r \
+	$PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt \
+	$PWD/mutationstats.txt $PWD/hotspots.txt \
+	$outdir/identified_genes.txt \
+	$outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt \
+	$method $functionality $unique \
+	${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
+
+echo "---------------- creating new IMGT zips ----------------"
+echo "---------------- creating new IMGT zips ----------------<br />" >> $log
+
+# Classes and subclasses that each get their own directory and archive, in
+# addition to the unfiltered "new_IMGT" set.
+imgt_classes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM)
+
+# File names used inside the new archives; the part before the first "_" is
+# the prefix used to find the matching extracted input files.
+imgt_files=(
+	"1_Summary.txt"
+	"2_IMGT-gapped-nt-sequences.txt"
+	"3_Nt-sequences.txt"
+	"4_IMGT-gapped-AA-sequences.txt"
+	"5_AA-sequences.txt"
+	"6_Junction.txt"
+	"7_V-REGION-mutation-and-AA-change-table.txt"
+	"8_V-REGION-nt-mutation-statistics.txt"
+	"9_V-REGION-AA-change-statistics.txt"
+	"10_V-REGION-mutation-hotspots.txt"
+)
+
+mkdir -p "$outdir/new_IMGT"
+
+# find -exec (rather than cat `find ...`) yields an empty file when nothing
+# matches instead of leaving cat reading stdin.
+for imgt_file in "${imgt_files[@]}"; do
+	find "$PWD/files/" -name "${imgt_file%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$imgt_file"
+done
+
+# Every per-class directory starts as a copy of the unfiltered files. All
+# copies are made before new_imgt.r touches any directory.
+for imgt_class in "${imgt_classes[@]}"; do
+	mkdir -p "$outdir/new_IMGT_${imgt_class}"
+	cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${imgt_class}"
+done
+
+# new_imgt.r gets the directory, merged.txt and the class to keep; "-" is
+# passed for the unfiltered set.
+Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
+for imgt_class in "${imgt_classes[@]}"; do
+	Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "$imgt_class" 2>&1
+done
+
+# Archive from inside each directory so members are stored under their bare
+# file names. The subshell keeps the working directory of the main script
+# untouched, and "&&" stops tar from archiving the wrong directory when the
+# cd fails.
+for imgt_set in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
+	( cd "$outdir/$imgt_set/" && tar -cJf "../${imgt_set}.txz" * )
+done
+
+echo "---------------- shm_csr.r ----------------"
+echo "---------------- shm_csr.r ----------------<br />" >> $log
+
+# Comma-separated class list handed to the R and python analysis scripts.
+classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,unmatched"
+echo "R mutation analysis"
+Rscript $dir/shm_csr.r \
+	$outdir/merged.txt $classes $outdir ${include_fr1} 2>&1
+
+
+echo "---------------- shm_csr.py ----------------"
+echo "---------------- shm_csr.py ----------------<br />" >> $log
+
+python $dir/shm_csr.py \
+	--input $outdir/merged.txt \
+	--genes $classes \
+	--includefr1 "${include_fr1}" \
+	--output $outdir/hotspot_analysis.txt
+
+echo "---------------- aa_histogram.r ----------------"
+echo "---------------- aa_histogram.r ----------------<br />" >> $log
+
+Rscript $dir/aa_histogram.r \
+	$outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM" $outdir/ 2>&1
+# When the histogram exists under the name with a trailing underscore,
+# rename both it and its data file to the names without the underscore.
+if [[ -e "$outdir/aa_histogram_.png" ]]; then
+	mv $outdir/aa_histogram_.png $outdir/aa_histogram.png
+	mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt
+fi
+
+# Classes that get their own column / row in the report tables.
+genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM)
+
+# Aggregation functions to build overview tables for; only "sum" is active.
+funcs=(sum)
+
+echo "---------------- sequence_overview.r ----------------"
+echo "---------------- sequence_overview.r ----------------<br />" >> $log
+
+mkdir $outdir/sequence_overview
+
+Rscript $dir/sequence_overview.r \
+	$outdir/before_unique_filter.txt $outdir/merged.txt \
+	$outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
+
+# (Re)create base_overview.html with the table opening tag followed by one
+# row per line of ntoverview.txt. Note that the sequence column is printed
+# before the class column. The closing tag is appended near the end of the
+# script.
+{
+	echo "<table border='1'>"
+	while IFS=$'\t' read ID class seq A C G T; do
+		echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
+	done < $outdir/sequence_overview/ntoverview.txt
+} > $outdir/base_overview.html
+
+# Start the report page: title plus the scripts and stylesheets that were
+# unpacked into the report directory.
+echo "<html><center><h1>$title</h1></center>" > "$output"
+echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> "$output"
+echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> "$output"
+echo "<script type='text/javascript' src='tabber.js'></script>" >> "$output"
+echo "<script type='text/javascript' src='script.js'></script>" >> "$output"
+echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> "$output"
+echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> "$output"
+
+# Row counts without the header line. Matched rows are the lines of
+# merged.txt that do not contain the word "unmatched".
+matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
+unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
+total_count=$((matched_count + unmatched_count))
+
+# Multiply before dividing: with scale=2 bc truncates every division to two
+# decimals, so dividing first would reduce the result to whole percents.
+# Skip the division altogether when there are no sequences.
+if (( total_count > 0 )); then
+	perc_count="$(bc -l <<< "scale=2; ${unmatched_count} * 100 / ${total_count}")"
+else
+	perc_count=0
+fi
+
+echo "<center><h2>Total: ${total_count}</h2></center>" >> "$output"
+echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> "$output"
+echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> "$output"
+
+echo "---------------- main tables ----------------"
+echo "---------------- main tables ----------------<br />" >> "$log"
+
+echo "<div class='tabber'>" >> "$output"
+echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"
+
+# One overview table per aggregation function in funcs.
+for func in "${funcs[@]}"
+do
+	
+	echo "---------------- $func table ----------------"
+	echo "---------------- $func table ----------------<br />" >> "$log"
+	
+	# The table rows come from the mutation counts followed by the hotspot
+	# analysis for the same function.
+	cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"
+	
+	echo "---------------- pattern_plots.r ----------------"
+	echo "---------------- pattern_plots.r ----------------<br />" >> "$log"
+
+	Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1
+	
+	echo "<table class='pure-table pure-table-striped'>" >> "$output"
+	echo "<thead><tr><th>info</th>" >> "$output"
+	
+	if [ "${class_filter}" != "101_101" ] ; then
+	
+		# Header: one column per class, then "all", then "unmatched".
+		for gene in "${genes[@]}"
+		do
+			tmp=$(cat "$outdir/${gene}_${func}_n.txt")
+			echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> "$output"
+		done
+		
+		tmp=$(cat "$outdir/all_${func}_n.txt")
+		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> "$output"
+		# The unmatched column shows the count taken from unmatched.txt.
+		echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> "$output"
+
+		# Each data line holds a name followed by x,y,z triples. In the file
+		# the unmatched triple comes before the "all" triple; the rows below
+		# print "all" first so they line up with the header.
+		while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
+		do
+			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh
+				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> "$output"
+			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
+				echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> "$output"
+			else
+				echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> "$output"
+			fi
+		done < "$outdir/data_${func}.txt"
+		
+	else
+		# With class_filter "101_101" a single column is shown; its N and
+		# its values are both taken from the unmatched data.
+		tmp=$(cat "$outdir/unmatched_${func}_n.txt")
+		echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th></tr></thead>" >> "$output"
+		
+		while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
+		do
+			if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh
+				echo "<tr><td>$name</td><td>${unx}/${uny}</td></tr>" >> "$output"
+			elif [ "$name" == "Median of Number of Mutations (%)" ] ; then
+				echo "<tr><td>$name</td><td>${unz}%</td></tr>" >> "$output"
+			else
+				echo "<tr><td>$name</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> "$output"
+			fi
+		done < "$outdir/data_${func}.txt"
+		
+	fi
+	echo "</table>" >> "$output"
+	#echo "<a href='data_${func}.txt'>Download data</a>" >> $output
+done
+
+echo "<img src='plot1.png' /><br />" >> "$output"
+echo "<img src='plot2.png' /><br />" >> "$output"
+echo "<img src='plot3.png' /><br />" >> "$output"
+
+echo "</div>" >> "$output" #SHM overview tab end
+
+echo "---------------- images ----------------"
+echo "---------------- images ----------------<br />" >> $log
+
+echo "<div class='tabbertab' title='SHM Frequency'>" >> $output
+
+# Reference each plot only when the image file exists.
+for img in scatter.png frequency_ranges.png
+do
+	if [ -e $outdir/$img ]; then
+		echo "<img src='${img}'/><br />" >> $output
+	fi
+done
+
+echo "</div>" >> $output #SHM frequency tab end
+
+echo "<div class='tabbertab' title='Transition tables'>" >> "$output"
+
+echo "<table border='0'>" >> "$output"
+
+# One row per class plus a final row for all sequences together. Each row
+# shows the class name, two images and the transition counts read from
+# transitions_<class>_sum.txt.
+for gene in "${genes[@]}" all
+do
+	# File names use lower-case "all"; the heading is capitalised.
+	heading="$gene"
+	if [[ "$gene" == "all" ]]; then
+		heading="All"
+	fi
+
+	echo "<tr>" >> "$output"
+	echo "<td><h1>${heading}</h1></td>" >> "$output"
+	echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> "$output"
+	echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> "$output"
+	echo "<td><table style='border-left-width: 1;' class='pure-table transition-table pure-table-bordered'>" >> "$output"
+	# Single quotes around the attribute value: nested double quotes would
+	# end the shell string and drop the quotes from the generated HTML.
+	echo "<tr><td></td><td colspan='5'><center>To</center></td></tr>" >> "$output"
+	# The first line of the file additionally carries the "From" cell that
+	# spans all five rows.
+	first="true"
+	while IFS=, read -r from a c g t
+	do
+		if [ "$first" == "true" ] ; then
+			echo "<tr><td rowspan='5'>From</td><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> "$output"
+			first="false"
+		else
+			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> "$output"
+		fi
+	done < "$outdir/transitions_${gene}_sum.txt"
+	echo "</table></td>" >> "$output"
+	
+	echo "</tr>" >> "$output"
+done
+
+echo "</table>" >> "$output"
+
+echo "</div>" >> "$output" #transition tables tab end
+
+echo "<div class='tabbertab' title='Antigen Selection'>" >> $output
+
+# The per-class histograms are referenced only when the overall one exists.
+if [ -e $outdir/aa_histogram.png ]
+then
+	for suffix in "" _IGA _IGG _IGM
+	do
+		echo "<img src='aa_histogram${suffix}.png'/><br />" >> $output
+	done
+fi
+
+# The baseline PDFs are embedded without checking that they exist.
+for suffix in "" _IGA _IGG _IGM
+do
+	echo "<embed src='baseline${suffix}.pdf' width='700px' height='1000px'>" >> $output
+done
+
+echo "</div>" >> $output #antigen selection tab end
+
+echo "<div class='tabbertab' title='CSR'>" >> $output #CSR tab
+
+# Show a class image only when the file exists.
+for cls in IGA IGG
+do
+	if [ -e $outdir/${cls}.png ]
+	then
+		echo "<img src='${cls}.png'/><br />" >> $output
+	fi
+done
+
+echo "</div>" >> $output #CSR tab end
+
+echo "---------------- change-o MakeDB ----------------"
+
+mkdir -p "$outdir/change_o"
+
+# Remember where we are so the working directory can be restored below.
+tmp="$PWD"
+
+cd "$outdir/change_o"
+
+# All sequences: build the Change-O database, define clones, then merge
+# columns from merged.txt into the defined-clones file (written in place).
+bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT.txz" false false false "$outdir/change_o/change-o-db.txt"
+bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"
+
+Rscript "$dir/merge.r" "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/merged.txt" "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" "$outdir/change_o/change-o-db-defined_clones.txt" 2>&1
+
+# Print the merge command exactly as it was run above.
+echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"
+
+# Per class: run the same two steps when the class summary has more than
+# one line (i.e. more than just a header); otherwise write placeholder files
+# so the report links still resolve.
+for clone_class in IGA IGG IGM
+do
+	if [[ $(wc -l < "$outdir/new_IMGT_${clone_class}/1_Summary.txt") -gt 1 ]]; then
+		bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${clone_class}.txz" false false false "$outdir/change_o/change-o-db-${clone_class}.txt"
+		bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${clone_class}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt" "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
+	else
+		echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt"
+		echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
+	fi
+done
+
+# Actually change back; assigning to PWD does not move the shell.
+cd "$tmp"
+
+echo "<div class='tabbertab' title='Clonality'>" >> $output #clonality tab
+
+# Append an HTML table built from a clone summary file.
+#   $1  input file; its first line is skipped, every further line is split
+#       on whitespace into clone size, number of clones and the remainder
+#       (shown as number of sequences)
+#   $2  file the table is appended to
+function clonality_table {
+	local infile=$1
+	local outfile=$2
+	# Keep the loop state out of the global scope.
+	local first='true'
+	local size clones seqs
+	
+	echo "<table class='pure-table pure-table-striped'>" >> "$outfile"
+	echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> "$outfile"
+	
+	# -r keeps backslashes literal; the extra test keeps a final line that
+	# has no trailing newline.
+	while read -r size clones seqs || [[ -n "$size" ]]
+	do
+		if [[ "$first" == "true" ]]; then
+			first="false"
+			continue
+		fi
+		echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>" >> "$outfile"
+	done < "$infile"
+	
+	echo "</table>" >> "$outfile"
+}
+echo "<div class='tabber'>" >> $output
+
+# Each entry is "<tab title>:<summary file suffix>"; the entry for all
+# sequences has an empty suffix.
+for tab in "All:" "Ca:-IGA" "Cg:-IGG" "Cm:-IGM"
+do
+	echo "<div class='tabbertab' title='${tab%%:*}'>" >> $output
+	clonality_table $outdir/change_o/change-o-defined_clones-summary${tab#*:}.txt $output
+	echo "</div>" >> $output
+done
+
+# The overview tab inlines the page produced in sequence_overview/.
+echo "<div class='tabbertab' title='Overview'>" >> $output
+cat "$outdir/sequence_overview/index.html" >> $output
+echo "</div>" >> $output
+
+
+echo "</div>" >> $output #clonality tabber end
+
+echo "</div>" >> $output #clonality tab end
+
+echo "<div class='tabbertab' title='Downloads'>" >> "$output"
+
+# Append one row to the downloads table.
+#   $1  description (may contain HTML)
+#   $2  path relative to the report; used as link target and download name
+function download_row {
+	echo "<tr><td>$1</td><td><a href='$2' download='$2' >Download</a></td></tr>" >> "$output"
+}
+
+# Same as download_row, but the link opens the file instead of offering it
+# as a download.
+function view_row {
+	echo "<tr><td>$1</td><td><a href='$2'>View</a></td></tr>" >> "$output"
+}
+
+echo "<table class='pure-table pure-table-striped'>" >> "$output"
+echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> "$output"
+download_row "The complete dataset" "merged.txt"
+download_row "The filtered dataset" "filtered.txt"
+download_row "The SHM Overview table as a dataset" "data_sum.txt"
+download_row "The data used to generate the first SHM Overview plot" "plot1.txt"
+download_row "The data used to generate the second SHM Overview plot" "plot2.txt"
+download_row "The data used to generate the third SHM Overview plot" "plot3.txt"
+download_row "The alignment info on the unmatched sequences" "unmatched.txt"
+
+download_row "The data used to generate the frequency scatter plot" "scatter.txt"
+download_row "The data used to generate the frequency by class plot" "frequency_ranges_classes.txt"
+download_row "The data for frequency by subclass" "frequency_ranges_subclasses.txt"
+
+
+download_row "Motif data per sequence ID" "motif_per_seq.txt"
+download_row "Mutation data per sequence ID" "mutation_by_id.txt"
+download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
+download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
+view_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"
+
+view_row "Base count for every sequence" "base_overview.html"
+
+download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
+download_row "Baseline data" "baseline.txt"
+for dl_class in IGA IGG IGM
+do
+	download_row "Baseline ${dl_class} PDF" "baseline_${dl_class}.pdf"
+	download_row "Baseline ${dl_class} data" "baseline_${dl_class}.txt"
+done
+
+download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
+for dl_class in IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM
+do
+	download_row "An IMGT archive with just the matched and filtered ${dl_class} sequences" "new_IMGT_${dl_class}.txz"
+done
+
+download_row "The Change-O DB file with defined clones and subclass annotation" "change_o/change-o-db-defined_clones.txt"
+download_row "The Change-O DB defined clones summary file" "change_o/change-o-defined_clones-summary.txt"
+for dl_class in IGA IGG IGM
+do
+	download_row "The Change-O DB file with defined clones of ${dl_class}" "change_o/change-o-db-defined_clones-${dl_class}.txt"
+	download_row "The Change-O DB defined clones summary file of ${dl_class}" "change_o/change-o-defined_clones-summary-${dl_class}.txt"
+done
+
+echo "</table>" >> "$output"
+
+echo "</div>" >> "$output" #downloads tab end
+
+echo "</div>" >> "$output" #tabs end 
+
+echo "</html>" >> "$output"
+
+echo "---------------- baseline ----------------"
+echo "---------------- baseline ----------------<br />" >> $log
+tmp="$PWD"
+
+mkdir -p "$outdir/baseline"
+
+# Run the baseline wrapper for one sequence set, or write a placeholder
+# result when the set's summary has no more than one line.
+#   $1  working directory name under $outdir/baseline
+#   $2  suffix shared by the new_IMGT directory/archive and the baseline
+#       output files ("" for the set with all classes)
+#   $3  label handed to the baseline wrapper
+#   $4  text written to the .txt result when the set is empty
+function run_baseline {
+	local workdir="$outdir/baseline/$1"
+	local suffix=$2
+	local label=$3
+	local empty_msg=$4
+
+	mkdir -p "$workdir"
+	if [[ $(wc -l < "$outdir/new_IMGT${suffix}/1_Summary.txt") -gt 1 ]]; then
+		# The wrapper is started from inside its own working directory. The
+		# subshell confines the cd, and the wrapper is skipped when the cd
+		# fails instead of running somewhere else.
+		(
+			cd "$workdir" || exit 1
+			bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/new_IMGT${suffix}.txz" "$label" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline${suffix}.pdf" "Sequence.ID" "$outdir/baseline${suffix}.txt"
+		)
+	else
+		echo "$empty_msg" > "$outdir/baseline${suffix}.txt"
+	fi
+}
+
+run_baseline "IGA_IGG_IGM" "" "IGA_IGG_IGM" "No sequences"
+run_baseline "IGA" "_IGA" "IGA" "No IGA sequences"
+# NOTE(review): the IGG run passes the label "cg" where the other runs pass
+# the class name; kept as is, confirm whether "IGG" was intended.
+run_baseline "IGG" "_IGG" "cg" "No IGG sequences"
+run_baseline "IGM" "_IGM" "IGM" "No IGM sequences"
+
+cd "$tmp"
+
+echo "---------------- naive_output.r ----------------"
+echo "---------------- naive_output.r ----------------<br />" >> $log
+
+# Copy an archive to a requested destination.
+#   $1  archive to copy
+#   $2  destination; nothing is copied when it is empty or "None"
+# Each destination is checked on its own: the script never sets a variable
+# called naive_output, so a single test on that name would always pass.
+function copy_naive_output {
+	if [[ -n "$2" && "$2" != "None" ]]; then
+		cp "$1" "$2"
+	fi
+}
+
+copy_naive_output "$outdir/new_IMGT_IGA.txz" "${naive_output_ca}"
+copy_naive_output "$outdir/new_IMGT_IGG.txz" "${naive_output_cg}"
+copy_naive_output "$outdir/new_IMGT_IGM.txz" "${naive_output_cm}"
+
+# Close the table in base_overview.html that was opened earlier.
+echo "</table>" >> "$outdir/base_overview.html"
+
+# Keep the progress log inside the report directory; the $log path is then
+# reused for the landing page that links to the report.
+mv "$log" "$outdir/log.html"
+
+echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > "$log"
+echo "<table border = 1>" >> "$log"
+echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> "$log"
+# One table row per line of filtering_steps.txt (tab-separated: step,
+# sequence count, percentage). IFS is set for the read command only, so the
+# shell-wide IFS is never modified and needs no restoring.
+while IFS=$'\t' read -r step seq perc
+do
+	echo "<tr>" >> "$log"
+	echo "<td>$step</td>" >> "$log"
+	echo "<td>$seq</td>" >> "$log"
+	echo "<td>${perc}%</td>" >> "$log"
+	echo "</tr>" >> "$log"
+done < "$outdir/filtering_steps.txt"
+echo "</table></center></html>" >> "$log"
+
+
+echo "---------------- Done! ----------------"
+echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+