Mercurial > repos > davidvanzessen > shm_csr
diff baseline/wrapper.sh @ 0:c33d93683a09 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 13 Oct 2016 10:52:24 -0400 |
parents | |
children | 8728284105ee |
line wrap: on
line diff
#!/bin/bash
#
# baseline/wrapper.sh -- first part: read the arguments and convert every
# input file into entries of one combined fasta file ($PWD/baseline.fasta).
#
# Positional arguments:
#   $1   testID             \
#   $2   species             |
#   $3   substitutionModel   |  only echoed here; consumed after the
#   $4   mutabilityModel     |  conversion loop
#   $5   clonal              |
#   $6   fixIndels           |
#   $7   region             /
#   $8   inputs        whitespace-separated list of input files (one argument)
#   $9   IDs           whitespace-separated list of IDs, indexed in parallel
#                      with inputs
#   $10  ref           passed as --ref to the conversion scripts
#   $11  output        consumed after the conversion loop
#   $12  selection     passed to filter.r
#   $13  output_table  consumed after the conversion loop

# Directory holding this script and its helper scripts.
dir="$(cd "$(dirname "$0")" && pwd)"

testID=$1
species=$2
substitutionModel=$3
mutabilityModel=$4
clonal=$5
fixIndels=$6
region=$7
# $8 and $9 each carry a whitespace-separated list inside a single argument.
# "read -a" splits on whitespace without the pathname expansion that an
# unquoted array assignment would also apply. With -d '' the whole value
# (including embedded newlines) is read; the non-zero status at end of
# input is expected and ignored.
read -r -d '' -a inputs <<< "$8"
read -r -d '' -a IDs <<< "$9"
ref=${10}
output=${11}
selection=${12}
output_table=${13}
outID="result"

echo "$PWD"

echo "testID = $testID"
echo "species = $species"
echo "substitutionModel = $substitutionModel"
echo "mutabilityModel = $mutabilityModel"
echo "clonal = $clonal"
echo "fixIndels = $fixIndels"
echo "region = $region"
echo "inputs = ${inputs[*]}"
echo "IDs = ${IDs[*]}"
echo "ref = $ref"
echo "output = $output"
echo "outID = $outID"

fasta="$PWD/baseline.fasta"

# count indexes IDs in parallel with inputs; it advances for every input,
# including those that are not archives.
count=0
for current in "${inputs[@]}"; do
  # Classify the input by the description printed by file(1).
  f=$(file "$current")
  case "$f" in
    *"Zip archive"*) archive="zip" ;;
    *"XZ compressed data"*) archive="xz" ;;
    *) archive="" ;;
  esac

  if [[ -n "$archive" ]]; then
    id=${IDs[$count]}
    echo "id=$id"
    if [[ "$archive" == "zip" ]]; then
      echo "Zip archive"
      echo "unzip $current -d $PWD/$id/"
      unzip "$current" -d "$PWD/$id/"
    else
      echo "XZ archive"
      echo "tar -xJf $current -C $PWD/$id/files/"
      mkdir -p "$PWD/$id/files"
      tar -xJf "$current" -C "$PWD/$id/files/"
    fi

    summaryfile="$PWD/summary_${id}.txt"
    gappedfile="$PWD/gappednt_${id}.txt"
    filtered="$PWD/filtered_${id}.txt"

    # When the extraction directory holds exactly one entry, the 1_* and 2_*
    # files are looked up one level below it; otherwise directly inside it.
    entries=("$PWD/$id"/*)
    if [[ ${#entries[@]} -eq 1 && -e "${entries[0]}" ]]; then
      cat "$PWD/$id"/*/1_* > "$summaryfile"
      cat "$PWD/$id"/*/2_* > "$gappedfile"
    else
      cat "$PWD/$id"/1_* > "$summaryfile"
      cat "$PWD/$id"/2_* > "$gappedfile"
    fi

    Rscript "$dir/filter.r" "$summaryfile" "$gappedfile" "$selection" "$filtered" 2>&1

    # Only columns 2, 4 and 7 of the filtered table go to script_imgt.py.
    final="$PWD/final_${id}.txt"
    cut -f2,4,7 "$filtered" > "$final"
    python "$dir/script_imgt.py" --input "$final" --ref "$ref" --output "$fasta" --id "$id"
  else
    # Anything that is neither a zip nor an xz archive is handed to
    # script_xlsx.py as-is.
    python "$dir/script_xlsx.py" --input "$current" --ref "$ref" --output "$fasta"
  fi
  count=$((count + 1))
done

# Second part of baseline/wrapper.sh: run Baseline on the combined fasta file
# and produce the comparison output. Relies on variables set earlier in the
# script: fasta, dir, inputs, outID, output, output_table and the model
# parameters (testID ... region).

# Without a fasta file there is nothing to analyse; fail before the line
# count below is attempted on a missing file.
if [[ ! -f "$fasta" ]]; then
  echo "Fasta file $fasta was not created, exiting" >&2
  exit 1
fi

# A fasta file of exactly one line is treated as containing no sequences.
if [[ $(wc -l < "$fasta") -eq 1 ]]; then
  echo "No sequences in the fasta file, exiting"
  exit 0
fi

workdir="$PWD"
# The R scripts are run from the script directory.
# NOTE(review): presumably they load helper files relative to it -- confirm.
cd "$dir" || { echo "Cannot change directory to $dir" >&2; exit 1; }
echo "file: ${inputs[0]}"
# Baseline is run on the combined fasta file, not on the raw inputs.
Rscript --verbose "$dir/Baseline_Main.r" "$testID" "$species" "$substitutionModel" \
  "$mutabilityModel" "$clonal" "$fixIndels" "$region" "$fasta" "$workdir/" "$outID" 2>&1

echo "$workdir/${outID}.txt"

# Collect the positions of the lines containing 'Group', counted after
# dropping the first line and every "All sequences combined" line.
# The awk output consists of integers only, so word splitting into the
# array is safe here.
# shellcheck disable=SC2207
rows=($(awk 'NR > 1 && !/All sequences combined/ { n++; if (/Group/) print n }' \
  "$workdir/${outID}.txt"))

Rscript --verbose "$dir/comparePDFs.r" "$workdir/${outID}.RData" "$output" "${rows[@]}" 2>&1
cp "$workdir/${outID}.txt" "${output_table}"