#!/bin/bash
#
# Wrapper that builds a single FASTA file from one or more inputs and then
# runs the Baseline R scripts that live next to this script.
#
# Positional arguments:
#   $1   testID             forwarded to Baseline_Main.r
#   $2   species            forwarded to Baseline_Main.r
#   $3   substitutionModel  forwarded to Baseline_Main.r
#   $4   mutabilityModel    forwarded to Baseline_Main.r
#   $5   clonal             forwarded to Baseline_Main.r
#   $6   fixIndels          forwarded to Baseline_Main.r
#   $7   region             forwarded to Baseline_Main.r
#   $8   inputs             whitespace-separated list of input file paths
#   $9   IDs                whitespace-separated list of IDs; the n-th ID is
#                           used for the n-th input
#   $10  ref                forwarded to script_imgt.py / script_xlsx.py
#   $11  output             forwarded to comparePDFs.r
#   $12  selection          forwarded to filter.r
#   $13  output_table       destination for a copy of the result table
#
# Intermediate files are written to the directory the script is started in.

# Absolute path of the directory containing this script and its helpers.
dir="$(cd "$(dirname "$0")" && pwd)"

testID=$1
species=$2
substitutionModel=$3
mutabilityModel=$4
clonal=$5
fixIndels=$6
region=$7
# Split the two list arguments on whitespace into arrays. `read -a` is used
# instead of an unquoted expansion so that glob characters in a path are not
# expanded. With `-d ''` read hits EOF and returns non-zero even though the
# array has been filled, hence the `|| true`.
read -r -d '' -a inputs <<< "$8" || true
read -r -d '' -a IDs <<< "$9" || true
ref=${10}
output=${11}
selection=${12}
output_table=${13}
outID="result"

echo "$PWD"

echo "testID = $testID"
echo "species = $species"
echo "substitutionModel = $substitutionModel"
echo "mutabilityModel = $mutabilityModel"
echo "clonal = $clonal"
echo "fixIndels = $fixIndels"
echo "region = $region"
echo "inputs = ${inputs[*]}"
echo "IDs = ${IDs[*]}"
echo "ref = $ref"
echo "output = $output"
echo "outID = $outID"

# Every input is converted with --output pointing at this one file.
# NOTE(review): presumably the python helpers append to it - confirm.
fasta="$PWD/baseline.fasta"

count=0
for current in "${inputs[@]}"; do
  # -b omits the file name from the description, so a path that happens to
  # contain "Zip archive" cannot trigger a false match.
  f=$(file -b "$current")
  if [[ "$f" == *"Zip archive"* || "$f" == *"XZ compressed data"* ]]; then
    # Archive input: unpack it into a directory named after its ID.
    id=${IDs[$count]}
    echo "id=$id"
    extract_dir="$PWD/$id"
    if [[ "$f" == *"Zip archive"* ]]; then
      echo "Zip archive"
      echo "unzip $current -d $extract_dir/"
      unzip "$current" -d "$extract_dir/"
    else
      echo "XZ archive"
      echo "tar -xJf $current -C $extract_dir/files/"
      mkdir -p "$extract_dir/files"
      tar -xJf "$current" -C "$extract_dir/files/"
    fi

    summaryfile="$PWD/summary_${id}.txt"
    gappedfile="$PWD/gappednt_${id}.txt"
    filtered="$PWD/filtered_${id}.txt"

    # If the extraction directory holds exactly one visible entry, the 1_*
    # and 2_* files are taken from one level down; otherwise they are taken
    # from the extraction directory itself. The existence test rejects the
    # literal, unexpanded pattern that bash leaves behind for an empty or
    # missing directory.
    entries=("$extract_dir"/*)
    if [[ ${#entries[@]} -eq 1 && ( -e "${entries[0]}" || -L "${entries[0]}" ) ]]; then
      cat "$extract_dir"/*/1_* > "$summaryfile"
      cat "$extract_dir"/*/2_* > "$gappedfile"
    else
      cat "$extract_dir"/1_* > "$summaryfile"
      cat "$extract_dir"/2_* > "$gappedfile"
    fi

    Rscript "$dir/filter.r" "$summaryfile" "$gappedfile" "$selection" "$filtered" 2>&1

    # Keep only tab-separated columns 2, 4 and 7 of the filtered table.
    final="$PWD/final_${id}.txt"
    cut -f2,4,7 "$filtered" > "$final"
    python "$dir/script_imgt.py" --input "$final" --ref "$ref" --output "$fasta" --id "$id"
  else
    # Anything that is not a zip/xz archive is handed to the xlsx converter.
    python "$dir/script_xlsx.py" --input "$current" --ref "$ref" --output "$fasta"
  fi
  count=$((count + 1))
done

# A FASTA file of exactly one line is treated as containing no sequences.
if [[ $(wc -l < "$fasta") -eq 1 ]]; then
  echo "No sequences in the fasta file, exiting"
  exit 0
fi

workdir="$PWD"
# The R scripts are run from the script directory.
cd "$dir" || { echo "cannot cd to $dir" >&2; exit 1; }
echo "file: ${inputs[0]}"
# Previous invocation, kept for reference (passed the first input, not the fasta):
#Rscript --verbose $dir/Baseline_Main.r $testID $species $substitutionModel $mutabilityModel $clonal $fixIndels $region ${inputs[0]} $workdir/ $outID 2>&1
Rscript --verbose "$dir/Baseline_Main.r" "$testID" "$species" "$substitutionModel" "$mutabilityModel" "$clonal" "$fixIndels" "$region" "$fasta" "$workdir/" "$outID" 2>&1

echo "$workdir/${outID}.txt"

# Collect the line numbers of the 'Group' rows in the result table, counted
# after dropping the first line and every "All sequences combined" line.
read -r -d '' -a rows < <(
  tail -n +2 "$workdir/${outID}.txt" \
    | grep -v "All sequences combined" \
    | grep -n 'Group' \
    | grep -Eoh '^[0-9]+'
) || true
#unset rows[${#rows[@]}-1]

Rscript --verbose "$dir/comparePDFs.r" "$workdir/${outID}.RData" "$output" "${rows[@]}" 2>&1
cp "$workdir/${outID}.txt" "$output_table"