#!/usr/bin/env bash
#
# Builds an index CSV with one row per "<pair>.mat" file found in a results
# directory.
#
# Usage: <script> <directory> <out> <fastas_directory_1> <fastas_directory_2>
#
#   <directory>           directory scanned for "<pair>.mat"; for each pair the
#                         script also reads "<pair>.scr.txt" (cleaned IN PLACE)
#                         and "<pair>.mat.events.txt"
#   <out>                 CSV file to (re)create
#   <fastas_directory_1>  directory holding the first file of each pair
#   <fastas_directory_2>  directory holding the second file of each pair
#
# A pair name is two file names joined by '-', e.g.
#   MUSMU.Chr.8.fasta-MUSMU.Chr.Y.fasta
# and each file name is like HOMSA.Chr.10.fasta.
#
# While it runs, the script keeps a marker file "index.csv.temp" in the
# current directory; the marker is removed when the script exits.
#
# Missing per-pair input files are tolerated: the failing tool prints its own
# error and the corresponding CSV field is left empty.

# Validate the arguments before touching the filesystem.
if [ $# != 4 ]; then
  echo "***ERROR*** Use: $0 <directory> <out> <fastas_directory_1> <fastas_directory_2>" >&2
  exit 255 # same status the former "exit -1" produced in bash
fi

DIR=$1
OUT=$2
FASTAS1=$3
FASTAS2=$4

EXT="mat"
EXTSCORE="scr.txt"
EXTGENERAL=".fasta"

HEADER='SpX, SpY, IDX, IDY, IMG, CHNumberX, CHNumberY, Score, LengthX, LengthY'
TEMP_MARKER="index.csv.temp"

# The marker is non-empty on purpose: the wait loop below looks for zero-size
# files under the current directory and must not trip over the marker itself.
echo "Computing index CSV..." > "$TEMP_MARKER"
trap 'rm -f -- "$TEMP_MARKER"' EXIT

# Block until no zero-size file remains under the current directory
# (presumably results that are still being written -- confirm with producer).
while [ "$(find . -size 0 | wc -l)" -ne 0 ]; do
  sleep 10
done

# Drop any previous index; -f keeps a first run (no previous file) quiet.
rm -f -- "$OUT"

rows=()
for matpath in "$DIR"/*."$EXT"; do
  # An unmatched glob stays literal; skip it instead of processing a bogus name.
  [[ -e "$matpath" ]] || continue

  matname=${matpath##*/}
  elem=${matname%."$EXT"} # pair name without the trailing ".mat"

  # Split the pair name into its two file names (on '-') ...
  IFS='-' read -r -a splits <<< "$elem" # yields MUSMU.Chr.8.fasta and MUSMU.Chr.Y.fasta
  # ... and into dot-separated tokens.
  IFS='.' read -r -a getnum <<< "$elem" # yields MUSMU Chr 8 fasta-MUSMU Chr Y fasta

  scorefile="$DIR/$elem.$EXTSCORE"
  eventsfile="$DIR/$elem.$EXT.events.txt"

  # Clean the score file in place: drop every line containing "X" and strip
  # all "[1]" markers, so that its first line is the score.
  sed -i -e '/X.*/d' -e 's/\[1\]//g' -- "$scorefile"
  score=$(head -n 1 -- "$scorefile")

  # First line of the events file: the first two comma-separated fields are
  # emitted as LengthX and LengthY.
  first_event_line=$(head -n 1 -- "$eventsfile")
  IFS=',' read -r len1 len2 _ <<< "$first_event_line"

  file1=${splits[0]}
  file2=${splits[1]}

  # The first line of each referenced file is used as its ID.
  ID1=$(head -n 1 -- "$FASTAS1/$file1")
  ID2=$(head -n 1 -- "$FASTAS2/$file2")

  # With names like A.Chr.8.fasta-B.Chr.Y.fasta the dot-separated tokens are
  # A Chr 8 fasta-B Chr Y fasta, so tokens 2 and 5 are the two numbers.
  numX=${getnum[2]}
  numY=${getnum[5]}

  rows+=("${file1%"$EXTGENERAL"},${file2%"$EXTGENERAL"},$ID1,$ID2,$elem.$EXT.filt.png,$numX,$numY,$score,$len1,$len2")
done

# Write the header followed by the rows ordered numerically by CHNumberX, then
# CHNumberY (CSV columns 6 and 7).
# NOTE(review): a comma inside an ID line shifts the columns for that row, both
# for this sort and for any CSV consumer -- confirm the IDs never contain one.
{
  printf '%s\n' "$HEADER"
  if (( ${#rows[@]} > 0 )); then
    printf '%s\n' "${rows[@]}" | sort -t, -k6,6n -k7,7n
  fi
} > "$OUT"