annotate chromeister/bin/generate-one-score.sh @ 0:7fdf47a0bae8 draft

Uploaded
author alvarofaure
date Wed, 12 Dec 2018 07:18:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7fdf47a0bae8 Uploaded
alvarofaure
parents:
diff changeset
#!/usr/bin/env bash
#
# generate-one-score.sh <index.csv> <threshold>
#
# Reads <index.csv>, skipping its first line as a header, and groups
# consecutive rows by the value of column 6.  For every group the column-8
# values are condensed into a single score (see score_group).  One line per
# group is written to <index.csv>.inter:
#
#     <column-6 value> <score> <number of values used>
#
# Rows that belong to the same group must be adjacent in the input; a change
# of the column-6 value always starts a new group.
#
# Exit status: 255 on wrong usage, 1 if the input cannot be read or the
# output cannot be written, 0 otherwise.

if [ $# -ne 2 ]; then
  {
    echo " ==== ERROR ... you called this script inappropriately."
    echo ""
    echo " usage: $0 <index.csv> <threshold>"
    echo ""
  } >&2
  # Same status bash reports for the historical "exit -1".
  exit 255
fi

CSV=$1
OUT="$CSV.inter"

if [[ ! -r "$CSV" ]]; then
  echo " ==== ERROR ... cannot read input file: $CSV" >&2
  exit 1
fi

# Normalise the threshold once, in the C locale, so that every later awk
# comparison sees a plain decimal number regardless of the caller's locale.
TH=$(LC_ALL=C awk -v t="$2" 'BEGIN { printf("%4.6f", t) }')

#######################################
# Store a copy of a string, without leading/trailing whitespace, in a variable.
# Arguments: $1 - name of the variable to assign
#            $2 - text to trim
#######################################
trim() {
  local text=$2
  text=${text#"${text%%[![:space:]]*}"}
  text=${text%"${text##*[![:space:]]}"}
  printf -v "$1" '%s' "$text"
}

#######################################
# Condense the values of one group into a score.
#
# The values are sorted numerically in ascending order.  The smallest value
# seeds the sum; further values are added while the gap check passes and a
# look-ahead element still exists.  Every added value also raises a penalty
# by 0.1, and the score is sum / (values used - penalty).
#
# Arguments: $1  - threshold
#            $2… - values of the group (empty values are ignored)
# Outputs:   "<score> <values used>" on stdout
#######################################
score_group() {
  local threshold=$1
  shift

  local -a sorted=()
  local item
  # Numeric sort: the values are used arithmetically below, so a plain
  # lexicographic sort would misorder e.g. "12.3" and "2.5".
  while IFS= read -r item; do
    if [[ -n "$item" ]]; then
      sorted+=("$item")
    fi
  done < <(printf '%s\n' "$@" | LC_ALL=C sort -g)

  local used=1
  local lookahead=2
  local total="${sorted[0]}"
  local next="${sorted[used]}"
  local after="${sorted[lookahead]}"
  local penalty=0
  local gap keep_going

  # Missing elements expand to "" and are treated as 0 by awk.
  gap=$(LC_ALL=C awk -v a="$next" -v b="$after" 'BEGIN { print b - a }')
  keep_going=$(LC_ALL=C awk -v a="$gap" -v b="$threshold" \
    'BEGIN { printf("%d", a > b) }')

  while (( keep_going == 1 && lookahead < ${#sorted[@]} )); do
    used=$((used + 1))
    lookahead=$((lookahead + 1))
    total=$(LC_ALL=C awk -v a="$total" -v b="$next" 'BEGIN { print (a + b) }')
    next="${sorted[used]}"
    after="${sorted[lookahead]}"

    # NOTE(review): this gap is next - after, i.e. the opposite sign of the
    # initial check above (after - next).  Kept as in the original so the
    # scores do not change; confirm which direction is intended.
    gap=$(LC_ALL=C awk -v a="$after" -v b="$next" \
      'BEGIN { printf("%f", b - a) }')
    keep_going=$(LC_ALL=C awk -v a="$gap" -v b="$threshold" \
      'BEGIN { printf("%d", a > b) }')
    penalty=$(LC_ALL=C awk -v a="$penalty" 'BEGIN { print a + 0.1 }')
  done

  total=$(LC_ALL=C awk -v a="$total" -v b="$used" -v c="$penalty" \
    'BEGIN { printf("%f", a / (b - c)) }')
  printf '%s %s\n' "$total" "$used"
}

#######################################
# Score the group collected so far and append its line to the output file.
# Globals: group_name, group_values, TH, OUT (all read)
#######################################
flush_group() {
  local score used
  read -r score used < <(score_group "$TH" "${group_values[@]}")
  printf '%s %s %s\n' "$group_name" "$score" "$used" >> "$OUT"
}

# Start from an empty output file (also when the CSV has no data rows).
if ! : > "$OUT"; then
  echo " ==== ERROR ... cannot write output file: $OUT" >&2
  exit 1
fi

group_name=
group_values=()
have_group=0

# "|| [[ -n $line ]]" keeps a final row that lacks a trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Blank lines carry no data.
  if [[ -z "$line" ]]; then
    continue
  fi

  IFS=, read -r -a fields <<< "$line"
  trim genome "${fields[5]}"
  trim value "${fields[7]}"

  # A new column-6 value closes the current group.  The group name is
  # switched right here, so groups consisting of a single row are scored
  # and labelled on their own.
  if (( have_group )) && [[ "$genome" != "$group_name" ]]; then
    flush_group
    group_values=()
  fi

  group_name=$genome
  have_group=1
  group_values+=("$value")
done < <(tail -n +2 "$CSV")

# Do the last group.
if (( have_group )); then
  flush_group
fi