comparison chromeister/bin/generate-one-score.sh @ 0:ee6b15b409e5 draft

Uploaded
author bitlab
date Thu, 13 Dec 2018 06:27:57 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ee6b15b409e5
1 #!/usr/bin/env bash
2 CSV=$1
3 TH=$2
4
5 if [ $# -ne 2 ]; then
6 echo " ==== ERROR ... you called this script inappropriately."
7 echo ""
8 echo " usage: $0 <index.csv> <threshold>"
9 echo ""
10 exit -1
11 fi
12
13
14 BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
15
16 # get first genome in list (they are sorted)
17 currgenome=$(tail -n +2 "$CSV" | head -1 | awk -F "," '{print $6}')
18
19 # fill array of chromosomes similarity
20
21 array=()
22 arraytosort=()
23 names=()
24 homologies=()
25 condition=0
26 othergencounter=0
27 # for problems with chromo X and Y
28 highest=1
29 # For all lines
30
31 cat $CSV | tail -n +2 > $1.temp
32
33 while IFS= read -r i
34 do
35
36 othergenome=$(echo "$i" | awk -F "," '{print $6}')
37 if [ "$condition" -eq 0 ]; then
38 currgenome=$othergenome
39 condition=1
40 fi
41
42 if [ "$othergenome" != "$currgenome" ]; then
43
44 # Sort the array with temporal values
45 #printf '%s\n' "${arraytosort[@]}"
46 #echo "name is $currgenome"
47
48 sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
49
50 #echo "For chroomo $currgenome we have "
51 #echo $(printf '%s,' "${sorted[@]}")
52 # accumulate sum until threshold is reached
53 usedValues=1
54 usedValuesNext=2
55 first=${sorted[0]}
56 next=${sorted[${usedValues}]}
57 nextofnext=${sorted[${usedValuesNext}]}
58 finalvalue=$first
59 divisor=0
60 currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
61 TH=$(printf '%4.6f' $TH)
62 #echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
63 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
64 #echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
65 while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
66 do
67 usedValues=`expr $usedValues + 1`
68 usedValuesNext=`expr $usedValuesNext + 1`
69 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
70 next=${sorted[${usedValues}]}
71 nextofnext=${sorted[${usedValuesNext}]}
72
73 currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
74 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
75 divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
76
77 done
78
79 #echo "so this is what we got $finalvalue, when divided using $divisor"
80
81 # array holds the results
82 #array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
83 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
84 array[$highest]=$finalvalue
85 homologies[$highest]=$usedValues
86
87 highest=`expr $highest + 1`
88 condition=0
89 names+=($currgenome)
90 othergencounter=0
91 unset arraytosort
92
93
94
95
96 getvalue=$(echo "$i" | awk -F "," '{print $8}')
97 # Copy value to array
98 arraytosort[$othergencounter]=$getvalue
99 #currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
100 othergencounter=`expr $othergencounter + 1`
101 else
102 getvalue=$(echo "$i" | awk -F "," '{print $8}')
103 # Copy value to array
104 arraytosort[$othergencounter]=$getvalue
105 #currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
106 othergencounter=`expr $othergencounter + 1`
107
108 fi
109
110 done < "$1.temp"
111
112 # do the last!!!
113 #if [ "$lastprint" == "$currgenome" ]; then
114 # Sort the array with temporal values
115 sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
116
117 usedValues=1
118 usedValuesNext=2
119 first=${sorted[0]}
120 next=${sorted[${usedValues}]}
121 nextofnext=${sorted[${usedValuesNext}]}
122 finalvalue=$first
123 divisor=0
124 currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
125 TH=$(printf '%4.6f' $TH)
126 #echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
127 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
128 #echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
129 while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
130 do
131 usedValues=`expr $usedValues + 1`
132 usedValuesNext=`expr $usedValuesNext + 1`
133 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
134 next=${sorted[${usedValues}]}
135 nextofnext=${sorted[${usedValuesNext}]}
136
137 currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
138 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
139 divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
140
141 done
142
143 #echo "so this is what we got $finalvalue, when divided using $divisor"
144
145 # array holds the results
146 #array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
147 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
148 array[$highest]=$finalvalue
149 homologies[$highest]=$usedValues
150
151
152 highest=`expr $highest + 1`
153 currsum=0
154 names+=($currgenome)
155 currgenome=$othergenome
156 othergencounter=0
157 #fi
158
159
160 highest=`expr $highest - 1`
161 rm $1.temp
162
163 tsum=0
164 echo "deleteme" > $1.inter
165 rm $1.inter
166 aux=0
167 for ((i = 1; i <= highest; i++)); do
168 echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" >> $1.inter
169 #echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}"
170 aux=`expr $aux + 1`
171 #val=${array[${i}]}
172 #tsum=$(awk -v a="$tsum" -v b="$val" '{print a=a+b}')
173 done
174
175 #awk -v a="$tsum" b="$highest" '{print a/b}'
176
177 #sumfirst=$(awk -F "," 'BEGIN{suma=0}{suma = suma + $8}END{print suma}' "$CSV")
178 #echo "$sumfirst"
179
180
181