Mercurial > repos > alvarofaure > bitlab
comparison chromeister/bin/generate-one-score.sh @ 1:3d1fbde7e0cc draft default tip
Deleted selected files
author | alvarofaure |
---|---|
date | Thu, 13 Dec 2018 03:41:58 -0500 |
parents | 7fdf47a0bae8 |
children |
comparison
equal
deleted
inserted
replaced
0:7fdf47a0bae8 | 1:3d1fbde7e0cc |
---|---|
1 #!/usr/bin/env bash | |
2 CSV=$1 | |
3 TH=$2 | |
4 | |
5 if [ $# -ne 2 ]; then | |
6 echo " ==== ERROR ... you called this script inappropriately." | |
7 echo "" | |
8 echo " usage: $0 <index.csv> <threshold>" | |
9 echo "" | |
10 exit -1 | |
11 fi | |
12 | |
13 | |
14 BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" | |
15 | |
16 # get first genome in list (they are sorted) | |
17 currgenome=$(tail -n +2 "$CSV" | head -1 | awk -F "," '{print $6}') | |
18 | |
19 # fill array of chromosomes similarity | |
20 | |
21 array=() | |
22 arraytosort=() | |
23 names=() | |
24 homologies=() | |
25 condition=0 | |
26 othergencounter=0 | |
27 # for problems with chromo X and Y | |
28 highest=1 | |
29 # For all lines | |
30 | |
31 cat $CSV | tail -n +2 > $1.temp | |
32 | |
33 while IFS= read -r i | |
34 do | |
35 | |
36 othergenome=$(echo "$i" | awk -F "," '{print $6}') | |
37 if [ "$condition" -eq 0 ]; then | |
38 currgenome=$othergenome | |
39 condition=1 | |
40 fi | |
41 | |
42 if [ "$othergenome" != "$currgenome" ]; then | |
43 | |
44 # Sort the array with temporal values | |
45 #printf '%s\n' "${arraytosort[@]}" | |
46 #echo "name is $currgenome" | |
47 | |
48 sorted=($(printf '%s\n' "${arraytosort[@]}"|sort)) | |
49 | |
50 #echo "For chroomo $currgenome we have " | |
51 #echo $(printf '%s,' "${sorted[@]}") | |
52 # accumulate sum until threshold is reached | |
53 usedValues=1 | |
54 usedValuesNext=2 | |
55 first=${sorted[0]} | |
56 next=${sorted[${usedValues}]} | |
57 nextofnext=${sorted[${usedValuesNext}]} | |
58 finalvalue=$first | |
59 divisor=0 | |
60 currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }') | |
61 TH=$(printf '%4.6f' $TH) | |
62 #echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')" | |
63 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ') | |
64 #echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue" | |
65 while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ]; | |
66 do | |
67 usedValues=`expr $usedValues + 1` | |
68 usedValuesNext=`expr $usedValuesNext + 1` | |
69 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}') | |
70 next=${sorted[${usedValues}]} | |
71 nextofnext=${sorted[${usedValuesNext}]} | |
72 | |
73 currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }') | |
74 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ') | |
75 divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}') | |
76 | |
77 done | |
78 | |
79 #echo "so this is what we got $finalvalue, when divided using $divisor" | |
80 | |
81 # array holds the results | |
82 #array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}') | |
83 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}') | |
84 array[$highest]=$finalvalue | |
85 homologies[$highest]=$usedValues | |
86 | |
87 highest=`expr $highest + 1` | |
88 condition=0 | |
89 names+=($currgenome) | |
90 othergencounter=0 | |
91 unset arraytosort | |
92 | |
93 | |
94 | |
95 | |
96 getvalue=$(echo "$i" | awk -F "," '{print $8}') | |
97 # Copy value to array | |
98 arraytosort[$othergencounter]=$getvalue | |
99 #currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}') | |
100 othergencounter=`expr $othergencounter + 1` | |
101 else | |
102 getvalue=$(echo "$i" | awk -F "," '{print $8}') | |
103 # Copy value to array | |
104 arraytosort[$othergencounter]=$getvalue | |
105 #currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}') | |
106 othergencounter=`expr $othergencounter + 1` | |
107 | |
108 fi | |
109 | |
110 done < "$1.temp" | |
111 | |
112 # do the last!!! | |
113 #if [ "$lastprint" == "$currgenome" ]; then | |
114 # Sort the array with temporal values | |
115 sorted=($(printf '%s\n' "${arraytosort[@]}"|sort)) | |
116 | |
117 usedValues=1 | |
118 usedValuesNext=2 | |
119 first=${sorted[0]} | |
120 next=${sorted[${usedValues}]} | |
121 nextofnext=${sorted[${usedValuesNext}]} | |
122 finalvalue=$first | |
123 divisor=0 | |
124 currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }') | |
125 TH=$(printf '%4.6f' $TH) | |
126 #echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')" | |
127 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ') | |
128 #echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue" | |
129 while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ]; | |
130 do | |
131 usedValues=`expr $usedValues + 1` | |
132 usedValuesNext=`expr $usedValuesNext + 1` | |
133 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}') | |
134 next=${sorted[${usedValues}]} | |
135 nextofnext=${sorted[${usedValuesNext}]} | |
136 | |
137 currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }') | |
138 condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ') | |
139 divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}') | |
140 | |
141 done | |
142 | |
143 #echo "so this is what we got $finalvalue, when divided using $divisor" | |
144 | |
145 # array holds the results | |
146 #array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}') | |
147 finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}') | |
148 array[$highest]=$finalvalue | |
149 homologies[$highest]=$usedValues | |
150 | |
151 | |
152 highest=`expr $highest + 1` | |
153 currsum=0 | |
154 names+=($currgenome) | |
155 currgenome=$othergenome | |
156 othergencounter=0 | |
157 #fi | |
158 | |
159 | |
160 highest=`expr $highest - 1` | |
161 rm $1.temp | |
162 | |
163 tsum=0 | |
164 echo "deleteme" > $1.inter | |
165 rm $1.inter | |
166 aux=0 | |
167 for ((i = 1; i <= highest; i++)); do | |
168 echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" >> $1.inter | |
169 #echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" | |
170 aux=`expr $aux + 1` | |
171 #val=${array[${i}]} | |
172 #tsum=$(awk -v a="$tsum" -v b="$val" '{print a=a+b}') | |
173 done | |
174 | |
175 #awk -v a="$tsum" b="$highest" '{print a/b}' | |
176 | |
177 #sumfirst=$(awk -F "," 'BEGIN{suma=0}{suma = suma + $8}END{print suma}' "$CSV") | |
178 #echo "$sumfirst" | |
179 | |
180 | |
181 |