Mercurial > repos > bitlab > chromeister
view chromeister/bin/generate-one-score.sh @ 0:4616cc3476d4 draft
Uploaded
author | bitlab |
---|---|
date | Sat, 15 Dec 2018 18:06:48 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env bash CSV=$1 TH=$2 if [ $# -ne 2 ]; then echo " ==== ERROR ... you called this script inappropriately." echo "" echo " usage: $0 <index.csv> <threshold>" echo "" exit -1 fi BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # get first genome in list (they are sorted) currgenome=$(tail -n +2 "$CSV" | head -1 | awk -F "," '{print $6}') # fill array of chromosomes similarity array=() arraytosort=() names=() homologies=() condition=0 othergencounter=0 # for problems with chromo X and Y highest=1 # For all lines cat $CSV | tail -n +2 > $1.temp while IFS= read -r i do othergenome=$(echo "$i" | awk -F "," '{print $6}') if [ "$condition" -eq 0 ]; then currgenome=$othergenome condition=1 fi if [ "$othergenome" != "$currgenome" ]; then # Sort the array with temporal values #printf '%s\n' "${arraytosort[@]}" #echo "name is $currgenome" sorted=($(printf '%s\n' "${arraytosort[@]}"|sort)) #echo "For chroomo $currgenome we have " #echo $(printf '%s,' "${sorted[@]}") # accumulate sum until threshold is reached usedValues=1 usedValuesNext=2 first=${sorted[0]} next=${sorted[${usedValues}]} nextofnext=${sorted[${usedValuesNext}]} finalvalue=$first divisor=0 currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }') TH=$(printf '%4.6f' $TH) #echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')" condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ') #echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue" while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ]; do usedValues=`expr $usedValues + 1` usedValuesNext=`expr $usedValuesNext + 1` finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}') next=${sorted[${usedValues}]} nextofnext=${sorted[${usedValuesNext}]} currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }') condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ') divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}') done #echo "so this is what we got $finalvalue, when divided using $divisor" # array holds the results #array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}') finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}') array[$highest]=$finalvalue homologies[$highest]=$usedValues highest=`expr $highest + 1` condition=0 names+=($currgenome) othergencounter=0 unset arraytosort getvalue=$(echo "$i" | awk -F "," '{print $8}') # Copy value to array arraytosort[$othergencounter]=$getvalue #currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}') othergencounter=`expr $othergencounter + 1` else getvalue=$(echo "$i" | awk -F "," '{print $8}') # Copy value to array arraytosort[$othergencounter]=$getvalue #currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}') othergencounter=`expr $othergencounter + 1` fi done < "$1.temp" # do the last!!! #if [ "$lastprint" == "$currgenome" ]; then # Sort the array with temporal values sorted=($(printf '%s\n' "${arraytosort[@]}"|sort)) usedValues=1 usedValuesNext=2 first=${sorted[0]} next=${sorted[${usedValues}]} nextofnext=${sorted[${usedValuesNext}]} finalvalue=$first divisor=0 currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }') TH=$(printf '%4.6f' $TH) #echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')" condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ') #echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue" while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ]; do usedValues=`expr $usedValues + 1` usedValuesNext=`expr $usedValuesNext + 1` finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}') next=${sorted[${usedValues}]} nextofnext=${sorted[${usedValuesNext}]} currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }') condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ') divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}') done #echo "so this is what we got $finalvalue, when divided using $divisor" # array holds the results #array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}') finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}') array[$highest]=$finalvalue homologies[$highest]=$usedValues highest=`expr $highest + 1` currsum=0 names+=($currgenome) currgenome=$othergenome othergencounter=0 #fi highest=`expr $highest - 1` rm $1.temp tsum=0 echo "deleteme" > $1.inter rm $1.inter aux=0 for ((i = 1; i <= highest; i++)); do echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" >> $1.inter #echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" aux=`expr $aux + 1` #val=${array[${i}]} #tsum=$(awk -v a="$tsum" -v b="$val" '{print a=a+b}') done #awk -v a="$tsum" b="$highest" '{print a/b}' #sumfirst=$(awk -F "," 'BEGIN{suma=0}{suma = suma + $8}END{print suma}' "$CSV") #echo "$sumfirst"