diff chromeister/bin/generate-one-score.sh @ 1:3d1fbde7e0cc draft default tip

Deleted selected files
author alvarofaure
date Thu, 13 Dec 2018 03:41:58 -0500
parents 7fdf47a0bae8
children
line wrap: on
line diff
--- a/chromeister/bin/generate-one-score.sh	Wed Dec 12 07:18:40 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-#!/usr/bin/env bash
-CSV=$1
-TH=$2
-
-if [ $# -ne 2 ]; then
-   echo " ==== ERROR ... you called this script inappropriately."
-   echo ""
-   echo "   usage:  $0 <index.csv> <threshold>"
-   echo ""
-   exit -1
-fi
-
-
-BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-# get first genome in list (they are sorted)
-currgenome=$(tail -n +2 "$CSV" | head -1 | awk -F "," '{print $6}')
-
-# fill array of chromosomes similarity
-
-array=()
-arraytosort=()
-names=()
-homologies=()
-condition=0
-othergencounter=0
-# for problems with chromo X and Y
-highest=1 
-# For all lines
-
-cat $CSV | tail -n +2 > $1.temp
-
-while IFS= read -r i
-do
-
-	othergenome=$(echo "$i" | awk -F "," '{print $6}')
-	if [ "$condition" -eq 0 ]; then
-		currgenome=$othergenome
-		condition=1
-	fi
-	
-	if [ "$othergenome" != "$currgenome" ]; then
-	
-		# Sort the array with temporal values
-		#printf '%s\n' "${arraytosort[@]}"
-		#echo "name is $currgenome"
-		
-		sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
-		
-		#echo "For chroomo $currgenome we have "
-		#echo $(printf '%s,' "${sorted[@]}")
-		# accumulate sum until threshold is reached
-		usedValues=1
-		usedValuesNext=2
-		first=${sorted[0]}
-		next=${sorted[${usedValues}]}
-		nextofnext=${sorted[${usedValuesNext}]}
-		finalvalue=$first
-		divisor=0
-		currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
-		TH=$(printf '%4.6f' $TH)
-		#echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
-		condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
-		#echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
-		while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
-		do
-			usedValues=`expr $usedValues + 1`
-			usedValuesNext=`expr $usedValuesNext + 1`
-			finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
-			next=${sorted[${usedValues}]}
-			nextofnext=${sorted[${usedValuesNext}]}
-			
-			currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
-			condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
-			divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
-			
-		done
-
-		#echo "so this is what we got $finalvalue, when divided using $divisor"
-		
-		# array holds the results
-		#array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
-		finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
-		array[$highest]=$finalvalue
-		homologies[$highest]=$usedValues
-		
-		highest=`expr $highest + 1`
-		condition=0
-		names+=($currgenome)
-		othergencounter=0
-		unset arraytosort
-		
-		
-		
-		
-		getvalue=$(echo "$i" | awk -F "," '{print $8}')
-		# Copy value to array 
-		arraytosort[$othergencounter]=$getvalue
-		#currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
-		othergencounter=`expr $othergencounter + 1`
-	else
-		getvalue=$(echo "$i" | awk -F "," '{print $8}')
-		# Copy value to array 
-		arraytosort[$othergencounter]=$getvalue
-		#currsum=$(awk -v a="$currsum" -v b="$getvalue" 'BEGIN {print a=a+(1-b); exit}')
-		othergencounter=`expr $othergencounter + 1`
-
-	fi
-
-done < "$1.temp"
-
-# do the last!!!
-#if [ "$lastprint" == "$currgenome" ]; then
-# Sort the array with temporal values
-sorted=($(printf '%s\n' "${arraytosort[@]}"|sort))
-
-usedValues=1
-usedValuesNext=2
-first=${sorted[0]}
-next=${sorted[${usedValues}]}
-nextofnext=${sorted[${usedValuesNext}]}
-finalvalue=$first
-divisor=0
-currdiff=$(LC_NUMERIC=POSIX awk -v a="$next" -v b="$nextofnext" 'BEGIN {print b-a }')
-TH=$(printf '%4.6f' $TH)
-#echo "$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("comp %f > %f = %d",a,b,a>b)} ')"
-condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d",a>b)} ')
-#echo "first $first next $next result $currdiff condition $condition divisor $divisor th $TH finalvalue $finalvalue"
-while [ $condition -eq 1 -a $usedValuesNext -lt ${#sorted[@]} ];
-do
-        usedValues=`expr $usedValues + 1`
-        usedValuesNext=`expr $usedValuesNext + 1`
-        finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$next" 'BEGIN {print (a+b)}')
-        next=${sorted[${usedValues}]}
-        nextofnext=${sorted[${usedValuesNext}]}
-
-        currdiff=$(LC_NUMERIC=POSIX awk -v a="$nextofnext" -v b="$next" 'BEGIN {printf("%f", b-a) }')
-        condition=$(LC_NUMERIC=POSIX awk -v a="$currdiff" -v b="$TH" 'BEGIN { printf("%d", a>b)} ')
-        divisor=$(LC_NUMERIC=POSIX awk -v a="$divisor" 'BEGIN {print a+0.1}')
-
-done
-
-#echo "so this is what we got $finalvalue, when divided using $divisor"
-
-# array holds the results
-#array[$highest]=$(awk -v a="$currsum" -v b="$othergencounter" 'BEGIN {print a/b}')
-finalvalue=$(LC_NUMERIC=POSIX awk -v a="$finalvalue" -v b="$usedValues" -v c="$divisor" 'BEGIN {printf("%f", a/(b-c))}')
-array[$highest]=$finalvalue
-homologies[$highest]=$usedValues
-
-
-highest=`expr $highest + 1`
-currsum=0
-names+=($currgenome)	
-currgenome=$othergenome
-othergencounter=0
-#fi
-
-
-highest=`expr $highest - 1`
-rm $1.temp
-
-tsum=0
-echo "deleteme" > $1.inter
-rm $1.inter
-aux=0
-for ((i = 1; i <= highest; i++)); do
-	echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}" >> $1.inter
-	#echo "${names[${aux}]} ${array[${i}]} ${homologies[${i}]}"
-	aux=`expr $aux + 1`
-	#val=${array[${i}]}
-	#tsum=$(awk -v a="$tsum" -v b="$val" '{print a=a+b}')
-done
-
-#awk -v a="$tsum" b="$highest" '{print a/b}'
-
-#sumfirst=$(awk -F "," 'BEGIN{suma=0}{suma = suma + $8}END{print suma}' "$CSV")
-#echo "$sumfirst"
-
-
-