Mercurial > repos > bitlab > bitlab

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/.gitignore	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,9 @@
+*
+
+!.gitignore
+!bin/*.sh
+!bin/*.mat
+
+!src/*
+
+!*/
Binary file gecko/bin/FragHits has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/allVsAll.sh	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+DIR=$1
+L=$2
+S=$3
+WL=$4
+EXT=$5
+
+array=()
+x=0
+
+if [ $# != 5 ]; then
+	echo "***ERROR*** Use: $0 genomesDirectory L(200) S(40) K(8) fastaFilesExtension"
+	exit -1
+fi
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+for elem in $(ls -d $DIR/*.$EXT | awk -F "/" '{print $NF}' | awk -F ".$EXT" '{print $1}')
+do
+	array[$x]=$elem
+	x=`expr $x + 1`
+	#echo "X: $elem"
+done
+
+for ((i=0 ; i < ${#array[@]} ; i++))
+do
+	for ((j=i ; j < ${#array[@]} ; j++))
+	do
+		if [ $i != $j ]; then
+				seqX=${array[$i]}
+				seqY=${array[$j]}
+				echo "----------${seqX}-${seqY}-----------"
+			if [[ ! -f frags/${seqX}-${seqY}.frags ]];	then
+
+				echo "${BINDIR}/workflow.sh $DIR/${seqX}.$EXT $DIR/${seqY}.$EXT $L $S $WL 1"
+				${BINDIR}/workflow.sh $DIR/${seqX}.$EXT $DIR/${seqY}.$EXT $L $S $WL 1
+
+			fi
+		fi
+	done
+done
Binary file gecko/bin/combineFrags has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/comparison.sh	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+FL=1000   # frequency limit
+
+if [ $# != 7 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 seqXName seqYName lenght similarity WL fixedL strand"
+   echo ""
+   exit -1
+fi
+
+seqXName=$(basename "$1")
+extensionX="${seqXName##*.}"
+seqXName="${seqXName%.*}"
+
+seqYName=$(basename "$2")
+extensionY="${seqYName##*.}"
+seqYName="${seqYName%.*}"
+
+#seqXName=`basename $1 .fasta`
+#seqYName=`basename $2 .fasta`
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+length=${3}
+similarity=${4}
+WL=${5} # wordSize
+fixedL=${6}
+strand=${7}
+distance=$((4*${WL}))
+
+if [[ ! -f ../hits/${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered ]]; then
+	echo "${BINDIR}/hits ${seqXName} ${seqYName} ${seqXName}-${seqYName}-K${WL}.hits ${FL} ${WL}"
+	${BINDIR}/hits ${seqXName} ${seqYName} ${seqXName}-${seqYName}-K${WL}.hits ${FL} ${WL}
+
+	echo "${BINDIR}/sortHits 10000000 32 ${seqXName}-${seqYName}-K${WL}.hits ${seqXName}-${seqYName}-K${WL}.hits.sorted"
+	${BINDIR}/sortHits 10000000 32 ${seqXName}-${seqYName}-K${WL}.hits ${seqXName}-${seqYName}-K${WL}.hits.sorted
+
+	echo "${BINDIR}/filterHits ${seqXName}-${seqYName}-K${WL}.hits.sorted ${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered ${WL}"
+	${BINDIR}/filterHits ${seqXName}-${seqYName}-K${WL}.hits.sorted ${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered ${WL}
+
+	mv ${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered ../hits/
+fi
+
+ln -s ../hits/${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered .
+
+echo "${BINDIR}/FragHits $1 $2 ${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered ${seqXName}-${seqYName}-s${strand}.frags ${length} ${similarity} ${WL} ${fixedL} ${strand}"
+${BINDIR}/FragHits $1 $2 ${seqXName}-${seqYName}-K${WL}.hits.sorted.filtered ${seqXName}-${seqYName}-s${strand}.frags ${length} ${similarity} ${WL} ${fixedL} ${strand}
+
+echo "--------------------DONE------------------"
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/dictionary.sh	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+FL=1000   # frequency limit
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+if [ $# != 1 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 seqXName.fasta"
+   echo ""
+   exit -1
+fi
+
+seqName=$(basename "$1")
+extension="${seqName##*.}"
+seqName="${seqName%.*}"
+
+# find words and order
+echo "${BINDIR}/words $1 ${seqName}.words.unsort"
+${BINDIR}/words $1 ${seqName}.words.unsort
+echo "${BINDIR}/sortWords 10000000 32 ${seqName}.words.unsort ${seqName}.words.sort"
+${BINDIR}/sortWords 10000000 32 ${seqName}.words.unsort ${seqName}.words.sort
+
+# Create hash table in disk
+echo "${BINDIR}/w2hd ${seqName}.words.sort ${seqName}"
+${BINDIR}/w2hd ${seqName}.words.sort ${seqName}
+
Binary file gecko/bin/filterFrags has changed
Binary file gecko/bin/filterHits has changed
Binary file gecko/bin/fragStat has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/frags2align.sh	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+if [ $# != 4 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 fragsFILE.frags fastaX fastaY alignments.txt"
+   echo ""
+   exit -1
+fi
+
+MGDIR=/home/galaxy-bitlab/temp
+
+FRAGS=$1
+FASTAX=$2
+FASTAY=$3
+ALIGN=$4
+
+fragsName=$(basename "$1")
+fastaXname=$(basename "$2")
+fastaYname=$(basename "$3")
+alignName=$(basename "$4")
+fragsName="${fragsName%.*}"
+fastaXname="${fastaXname%.*}"
+fastaYname="${fastaYname%.*}"
+alignName="${alignName%.*}"
+
+cp $1 $MGDIR/${fragsName}.frags
+cp $2 $MGDIR/${fastaXname}.fasta
+cp $3 $MGDIR/${fastaYname}.fasta
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+# generate indices
+
+$BINDIR/indexmaker $MGDIR/$fastaXname.fasta $MGDIR/$fastaXname.idx
+$BINDIR/indexmaker $MGDIR/$fastaYname.fasta $MGDIR/$fastaYname.idx
+
+$BINDIR/reverseComplement $MGDIR/$fastaYname.fasta $MGDIR/$fastaYname.rev
+$BINDIR/indexmaker $MGDIR/$fastaYname.rev $MGDIR/$fastaYname.rev.idx
+
+# extract frags
+
+$BINDIR/frags2text $MGDIR/$fragsName.frags $MGDIR/$fastaXname.fasta $MGDIR/$fastaYname.fasta $MGDIR/$fastaYname.rev $MGDIR/$fastaXname.idx $MGDIR/$fastaYname.idx $MGDIR/$fastaYname.rev.idx $MGDIR/$alignName.txt
+
+rm $MGDIR/$fastaXname.idx
+rm $MGDIR/$fastaYname.idx
+rm $MGDIR/$fastaYname.rev
+rm $MGDIR/$fastaYname.rev.idx
+
+mv $MGDIR/$alignName.txt $ALIGN
+
Binary file gecko/bin/frags2text has changed
Binary file gecko/bin/getInfo has changed
Binary file gecko/bin/hdStat has changed
Binary file gecko/bin/hits has changed
Binary file gecko/bin/hitsStat has changed
Binary file gecko/bin/indexmaker has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/matrix.mat	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,6 @@
+ACGT
+ACGT
+4	-4	-4	-4
+-4	4	-4	-4
+-4	-4	4	-4
+-4	-4	-4	4
Binary file gecko/bin/reverseComplement has changed
Binary file gecko/bin/sortHits has changed
Binary file gecko/bin/sortWords has changed
Binary file gecko/bin/w2hd has changed
Binary file gecko/bin/words has changed
Binary file gecko/bin/wordsStat has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/bin/workflow.sh	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,165 @@
+#!/bin/bash
+
+FL=1000   # frequency limit
+
+if [ $# != 8 ]; then
+   echo " ==== ERROR ... you called this script inappropriately."
+   echo ""
+   echo "   usage:  $0 seqXName seqYName lenght similarity WL fixedL output.frags output.csv"
+   echo ""
+   exit -1
+fi
+
+#{
+
+
+MYRAND=$((( RANDOM % 10000000) +1))
+MGDIR=${PWD}/${MYRAND}
+echo "MGDIR $MGDIR"
+mkdir -p ${MGDIR}
+
+genoXname=$(basename "$1")
+genoYname=$(basename "$2")
+genoXname="${genoXname%.*}"
+genoYname="${genoYname%.*}"
+
+
+cp $1 $MGDIR/${genoXname}.fasta
+cp $2 $MGDIR/${genoYname}.fasta
+mkdir -p ${MGDIR}/dictionaries
+mkdir -p ${MGDIR}/fragments
+
+genoXExt="fasta"
+genoYExt="fasta"
+
+
+
+BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+
+length=${3}
+similarity=${4}
+WL=${5} # wordSize
+fixedL=${6}
+output=${7}
+csv=${8}
+
+mkdir ${MGDIR}/intermediateFiles
+
+mkdir ${MGDIR}/intermediateFiles/${genoXname}-${genoYname}
+mkdir ${MGDIR}/results
+mkdir ${MGDIR}/intermediateFiles/dictionaries
+mkdir ${MGDIR}/intermediateFiles/hits
+
+# Copiamos los fastas
+ln -s ${MGDIR}/${genoXname}.${genoXname} ${MGDIR}/intermediateFiles/${genoXname}-${genoYname}
+ln -s ${MGDIR}/${genoYname}.${genoYname} ${MGDIR}/intermediateFiles/${genoYname}-${genoXname}
+
+cd ${MGDIR}/intermediateFiles/${genoXname}-${genoYname}
+
+###############
+
+
+
+echo "${BINDIR}/reverseComplement ${MGDIR}/${genoYname}.${genoXExt} ${genoYname}-revercomp.${genoYExt}"
+${BINDIR}/reverseComplement ${MGDIR}/${genoYname}.${genoYExt} ${MGDIR}/${genoYname}-revercomp.${genoYExt}
+
+if [[ ! -f ../dictionaries/${genoXname}.d2hP ]];	then
+	echo "${BINDIR}/dictionary.sh ${MGDIR}/${genoXname}.${genoXExt} &"
+	${BINDIR}/dictionary.sh ${MGDIR}/${genoXname}.${genoXExt} &
+fi
+
+if [[ ! -f ../dictionaries/${seqYName}.d2hP ]];	then
+	echo "${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}.${genoYExt} &"
+	${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}.${genoYExt} &
+fi
+
+if [[ ! -f ../dictionaries/${genoYname}-revercomp.d2hP ]];	then
+	echo "${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}-revercomp.${genoYExt} &"
+	${BINDIR}/dictionary.sh ${MGDIR}/${genoYname}-revercomp.${genoYExt} &
+fi
+
+echo "Waiting for the calculation of the dictionaries"
+
+for job in `jobs -p`
+do
+    #echo $job
+    wait $job
+done
+
+
+
+mv ${genoXname}.d2hP ../dictionaries/
+mv ${genoXname}.d2hW ../dictionaries/
+mv ${genoYname}.d2hP ../dictionaries/
+mv ${genoYname}.d2hW ../dictionaries/
+mv ${genoYname}-revercomp.d2hP ../dictionaries/
+mv ${genoYname}-revercomp.d2hW ../dictionaries/
+
+
+
+# Hacemos enlace simbolico
+ln -s ../dictionaries/${genoXname}.d2hP .
+ln -s ../dictionaries/${genoXname}.d2hW .
+
+ln -s ../dictionaries/${genoYname}.d2hP .
+ln -s ../dictionaries/${genoYname}.d2hW .
+
+ln -s ../dictionaries/${genoYname}-revercomp.d2hP .
+ln -s ../dictionaries/${genoYname}-revercomp.d2hW .
+
+echo "${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} f &"
+${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} f &
+
+echo "${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}-revercomp.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} r &"
+${BINDIR}/comparison.sh ${MGDIR}/${genoXname}.${genoXExt} ${MGDIR}/${genoYname}-revercomp.${genoYExt} ${length} ${similarity} ${WL} ${fixedL} r &
+
+echo "Waiting for the comparisons"
+
+for job in `jobs -p`
+do
+    #echo $job
+    wait $job
+done
+
+
+#echo "rm ${seqYName}-revercomp.${extensionY}"
+#rm ${seqYName}-revercomp.${extensionY}
+
+
+echo "${BINDIR}/combineFrags ${genoXname}-${genoYname}-sf.frags ${genoXname}-${genoYname}-revercomp-sr.frags ${genoXname}-${genoYname}.frags"
+${BINDIR}/combineFrags ${genoXname}-${genoYname}-sf.frags ${genoXname}-${genoYname}-revercomp-sr.frags ${genoXname}-${genoYname}.frags
+
+#Borramos todo menos los frags y los diccionarios
+
+# Get Info from frags
+echo "${BINDIR}/getInfo ${genoXname}-${genoYname}.frags > ${genoXname}-${genoYname}.csv"
+${BINDIR}/getInfo ${genoXname}-${genoYname}.frags > ${genoXname}-${genoYname}.csv.tmp
+cat ${genoXname}-${genoYname}.frags.INF ${genoXname}-${genoYname}.csv.tmp > ${genoXname}-${genoYname}.csv
+rm -rf ${genoXname}-${genoYname}.csv.tmp
+
+if [[ -L "../../${genoXname}.fasta" ]]
+then
+	rm ../../${genoYname}.fasta
+fi
+
+if [[ -L "../../${genoXname}.fasta" ]]
+then
+	rm ../../${genoYname}.fasta
+fi
+
+#Movemos los frags y los info
+mv ${genoXname}-${genoYname}.frags $output
+mv ${genoXname}-${genoYname}.csv $csv
+
+
+
+#echo "Borrando ${seqXName}-${seqYName}"
+cd ..
+
+#rm -rf ${seqXName}-${seqYName}
+
+cd ..
+
+
+rm -r ${MGDIR}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/frags2align.xml	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,13 @@
+<tool id="frags2align" name="Frags to align">
+  <description>Extract alignments from a binary Frags file</description>
+  <inputs>
+    <param name="fragsFile" type="data" format="frags" label="Frags file" help="Binary Frags file" />
+    <param name="fastaX" type="data" format="fasta" label="x Genome" help="Single genome file in fasta format"/>
+    <param name="fastaY" type="data" value="fasta" label="y Genome" help="Single genome file in fasta format" />
+  </inputs>
+  <command>/home/galaxy-bitlab/galaxy/tools/gecko/bin/frags2align.sh $fragsFile $fastaX $fastaY $alignments </command>
+  <outputs>
+    <data name="alignments" format="txt" label="Frags to align for genomes  ${fastaX.name}_${fastaY.name}"/>
+  </outputs>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/gecko.xml	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,44 @@
+<tool id="GECKO" name="GECKO" version="1.0.1">
+  <description>A pairwise genome comparison software for the detection of High-scoring Segment Pairs</description>
+
+  <command>/home/galaxy-bitlab/galaxy/tools/gecko/bin/workflow.sh $genome1 $genome2 $length $similarity $wl 1 $frags_output0 $csv_output1</command>
+
+  <inputs>
+    <param name="genome1" type="data" format="fasta" label="Sequence X" help="Query sequence file in fasta format" />
+    <param name="genome2" type="data" format="fasta" label="Sequence Y" help="Query reference file in fasta format"/>
+    <param name="length" type="integer" value="20" label="Minimum length to report a frag" />
+    <param name="similarity" type="integer" value="20" label="Minimum similarity to report frag" />
+    <param name="wl" type="integer" value="16" label="Word length" />
+  </inputs>
+
+  <outputs>
+     <data name="frags_output0" format="data" label="Frags comparison for ${genome1.name}_${genome2.name}"/>
+     <data name="csv_output1" format="csv" label="Csv comparison for ${genome1.name}_${genome2.name}"/>
+  </outputs>
+
+  <help>
+
+GECKO (GEnome Comparison with K-mers Out-of-core) is a fast, modular application designed to identify collections of HSPs in a pairwise genome comparisons. By employing novel filtering and data storing strategies, it is able to compare genome-sized sequences in less time.
+
+-----
+
+**Manual**
+
+To use GECKO, simply upload two .fasta files and select these as Sequence X and as Sequence Y.
+Once so, choose the parameters that suit best your comparison:
+
+
+- Minimum length: This parameter is the minimum length in nucleotides for an HSP (similarity fragment) to be conserved. Any HSP below this length will be filtered out of the comparison. It is recommended to use around 40 bp for small organisms (e.g. bacterial mycoplasma or E. Coli) and around 100 bp or more for larger organisms (e.g. human chromosomes).
+
+- Minimum similarity: This parameter is analogous to the minimum length, however, instead of length, the similarity is used as threshold. The similarity is calculated as the score attained by an HSP divided by the maximum possible score. Use values above 50 to filter noise.
+
+- Word length: This parameter is the seed size used to find HSPs. A smaller seed size will increase sensitivity and decrease performance, whereas a larger seed size will decrease sensitivity and increase performance. Recommended values are 12 or 16 for smaller organisms and 32 for larger organisms. These values must be multiples of 4.
+
+</help>
+
+   <citations>
+	   <citation type="doi">10.1186/s12859-015-0679-9</citation>
+   </citations>
+
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/FragHits.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,327 @@
+/* FragHITS  Fragments detection betweem two bioogical seqeunces
+
+ Search for all By-Identitity ungapped fragments with length > MinLen
+
+ This program is an enhancement of AllFrags (that searches the full space)
+ In this case, start-Fragments (HITS) has been detected in previous steps and
+ this hits are used as seeds to extend the fragment. A backward step is also
+ included to extend the fragment in the second opposite direction from the hit
+
+ Syntax:
+
+ AllFragsHits SeqX.file SeqY.file HitsFile Out.file Lmin SimThr
+
+ Some new parameters have been included:
+ - HitsFile (binary) long Diag/posX/posY
+ - Lmin     Minimum length in the case of fixed length and percentage otherwise
+ - SimThr   Similarity (identity) Threshold
+
+ - SeqX & SeqY files are in fasta format
+ - Out file (binary) will save a fragment with the format specified in structs.h.
+
+ oscart@uma.es
+ -------------------------------------------*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include "commonFunctions.h"
+#include "structs.h"
+#include "comparisonFunctions.h"
+
+#define POINT 4
+
+int HitsControl(char **av);
+int FragFromHit(long M[1000][100], struct FragFile *myF, hit *H, struct Sequence *sX,
+		uint64_t n0, struct Sequence *sY,
+		uint64_t n1, uint64_t nSeqs1, uint64_t LenOrPer, uint64_t SimTh, int WL,
+		int fixedL, char strand);
+
+int main(int ac, char **av) {
+
+	if (ac != 10)
+		terror(
+				"usage: FragsHits SeqX SeqY Hits.File Outfile Lmin SimThr WL fixedLen(1/0) strand(f/r)");
+
+	if (av[9][0] != 'f' && av[9][0] != 'r')
+		terror("strand argument must be 'f' or 'r'");
+
+	HitsControl(av);
+
+	return 0;
+}
+
+int HitsControl(char **av) {
+	struct Sequence *sX, *sY;
+	uint64_t n0, n1, nSeqs0, nSeqs1;
+	uint64_t Lmin, SimTh;
+	int WL, fixedL, i, j;
+	int newFrag;
+	uint64_t nFrags = 0, nHitsUsed = 0, nHits = 0;
+	int64_t lastOffset, lastDiagonal;
+	struct FragFile myF;
+	char infoFileName[200], matFileName[200];
+	char strand;
+	FILE *f, *fH, *fOut, *fInf, *fMat;
+
+	hit h;
+
+	//MAT
+	long coverage[1000][100];
+	for(i=0; i<1000; i++){
+		for(j=0; j<100; j++){
+			coverage[i][j]=0;
+		}
+	}
+	//---
+
+	//Initialize values
+	Lmin = atoi(av[5]);
+	SimTh = atoi(av[6]);
+	WL = atoi(av[7]);
+	fixedL = atoi(av[8]);
+	strand = av[9][0];
+
+	//Open files
+	if ((f = fopen(av[1], "rt")) == NULL)
+		terror("opening seqX file");
+	sX = LeeSeqDB(f, &n0, &nSeqs0, 0);
+	fclose(f);
+
+	if ((f = fopen(av[2], "rt")) == NULL)
+		terror("opening seqY file");
+	sY = LeeSeqDB(f, &n1, &nSeqs1, 0);
+	fclose(f);
+
+	if ((fH = fopen(av[3], "rb")) == NULL)
+		terror("opening HITS file");
+
+	if ((fOut = fopen(av[4], "wb")) == NULL)
+		terror("opening output file");
+
+	//Write sequence lengths
+	writeSequenceLength(&n0, fOut);
+	writeSequenceLength(&n1, fOut);
+
+	// read Hits
+	if(fread(&h, sizeof(hit), 1, fH)!=1){
+		terror("Empty hits file");
+	}
+	lastDiagonal = h.diag;
+	lastOffset = h.posX - 1;
+
+	while (!feof(fH)) {
+		nHits++;
+		if (lastDiagonal != h.diag) {
+			//Different diagonal update the variables
+			lastDiagonal = h.diag;
+			lastOffset = h.posX - 1;
+		}
+		//We have a casting here because of a funny error
+		//where the program was saying that -1 > 0
+		if (lastOffset > ((int64_t) h.posX)) {
+			//The hit is covered by a previous frag in the same diagonal
+			if(fread(&h, sizeof(hit), 1, fH)!=1){
+				if(ferror(fH))terror("Error reading from hits file");
+			}
+			continue;
+		}
+
+		nHitsUsed++;
+		newFrag = FragFromHit(coverage, &myF, &h, sX, n0, sY, n1, nSeqs1, Lmin, SimTh,
+				WL, fixedL, strand);
+		if (newFrag) {
+			writeFragment(&myF, fOut);
+			lastOffset = h.posX + myF.length;
+			nFrags++;
+		}
+		if(fread(&h, sizeof(hit), 1, fH)!=1){
+			if(ferror(fH))terror("Error reading from hits file");
+		}
+	}
+
+	fclose(fH);
+	fclose(fOut);
+
+	sprintf(matFileName, "%s.MAT", av[4]);
+	if ((fMat = fopen(matFileName, "wt")) == NULL)
+		terror("opening MAT file");
+
+	for(i=0; i<1000; i++){
+		for(j=0; j<100; j++){
+			fprintf(fMat, "%ld\t", coverage[i][j]);
+		}
+		fprintf(fMat, "\n");
+	}
+
+	fclose(fMat);
+
+	// metadata info
+	sprintf(infoFileName, "%s.INF", av[4]);
+	if ((fInf = fopen(infoFileName, "wt")) == NULL)
+		terror("opening INFO file");
+
+	fprintf(fInf, "All by-Identity Ungapped Fragments (Hits based approach)\n");
+	fprintf(fInf, "[Abr.98/Apr.2010/Dec.2011 -- <ortrelles@uma.es>\n");
+	fprintf(fInf, "SeqX filename        : %s\n", av[1]);
+	fprintf(fInf, "SeqY filename        : %s\n", av[2]);
+	fprintf(fInf, "SeqX name            : %s\n", sX->ident);
+	fprintf(fInf, "SeqY name            : %s\n", sY->ident);
+	fprintf(fInf, "SeqX length          : %" PRIu64 "\n", n0);
+	fprintf(fInf, "SeqY length          : %" PRIu64 "\n", n1);
+	fprintf(fInf, "Min.fragment.length  : %" PRIu64 "\n", Lmin);
+	fprintf(fInf, "Min.Identity         : %" PRIu64 "\n", SimTh);
+	fprintf(fInf, "Tot Hits (seeds)     : %" PRIu64 "\n", nHits);
+	fprintf(fInf, "Tot Hits (seeds) used: %" PRIu64 "\n", nHitsUsed);
+	fprintf(fInf, "Total fragments      : %" PRIu64 "\n", nFrags);
+	fprintf(fInf, "========================================================\n");
+	fclose(fInf);
+
+	return nHitsUsed;
+}
+
+/**
+ * Compute a fragments from one seed point
+ * Similarirty thershold and length > mimL
+ */
+int FragFromHit(long M[1000][100], struct FragFile *myF, hit *H, struct Sequence *sX,
+		uint64_t n0, struct Sequence *sY,
+		uint64_t n1, uint64_t nSeqs1, uint64_t Lm, uint64_t SimTh, int WL,
+		int fixedL, char strand) {
+	int64_t ldiag, ldiag2;
+	int64_t xfil, ycol;
+	/* for version with backward search */
+	int64_t xfil2, ycol2;
+	int fragmentLength = 0;
+	/* for version Maximum global---*/
+	int64_t xfilmax, ycolmax;
+	/* for version with backward search */
+	int64_t xfilmax2, ycolmax2;
+	int nIdentities = 0, maxIdentities = 0;
+	char valueX, valueY;
+	int fscore = 0, fscoreMax = 0; // full score
+
+	uint64_t minLength =
+			(fixedL) ?
+					Lm :
+					(uint64_t) (min(getSeqLength(sX),
+							getSeqLength(sY)) * (Lm / 100.0));
+
+	// Initialize values
+	ldiag = min(n0 - H->posX, n1 - H->posY);
+	//var to know how much we have until we reach the origin of coordinates
+	ldiag2 = min(H->posX, H->posY);
+	xfil = H->posX + WL;
+	xfil2 = H->posX - 1;
+	ycol = H->posY + WL;
+	ycol2 = H->posY - 1;
+	fragmentLength += WL;
+	xfilmax = xfil;
+	xfilmax2 = xfil2;
+	ycolmax = ycol;
+	ycolmax2 = ycol2;
+	nIdentities = maxIdentities = WL;
+	fscore = POINT * WL;
+	fscoreMax = fscore;
+
+	// now, looking for end_frag---
+	while (fragmentLength < ldiag) {
+		valueX = getValue(sX, xfil);
+		valueY = getValue(sY, ycol);
+		if (valueX == '*' || valueY == '*') {
+			//separator between sequences
+			break;
+		}
+
+		if (valueX == 'N' || valueY == 'N') {
+			fscore -= 1;
+		} else {
+			if (valueX == valueY) {
+				// match
+				fscore += POINT;
+				nIdentities++;
+				if (fscoreMax < fscore) {
+					fscoreMax = fscore;
+					xfilmax = xfil;
+					ycolmax = ycol;
+					maxIdentities = nIdentities;
+				}
+			} else {
+				fscore -= POINT;
+			}
+		}
+
+		xfil++;
+		ycol++;
+		fragmentLength++;
+		if (fscore < 0)
+			break;
+	}
+
+	/**
+	 * Backward search --- Oscar (Sept.2013)
+	 **/
+	fragmentLength = 0;
+	fscore = fscoreMax;
+	xfilmax2 = H->posX;
+	ycolmax2 = H->posY;
+	nIdentities = maxIdentities;
+	if (xfil2 >= 0 && ycol2 >= 0)
+		while (fragmentLength < ldiag2) {
+			valueX = getValue(sX, xfil2);
+			valueY = getValue(sY, ycol2);
+			if (valueX == '*' || valueY == '*') {
+				//separator between sequences
+				break;
+			}
+
+			if (valueX == 'N' || valueY == 'N') {
+				fscore -= 1;
+			} else {
+				if (valueX == valueY) {
+					// matches----
+					fscore += POINT;
+					nIdentities++;
+					if (fscoreMax < fscore) {
+						fscoreMax = fscore;
+						xfilmax2 = xfil2;
+						ycolmax2 = ycol2;
+						maxIdentities = nIdentities;
+					}
+				} else {
+					fscore -= POINT;
+				}
+			}
+
+			xfil2--;
+			ycol2--;
+			fragmentLength++;
+			if (fscore < 0)
+				break;
+		}
+
+	// Set the values of the FragFile
+	myF->diag = H->diag;
+	myF->xStart = (uint64_t) xfilmax2 - H->seqX;
+	myF->yStart = (uint64_t) ycolmax2 - H->seqY;
+	myF->xEnd = (uint64_t) xfilmax - H->seqX;
+	myF->yEnd = (uint64_t) ycolmax - H->seqY;;
+	myF->length = myF->xEnd - myF->xStart + 1;
+	myF->score = fscoreMax;
+	myF->ident = maxIdentities;
+	myF->similarity = myF->score * 100.0
+			/ scoreMax(&sX->datos[myF->xStart], &sY->datos[myF->yStart],
+					myF->length, POINT);
+	myF->seqX = H->seqX;
+	myF->seqY = (strand=='f')? H->seqY : nSeqs1 - H->seqY - 1;
+	myF->block = 0;
+	myF->strand = strand;
+
+	M[min(myF->length, 999)][(int)myF->similarity]++;
+
+	if (myF->length > minLength && myF->similarity > SimTh)
+		return 1;
+	else
+		return 0;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/Makefile	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,65 @@
+CC=gcc
+CXX=g++
+CFLAGS=-O3 -D_FILE_OFFSET_BITS=64 -Wall
+BIN=../bin
+
+all: reverseComplement words sortWords w2hd wordsStat hits sortHits filterHits FragHits combineFrags hitsStat hdStat fragStat getInfoCSB filterFrags indexmaker frags2text
+
+hits: hits.c
+	$(CC) $(CFLAGS) hits.c comparisonFunctions.c dictionaryFunctions.c commonFunctions.c -o $(BIN)/hits
+
+sortHits: sortHits.c
+	$(CC) $(CFLAGS) -DBaseType=hit sortHits.c comparisonFunctions.c commonFunctions.c quicksort.c -lpthread -o $(BIN)/sortHits
+
+filterHits: filterHits.c
+	$(CC) $(CFLAGS) filterHits.c comparisonFunctions.c commonFunctions.c -o $(BIN)/filterHits
+
+FragHits: FragHits.c
+	$(CC) $(CFLAGS) FragHits.c comparisonFunctions.c commonFunctions.c -o $(BIN)/FragHits
+
+combineFrags: combineFrags.c
+	$(CC) $(CFLAGS) combineFrags.c comparisonFunctions.c commonFunctions.c -o $(BIN)/combineFrags
+
+hitsStat: hitsStat.c
+	$(CC) $(CFLAGS) hitsStat.c commonFunctions.c -o $(BIN)/hitsStat
+
+hdStat: hdStat.c
+	$(CC) $(CFLAGS) hdStat.c commonFunctions.c dictionaryFunctions.c -o $(BIN)/hdStat
+
+fragStat: fragStat.c
+	$(CC) $(CFLAGS) fragStat.c comparisonFunctions.c commonFunctions.c -o $(BIN)/fragStat
+
+reverseComplement: reverseComplement.c
+	$(CC) $(CFLAGS) reverseComplement.c commonFunctions.c -o $(BIN)/reverseComplement
+
+words: words.c
+	$(CC) $(CFLAGS) words.c dictionaryFunctions.c commonFunctions.c -o $(BIN)/words
+
+sortWords: sortWords.c
+	$(CC) $(CFLAGS) -DBaseType=wentry sortWords.c dictionaryFunctions.c commonFunctions.c quicksort.c -lpthread -o $(BIN)/sortWords
+
+w2hd: w2hd.c
+	$(CC) $(CFLAGS) w2hd.c dictionaryFunctions.c commonFunctions.c -o $(BIN)/w2hd
+
+wordsStat: wordsStat.c
+	$(CC) $(CFLAGS) wordsStat.c dictionaryFunctions.c commonFunctions.c -o $(BIN)/wordsStat
+
+getInfoCSB:
+	$(CC) $(CFLAGS) getInfoCSB.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/getInfo
+
+filterFrags:
+	$(CC) $(CFLAGS) filterFrags.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/filterFrags
+
+indexmaker:
+	$(CC) $(CFLAGS) indexmaker.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/indexmaker
+
+frags2text:
+	$(CC) $(CFLAGS) frags2text.c fragmentv2.c commonFunctions.c comparisonFunctions.c -lm -o $(BIN)/frags2text
+
+clean:
+	rm -rf $(BIN)/getInfo
+	rm -rf $(BIN)/filterFrags
+	rm -rf $(BIN)/indexmaker
+	rm -rf $(BIN)/frags2text
+	rm -rf $(BIN)/reverseComplement $(BIN)/words $(BIN)/sortWords $(BIN)/w2hd $(BIN)/wordsStat
+	rm -rf $(BIN)/hits $(BIN)/sortHits $(BIN)/filterHits $(BIN)/FragHits $(BIN)/combineFrags $(BIN)/hitsStat $(BIN)/fragStat $(BIN)/hdStat
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/combineFrags.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,182 @@
+/* reduce 2 fragments files produced in parallel
+
+ Syntax: reduceFrags PrefixFRAGSfiles nChunks fileOUT
+
+ --------------------------------------------------------*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+
+void writeINFHeader(FILE *fIn, FILE *fOut);
+void getNumberOfHits(FILE *infFile, uint64_t *nHits, uint64_t *nHitsUsed, uint64_t *nFrags);
+
+int main(int ac, char** av) {
+	uint64_t n1, n2, nHitsForward, nHitsForwardUsed, nHitsReverse, nHitsReverseUsed, nFragsForward, nFragsReverse;
+	char infoFileName[200], matFileName[200];
+	FILE *fIn, *fInfIn1, *fInfIn2, *fInf, *fOut, *fMat;
+
+	struct FragFile frag;
+
+	long coverage[1000][100], valor, i, j;
+	for(i=0; i<1000; i++){
+                for(j=0; j<100; j++){
+                        coverage[i][j]=0;
+                }
+        }
+
+	if (ac != 4)
+		terror("Syntax: combineFrags forwardStrandFrags reverseStrandFrags fileOUT");
+
+	if ((fIn = fopen(av[1], "rb")) == NULL)
+		terror("Input forwardStrandFrags open error");
+
+	sprintf(infoFileName, "%s.INF", av[1]);
+	if ((fInfIn1 = fopen(infoFileName, "rt")) == NULL)
+		terror("Input forwardStrandFrags.INF open error");
+
+	//MAT
+	sprintf(matFileName, "%s.MAT", av[1]);
+        if ((fMat = fopen(matFileName, "rt")) == NULL)
+                terror("opening forwardStrandFrags.MAT file");
+	for (i=0;i<1000;i++){
+		for (j=0;j<100;j++){
+			if(fscanf(fMat,"%ld\t",&valor)!=1){
+				terror("Error reading forwardStrandFrags.MAT file");
+			}
+			coverage[i][j]+=valor;
+		}
+	}
+
+	fclose(fMat);
+
+	if ((fOut = fopen(av[3], "wb")) == NULL)
+		terror("OUTput file open error");
+
+	// sequences lengths
+	readSequenceLength(&n1, fIn);
+	readSequenceLength(&n2, fIn);
+
+	writeSequenceLength(&n1, fOut);
+	writeSequenceLength(&n2, fOut);
+
+	//First file...
+	readFragment(&frag, fIn);
+
+	while (!feof(fIn)) {
+		writeFragment(&frag, fOut);
+		readFragment(&frag, fIn);
+	}
+
+	fclose(fIn);
+
+	if ((fIn = fopen(av[2], "rb")) == NULL)
+		terror("Input reverseStrandFrags open error");
+
+	sprintf(infoFileName, "%s.INF", av[2]);
+	if ((fInfIn2 = fopen(infoFileName, "rt")) == NULL)
+		terror("Input reverseStrandFrags.INF open error");
+
+	//MAT
+	sprintf(matFileName, "%s.MAT", av[2]);
+	if ((fMat = fopen(matFileName, "rt")) == NULL)
+                terror("opening reverseStrandFrags.MAT file");
+	for (i=0;i<1000;i++){
+		for (j=0;j<100;j++){
+			if(fscanf(fMat,"%ld\t",&valor)!=1)
+				terror("Error reading reverseStrandFrags.MAT file");
+			coverage[i][j]+=valor;
+		}
+	}
+
+	fclose(fMat);
+
+	readSequenceLength(&n1, fIn);
+	readSequenceLength(&n2, fIn);
+
+	//Second file...
+	readFragment(&frag, fIn);
+
+	while (!feof(fIn)) {
+		writeFragment(&frag, fOut);
+		readFragment(&frag, fIn);
+	}
+
+	//MAT
+	sprintf(matFileName, "%s.MAT", av[3]);
+        if ((fMat = fopen(matFileName, "wt")) == NULL)
+                terror("opening MAT file");
+	for (i=0;i<1000;i++){
+		for (j=0;j<100;j++){
+			fprintf(fMat,"%ld\t",coverage[i][j]);
+		}
+		fprintf(fMat, "\n");
+	}
+
+	fclose(fMat);
+
+	// metadata info
+	sprintf(infoFileName, "%s.INF", av[3]);
+	if ((fInf = fopen(infoFileName, "wt")) == NULL)
+		terror("opening INFO file");
+
+	writeINFHeader(fInfIn1, fInf);
+	if(fseek(fInfIn1, 0, SEEK_SET) != 0)
+		terror("Error rewinding inf file");
+
+	getNumberOfHits(fInfIn1, &nHitsForward, &nHitsForwardUsed, &nFragsForward);
+	getNumberOfHits(fInfIn2, &nHitsReverse, &nHitsReverseUsed, &nFragsReverse);
+
+	fprintf(fInf, "Tot Hits (seeds)     : %" PRIu64 "\n", nHitsForward + nHitsReverse);
+	fprintf(fInf, "Tot Hits (seeds) used: %" PRIu64 "\n", nHitsForwardUsed + nHitsReverseUsed);
+	fprintf(fInf, "Total fragments      : %" PRIu64 "\n", nFragsForward + nFragsReverse);
+	fprintf(fInf, "========================================================\n");
+	fclose(fInf);
+
+	fclose(fIn);
+	fclose(fInfIn1);
+	fclose(fInfIn2);
+
+	fclose(fOut);
+
+	return 0;
+
+}
+
+void writeINFHeader(FILE *fIn, FILE *fOut){
+	char tmp[1024];
+	int i=0;
+	while((i < 10) && (fgets(tmp, 1024, fIn)!=NULL)){
+		fprintf(fOut, "%s", tmp);
+		i++;
+	}
+}
+
+void getNumberOfHits(FILE *infFile, uint64_t *nHits, uint64_t *nHitsUsed, uint64_t *nFrags){
+	char tmp[1024];
+	int i=0;
+	while((i < 10) && (fgets(tmp, 1024, infFile)!=NULL)){
+		i++;
+	}
+
+	if(fgets(tmp, 1024, infFile)==NULL)
+		terror("Wrong format in INF file, Tot Hits (seeds) line expected");
+
+	if(sscanf(tmp, "Tot Hits (seeds)     : %" PRIu64 "\n", nHits)!=1)
+		terror("Error reading: Tot Hits (seeds)");
+
+	if(fgets(tmp, 1024, infFile)==NULL)
+		terror("Wrong format in INF file, Tot Hits (seeds) used line expected");
+
+	if(sscanf(tmp, "Tot Hits (seeds) used: %" PRIu64 "\n", nHitsUsed)!=1)
+		terror("Error reading: Tot Hits (seeds) used");
+
+	if(fgets(tmp, 1024, infFile)==NULL)
+		terror("Wrong format in INF file, Total fragments line expected");
+
+	if(sscanf(tmp, "Total fragments      : %" PRIu64 "\n", nFrags)!=1)
+		terror("Error reading: Total fragments");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/commonFunctions.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,20 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include "structs.h"
+
+void terror(char *s) {
+	printf("ERR**** %s ****\n", s);
+	exit(-1);
+}
+
+char buffered_fgetc(char *buffer, uint64_t *pos, uint64_t *read, FILE *f) {
+    if (*pos >= READBUF) {
+        *pos = 0;
+        memset(buffer, 0, READBUF);
+        *read = fread(buffer, 1, READBUF, f);
+    }
+    *pos = *pos + 1;
+    return buffer[*pos-1];
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/commonFunctions.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,14 @@
+#ifndef COMMON_FUNCTIONS_H
+#define COMMON_FUNCTIONS_H
+
+/**
+ * Print the error message 's' and exit(-1)
+ */
+void terror(char *s);
+
+/**
+ * Function to buffer file reading
+ */
+char buffered_fgetc(char *buffer, uint64_t *pos, uint64_t *read, FILE *f);
+
+#endif /* COMMON_FUNCTIONS_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/comparisonFunctions.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,430 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <arpa/inet.h>
+#include "structs.h"
+#include "commonFunctions.h"
+
+#define MAXBUF 10000000
+
+int readHashEntry(hashentry *h, FILE *f, uint64_t freqThr) {
+	do {
+		if(fread(h, sizeof(hashentry), 1, f)!=1){
+			if(ferror(f))terror("Error reading hash entry from the file");
+		}
+	} while (!feof(f) && h->num > freqThr);
+
+	if (feof(f))
+		return -1;
+
+	return h->num;
+}
+
+void loadWordOcurrences(hashentry he, location** pos, FILE** f) {
+	// Load word position for he.word
+	if (he.num > MAXBUF) {
+		*pos = (location*) realloc(pos, he.num * sizeof(location));
+		if (pos == NULL)
+			terror("memory for position array");
+	}
+	fseek(*f, he.pos, SEEK_SET);
+	if(fread(*pos, sizeof(location), he.num, *f)!=he.num){
+		terror("Not possible to read the number of elements described");
+	}
+}
+
+unsigned long scoreMax(char *seq, char *seq2, uint64_t len, int point) {
+	//CHANGE WHEN USING PAM MATRIX
+	//Scor=0; for (i=0;ii<len;i++) if (Seq[i]==Seq2[i])scor+=Ptos
+	/* TO DELETE WARNINGS*/
+	if(seq+1){
+	}
+
+	if(seq2+1){
+	}
+	/**********************/
+
+	return len * point;
+}
+
+struct Sequence *LeeSeqDB(FILE *f, uint64_t *n, uint64_t *nSeqs,
+                          int fAst) {
+    char c;
+    uint64_t lon = 0, k = 0, seqs = 0;
+    uint64_t lonFinal = 0;
+    uint64_t SIZE = 0;
+    uint64_t i = 0, r = 0;
+    struct Sequence *sX;
+    char *seq = NULL;
+
+    //Initialize
+    *n = 0;
+
+    //Memory
+    if ((sX = calloc(1, sizeof(struct Sequence))) == NULL)
+        terror("Memory...");
+
+    fseek(f, 0, SEEK_END);
+    SIZE = ftell(f);
+    fseek(f, 0, SEEK_SET);
+
+    if ((sX->datos = calloc(SIZE, sizeof(char))) == NULL) {
+        terror("Memory for sequence...");
+    }
+
+    if ((seq = calloc(READBUF, sizeof(char))) == NULL) {
+        terror("Memory for sequence...");
+    }
+
+    i = READBUF + 1;
+
+    if (!fAst)
+        while ((c = buffered_fgetc(seq, &i, &r, f)) != '>' && (!feof(f) || (feof(f) &&  i < r ) )); //start seq
+    if (feof(f) && i >= r)
+        return 0;
+    while ((c = buffered_fgetc(seq, &i, &r, f)) == ' ');
+
+    while (k < MAXLID && c != '\n' && c != ' ') {
+        if (feof(f) && i >= r)
+            return 0;
+
+        sX->ident[k++] = c;
+        c = buffered_fgetc(seq, &i, &r, f);
+    }
+
+    sX->ident[k] = 0; //end of data.
+    while (c != '\n')
+        c = buffered_fgetc(seq, &i, &r, f);
+    c = buffered_fgetc(seq, &i, &r, f);
+
+    //start list with sX2
+    while (!feof(f) || (feof(f) &&  i < r ) ) {
+        c = toupper(c);
+        if (c == '>') {
+            fAst = 1;
+            seqs++;
+            sX->datos[lon++] = '*';
+            while (c != '\n') {
+                if ((feof(f) &&  i >= r ))
+                    return 0;
+                c = buffered_fgetc(seq, &i, &r, f);
+            }
+            //break;
+        }
+        if (isupper(c))
+            sX->datos[lon++] = c;
+        if (c == '*') {
+            sX->datos[lon++] = c;
+        }
+        c = buffered_fgetc(seq, &i, &r, f);
+    }
+
+    free(seq);
+
+    sX->datos[lon] = 0x00;
+
+    lonFinal += lon;
+    *nSeqs = seqs + 1;
+    *n = lonFinal - seqs;
+    return sX;
+}
+
+char getValue(struct Sequence *s, uint64_t pos) {
+    return s->datos[pos];
+}
+
+long getSeqLength(struct Sequence *s) {
+    uint64_t s1 = 0;
+    while (s1 > 0 && s->datos[s1] != '*') {
+        s1--;
+    }
+    s1++;
+    char *tmp = strchr(s->datos + s1, '*');
+    if (tmp == NULL) {
+        return strlen(s->datos) - s1 + 1;
+    }
+    return tmp - (s->datos + s1) + 1;
+}
+
+void endianessConversion(char *source, char *target, int numberOfBytes){
+	int i,j;
+	for(i=numberOfBytes-1;i>=0;i--){
+		j=numberOfBytes-1-i;
+		target[j]=source[i];
+	}
+}
+
+
+/**
+ * Function to read a fragment from the specified file
+ */
+void readFragment(struct FragFile *frag, FILE *f){
+	char tmpArray[8];
+
+	if(htons(1)==1){
+		//big endian
+		if(fread(&frag->diag, sizeof(int64_t), 1, f)!=1){
+			if(feof(f))return;
+			terror("Error reading the HSP diagonal");
+		}
+		if(fread(&frag->xStart, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP xStart");
+		}
+		if(fread(&frag->yStart, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP yStart");
+		}
+		if(fread(&frag->xEnd, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP xEnd");
+		}
+		if(fread(&frag->yEnd, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP yEnd");
+		}
+		if(fread(&frag->length, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP length");
+		}
+		if(fread(&frag->ident, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP identities");
+		}
+		if(fread(&frag->score, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP score");
+		}
+		if(fread(&frag->similarity, sizeof(float), 1, f)!=1){
+			terror("Error reading the HSP similarity");
+		}
+		if(fread(&frag->seqX, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP seqX");
+		}
+		if(fread(&frag->seqY, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP seqY");
+		}
+		if(fread(&frag->block, sizeof(int64_t), 1, f)!=1){
+			terror("Error reading the HSP block");
+		}
+		frag->strand = fgetc(f);
+	} else {
+		//little endian
+		if(fread(tmpArray, sizeof(int64_t), 1, f)!=1){
+			if(feof(f))return;
+			terror("Error reading the HSP diagonal");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->diag), sizeof(int64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP xStart");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->xStart), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP yStart");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->yStart), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP xEnd");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->xEnd), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP yEnd");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->yEnd), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP length");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->length), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP identity");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->ident), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP score");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->score), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(float), 1, f)!=1){
+			terror("Error reading the HSP float");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->similarity), sizeof(float));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP seqX");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->seqX), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading the HSP seqY");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->seqY), sizeof(uint64_t));
+		if(fread(tmpArray, sizeof(int64_t), 1, f)!=1){
+			terror("Error reading the HSP block");
+		}
+		endianessConversion(tmpArray, (char *)(&frag->block), sizeof(int64_t));
+		frag->strand = fgetc(f);
+	}
+}
+
+/**
+ * Function to write a fragment to the specified file
+ */
+void writeFragment(struct FragFile *frag, FILE *f){
+	char tmpArray[8];
+	if(htons(1)==1){
+		//Big endian
+		fwrite(&frag->diag, sizeof(int64_t), 1, f);
+		fwrite(&frag->xStart, sizeof(uint64_t), 1, f);
+		fwrite(&frag->yStart, sizeof(uint64_t), 1, f);
+		fwrite(&frag->xEnd, sizeof(uint64_t), 1, f);
+		fwrite(&frag->yEnd, sizeof(uint64_t), 1, f);
+		fwrite(&frag->length, sizeof(uint64_t), 1, f);
+		fwrite(&frag->ident, sizeof(uint64_t), 1, f);
+		fwrite(&frag->score, sizeof(uint64_t), 1, f);
+		fwrite(&frag->similarity, sizeof(float), 1, f);
+		fwrite(&frag->seqX, sizeof(uint64_t), 1, f);
+		fwrite(&frag->seqY, sizeof(uint64_t), 1, f);
+		fwrite(&frag->block, sizeof(int64_t), 1, f);
+		fputc(frag->strand, f);
+	} else {
+		//Little endian
+		endianessConversion((char *)(&frag->diag), tmpArray, sizeof(int64_t));
+		fwrite(tmpArray, sizeof(int64_t), 1, f);
+		endianessConversion((char *)(&frag->xStart), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->yStart), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->xEnd), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->yEnd), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->length), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->ident), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->score), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->similarity), tmpArray, sizeof(float));
+		fwrite(tmpArray, sizeof(float), 1, f);
+		endianessConversion((char *)(&frag->seqX), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->seqY), tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+		endianessConversion((char *)(&frag->block), tmpArray, sizeof(int64_t));
+		fwrite(tmpArray, sizeof(int64_t), 1, f);
+		fputc(frag->strand, f);
+	}
+}
+
+/**
+ * Function to read the sequence length
+ */
+void readSequenceLength(uint64_t *length, FILE *f){
+	char tmpArray[8];
+	if(htons(1)==1){
+		//big endian
+		if(fread(length, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading sequence length");
+		}
+	} else {
+		//little endian
+		if(fread(tmpArray, sizeof(uint64_t), 1, f)!=1){
+			terror("Error reading sequence length");
+		}
+		endianessConversion(tmpArray, (char *)length, sizeof(uint64_t));
+	}
+}
+
+/**
+ * Function to write the sequence length
+ */
+void writeSequenceLength(uint64_t *length, FILE *f){
+	char tmpArray[8];
+	if(htons(1)==1){
+		//big endian
+		fwrite(length, sizeof(uint64_t), 1, f);
+	} else {
+		//little endian
+		endianessConversion((char *)length, tmpArray, sizeof(uint64_t));
+		fwrite(tmpArray, sizeof(uint64_t), 1, f);
+	}
+}
+
+/**
+ * Function to return the sizeof a fragment.
+ * Due to architeture issues the value of sizeof built-in
+ * function could be different
+ */
+long int sizeofFragment(){
+	return 9*sizeof(uint64_t)+2*sizeof(int64_t)+1*sizeof(float)+1*sizeof(char);
+}
+
+/**************** ARJONA *******************/
+void cpyFrag2(struct FragFile *f, struct FragFile g){
+
+	f->diag = g.diag;
+	f->xStart = g.xStart;
+	f->xEnd = g.xEnd;
+	f->yStart = g.yStart;
+	f->yEnd = g.yEnd;
+	f->length = g.length;
+	f->score = g.score;
+	f->similarity = g.similarity;
+	f->seqX = g.seqX;
+	f->seqY = g.seqY;
+	f->ident = g.ident;
+	f->block = g.block;
+	f->strand = g.strand;
+}
+
+/******/
+struct FragFile* readFragments(char* s,int* nf,uint64_t *xtotal,uint64_t *ytotal){
+//Fragment* readFragments(char* s,int* nf,int *xtotal,int *ytotal){
+
+	FILE* fe;
+
+	struct FragFile* fs,f;
+	int n;
+
+	if((fe=fopen(s,"rb"))==NULL){
+		printf("***ERROR Opening input file");
+		exit(-1);
+	}
+	n=0;
+	readSequenceLength(xtotal, fe);
+	readSequenceLength(ytotal, fe);
+//	long int longFile;
+//	fseek(fe,0,SEEK_END);
+//	longFile=ftell(fe);
+//	n=(int)(longFile-2*sizeof(uint64_t))/sizeof(struct FragFile);
+//	printf("\n ReadFragments Complete\nnum: %d\n",n);
+
+	// Alternativa +++++++++++++++++
+	n=0;
+	readFragment(&f, fe);
+	while(!feof(fe)){
+		readFragment(&f, fe);
+		n++;
+	}
+//	printf("\n ReadFragments Complete\nnum: %d\n",n);
+	//+++++++++++++
+	rewind(fe);
+	fs=(struct FragFile*)malloc(sizeof(struct FragFile)*(n));
+	if(fs==NULL){printf("****ERROR: Out of memory\n");exit(-1);}
+
+	*nf=n;
+	readSequenceLength(xtotal, fe);
+	readSequenceLength(ytotal, fe);
+	n=0;
+	readFragment(&f, fe);
+	while(!feof(fe)){
+
+
+
+		if(f.length>0){
+			cpyFrag2(&fs[n],f);
+		}
+		n++;
+		readFragment(&f, fe);
+		//fprintf(stdout,"%d\t%" PRId64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%c" "\t%" PRIu64 "\n",n,fs[n].xStart, fs[n].yStart, fs[n].xEnd, fs[n].yEnd, fs[n].length, fs[n].strand, fs[n].ident);
+
+	}
+	*nf=n;
+
+
+	fclose(fe);
+	return fs;
+}
+/************************/
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/comparisonFunctions.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,67 @@
+#ifndef COMPARISON_FUNCTIONS_H
+#define COMPARISON_FUNCTIONS_H
+
+#define min(x,y)    (((x) < (y)) ? (x) : (y))
+
+/**
+ * Read a new hash entry from 'f' with no more occurences
+ * than 'freqThr'
+ */
+int readHashEntry(hashentry *h, FILE *f, uint64_t freqThr);
+
+/**
+ * Read the ocurrences of the given hash entry from the
+ * ocurrences file 'f' and stored them in 'pos'
+ */
+void loadWordOcurrences(hashentry he, location** pos, FILE** f);
+
+/**
+ * Get the maximum score possible between the two sequences
+ */
+unsigned long scoreMax(char *seq, char *seq2, uint64_t len, int point);
+
+/**
+ * Read the sequence from disk and store it in a list of Sequence struct
+ * n: sequence length
+ * ns: number of nodes in the list
+ */
+struct Sequence* LeeSeqDB(FILE *f, uint64_t *n, uint64_t *nSeqs, int fAst);
+
+/**
+ * Get the value of the sequence in a given position of the list node ns
+ */
+char getValue(struct Sequence *s, uint64_t pos);
+
+/**
+ * Get the length of the sequence 's'
+ */
+long getSeqLength(struct Sequence *s);
+
+/**
+ * Function to read a fragment from the specified file
+ */
+void readFragment(struct FragFile *frag, FILE *f);
+
+/**
+ * Function to write a fragment to the specified file
+ */
+void writeFragment(struct FragFile *frag, FILE *f);
+
+/**
+ * Function to read the sequence length
+ */
+void readSequenceLength(uint64_t *length, FILE *f);
+
+/**
+ * Function to write the sequence length
+ */
+void writeSequenceLength(uint64_t *length, FILE *f);
+
+/**
+ * Function to return the sizeof a fragment.
+ * Due to architeture issues the value of sizeof built-in
+ * function could be different
+ */
+long int sizeofFragment();
+
+#endif /* COMPARISON_FUNCTIONS_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/dictionaryFunctions.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,94 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+
+int seq2word(char* buf, int wsize, word* w) {
+	int i;
+	int b = 6;
+	memset(w, 0, sizeof(word));
+
+	for (i = 0; i < wsize; i++) {
+		if (buf[i] >= 4)
+			return -1;
+		w->b[i / 4] |= buf[i] << b;
+		b -= 2;
+		if (b < 0)
+			b = 6;
+	}
+	return 0;
+}
+
+void skipIDLine(FILE *fIn) {
+	char c;
+	// first line (skip ID fasta Line)
+	c = fgetc(fIn);
+	while (c != '\n')
+		c = fgetc(fIn);
+}
+
+int letterToIndex(char c) {
+	// coding (a=0,c=1,g=2,t=3,'>'=4 others=9 )
+	switch (c) {
+	case 'A':
+		return 0;
+	case 'C':
+		return 1;
+	case 'G':
+		return 2;
+	case 'T':
+		return 3;
+	case '>':
+		return 4;
+	default:
+		return 9;
+	}
+}
+
+int wordcmp(unsigned char *w1, unsigned char *w2, int n) {
+
+	int i = 0, limit;
+
+	if(n%4 != 0){
+		w1[n/4] = w1[n/4] >> (2*(3-((n-1)%4)));
+		w1[n/4] = w1[n/4] << (2*(3-((n-1)%4)));
+		w2[n/4] = w2[n/4] >> (2*(3-((n-1)%4)));
+		w2[n/4] = w2[n/4] << (2*(3-((n-1)%4)));
+		limit=(n/4)+1;
+	} else {
+		limit = n/4;
+	}
+
+	for (i=0;i<limit;i++) {
+		if (w1[i]<w2[i]) return -1;
+		if (w1[i]>w2[i]) return +1;
+	}
+	return 0;
+}
+
+void showWord(word* w, char *ws) {
+	char Alf[] = { 'A', 'C', 'G', 'T' };
+	int i;
+	int wsize = 8;
+	unsigned char c;
+	for (i = 0; i < wsize; i++) {
+		c = w->b[i];
+		c = c >> 6;
+		ws[4*i] = Alf[(int) c];
+		c = w->b[i];
+		c = c << 2;
+		c = c >> 6;
+		ws[4*i+1] = Alf[(int) c];
+		c = w->b[i];
+		c = c << 4;
+		c = c >> 6;
+		ws[4*i+2] = Alf[(int) c];
+		c = w->b[i];
+		c = c << 6;
+		c = c >> 6;
+		ws[4*i+3] = Alf[(int) c];
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/dictionaryFunctions.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,34 @@
+#ifndef DICTIONARY_FUNCTIONS_H
+#define DICTIONARY_FUNCTIONS_H
+
+/**
+ * Compress the word stored in 'buf' using 2 bits per letter
+ * The result will be at word 'w'
+ */
+int seq2word(char* buf, int wsize, word* w);
+
+/**
+ * Function to skip the identification line of a fasta sequence
+ */
+void skipIDLine(FILE *fIn);
+
+/**
+ * Function to convert the alphabet letter
+ * to an index.
+ * coding (a=0,c=1,g=2,t=3,'>'=4 others=9)
+ */
+int letterToIndex(char c);
+
+/**
+ * Function to print in stdout the given compressed word
+ */
+void showWord(word* w, char *ws);
+
+/**
+ * Function to compare two k-mers
+ * The function returns 0 if equal, 1 if greater
+ * than and -1 otherwise
+ */
+int wordcmp(unsigned char *w1, unsigned char*w2, int n);
+
+#endif /* DICTIONARY_FUNCTIONS_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/filterFrags.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,94 @@
+/*
+
+filterFrags.c
+
+Exports frags to CSV filtering by thresholds
+
+Use: filterFrags file.frags length similarity
+
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <math.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+#define  MAX_LEVELS  900000
+
+
+int main(int ac,char** av){
+
+
+	if(ac<4){
+		printf("Use: filterFrags <file.frags> <length> <similarity>\n");
+		exit(-1);
+	}
+
+	// Read fragments
+	struct FragFile frag;
+	uint64_t xtotal,ytotal;
+	//f=readFragmentsv2(av[1],&nf,&xtotal,&ytotal);
+
+	FILE * fFrags = fopen(av[1], "rb");
+	if(fFrags == NULL){ fprintf(stderr, "Could not open input file\n"); exit(-1); }
+
+	readSequenceLength(&xtotal, fFrags);
+	readSequenceLength(&ytotal, fFrags);
+
+
+
+
+	uint64_t min_l = (uint64_t) atoi(av[2]);
+	double min_sim = (double) atof(av[3])/100;
+
+	/******************************************/
+
+	// Print header. Frags file info
+	printf("All by-Identity Ungapped Fragments (Hits based approach)\n");
+	printf("[Abr.98/Apr.2010/Dec.2011 -- <ortrelles@uma.es>\n");
+	printf("SeqX filename        : Unknown\n");
+	printf("SeqY filename        : Unknown\n");
+	printf("SeqX name            : Unknown\n");
+	printf("SeqY name            : Unknown\n");
+	printf("SeqX length          : %"PRIu64"\n", xtotal);
+	printf("SeqY length          : %"PRIu64"\n", ytotal);
+	printf("Min.fragment.length  : 0\n");
+	printf("Min.Identity         : 0\n");
+	printf("Tot Hits (seeds)     : 0\n");
+	printf("Tot Hits (seeds) used: 0\n");
+	printf("Total fragments      : 0\n");
+
+	printf("========================================================\n");
+
+
+
+	printf("Type,xStart,yStart,xEnd,yEnd,strand(f/r),block,length,score,ident,similarity,%%ident,SeqX,SeqY\n");
+
+	double similarity,likeness;
+
+	while(!feof(fFrags)){
+		readFragment(&frag, fFrags);
+
+		similarity=100.0*(((double)frag.score)/((double)frag.length*4.0));
+		likeness=(((double)frag.ident)/((double)frag.length));
+
+		if(frag.strand=='r'){
+			frag.yStart = ytotal - frag.yStart - 1;
+			frag.yEnd = ytotal - frag.yEnd - 1;
+		}
+
+
+		if(similarity >= min_sim && (uint64_t)frag.length >= min_l){
+			printf("Frag,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%c,%"PRId64",%"PRIu64",%"PRIu64",%"PRIu64",%.2f,%.2f,%"PRIu64",%"PRIu64"\n",frag.xStart,frag.yStart,frag.xEnd,frag.yEnd,frag.strand,frag.block,frag.length,frag.score,frag.ident,similarity,likeness,frag.seqX,frag.seqY);
+		}
+
+	}
+
+	fclose(fFrags);
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/filterHits.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,84 @@
+/* filterHits
+ Syntax: filterHits fileIn fileOut dist FilterIsolatedHits
+
+ fileIn  Ordered Hits file (Diag/posX/posY)
+ fileOut Ordered Out file  (Diag/posX/posY)
+ dist    Hits that occurs at a less than a distance "dist" from the
+ previous hit are removed (only the first hit remains)
+ distance is measure from END-to-Head of 2 hits
+ FilterIsolatedHits (0=not - 1=Yes)
+
+ ----------------------------------------ortrelles@uma.es /Dic2011---*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+
+int differentSequences(hit h1, hit h2){
+	return h1.seqX != h2.seqX || h1.seqY != h2.seqY;
+}
+
+int main(int ac, char** av) {
+	int wSize = 32;
+	int64_t diagonal;
+	uint64_t lastPosition;
+	uint64_t originalNumberOfHits = 0, finalNumberOfHits = 0, hitsRead = 0;
+	FILE* fIn, *fOut;
+
+	hit hits[2];
+
+	if (ac != 4)
+		terror("USE:filterHits fileIn fileOut wordSize");
+
+	if ((fIn = fopen(av[1], "rb")) == NULL)
+		terror("opening HITS input file");
+
+	if ((fOut = fopen(av[2], "wb")) == NULL)
+		terror("opening HITS OUT file");
+	wSize =  atoi(av[3]);
+	lastPosition = 0;
+
+	hitsRead = fread(&hits[0], sizeof(hit), 1, fIn);
+	originalNumberOfHits += hitsRead;
+	if(hitsRead>0)
+		finalNumberOfHits += fwrite(&hits[0], sizeof(hit), 1, fOut);
+	diagonal = hits[0].diag;
+	while (hitsRead > 0) {
+		hitsRead = fread(&hits[1], sizeof(hit), 1, fIn);
+		originalNumberOfHits += hitsRead;
+
+		if(differentSequences(hits[0], hits[1])){
+			lastPosition = hits[1].posX + (2 * wSize - 1);
+			diagonal = hits[1].diag;
+			finalNumberOfHits += fwrite(&hits[1], sizeof(hit), 1, fOut);
+			memcpy(&hits[0], &hits[1], sizeof(hit));
+			continue;
+		}
+
+		if (hitsRead == 0 && originalNumberOfHits > 1) {
+			if(diagonal != hits[0].diag || hits[0].posX > (lastPosition)){
+				finalNumberOfHits += fwrite(&hits[0], sizeof(hit), 1, fOut);
+			}
+			continue;
+		}
+
+		if (diagonal != hits[1].diag || hits[1].posX > lastPosition) {
+			lastPosition = hits[1].posX + (2 * wSize - 1);
+			diagonal = hits[1].diag;
+			finalNumberOfHits += fwrite(&hits[1], sizeof(hit), 1, fOut);
+			memcpy(&hits[0], &hits[1], sizeof(hit));
+		}
+	}
+
+	fprintf(stdout,
+			"\nfilterHits\noriginal number of Hits=%" PRIu64 "  Final number of hits=%" PRIu64 "\n",
+			originalNumberOfHits, finalNumberOfHits);
+	fclose(fIn);
+	fclose(fOut);
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/fragStat.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,106 @@
+/* DEBUG: list the fragments file
+ *
+ * Sintax: ./leeFrags fragsFILE [lowUpThresh.file]
+ *
+ * optionally it can include a low/Up filter scheme
+ *
+ *                           O.Trelles >ortrelles @ uma.es>
+ *  -----------------------------------------------May 2012
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+
+#define SCORE   4  // it depends on the score matrix
+#define MAXLENGTH  1000
+
+void loadThresholds(int** low, int** up, char* thresholdFile) {
+	int length, lowSimilarity, upSimilarity;
+	FILE *fThreshold;
+
+	if ((*low = (int*) calloc(MAXLENGTH, sizeof(int))) == NULL)
+		terror("enought memory for LowThreshold array");
+
+	if ((*up = (int*) calloc(MAXLENGTH, sizeof(int))) == NULL)
+		terror("enought memory for UpperThreshold array");
+
+	if ((fThreshold = fopen(thresholdFile, "rt")) == NULL)
+		terror("open Low-Upper threshold file");
+
+	if(fscanf(fThreshold, "%d\t%d\t%d\n", &length, &lowSimilarity, &upSimilarity)!=3){
+		terror("Error reading the threshold file. Wrong format intTABintTABint");
+	}
+	while (!feof(fThreshold)) {
+		if (length >= MAXLENGTH)
+			terror("LowUp threshold out of range (maxL)");
+		*low[length] = lowSimilarity;
+		*up[length] = upSimilarity;
+		if(fscanf(fThreshold, "%d\t%d\t%d\n", &length, &lowSimilarity, &upSimilarity)!=3){
+			terror("Error reading the threshold file. Wrong format intTABintTABint");
+		}
+	}
+	fclose(fThreshold);
+}
+
+int main(int ac, char** av) {
+	FILE* fFrags;
+	uint64_t n1, n2, nFrags = 0;
+	struct FragFile frag;
+	int *low, *up, length;
+	int similarity = 0, zone;
+	int flagT = 0;
+
+	if (ac != 3 && ac != 2)
+		terror("USE: ./leeFrags fragsFILE [lowUpThresh.file (Optional)]");
+	if (ac == 3)
+		flagT = 1;
+
+	// Load Thresholds---------------------------------------
+	if (flagT == 1) {
+		loadThresholds(&low, &up, av[2]);
+	}
+
+	// prepared for multiple files
+	if ((fFrags = fopen(av[1], "rb")) == NULL)
+		terror("Opening Frags binary file");
+
+	readSequenceLength(&n1, fFrags);
+	readSequenceLength(&n2, fFrags);
+	fprintf(stderr, "working with fragsFile=%s SeqX=%" PRIu64 " seqY=%" PRIu64 "\n", av[1], n1,
+			n2);
+
+	readFragment(&frag, fFrags);
+	while (!feof(fFrags)) {
+		length = frag.length;
+		if (length >= MAXLENGTH)
+			length = MAXLENGTH - 1;
+		fprintf(stdout,
+				"d=%" PRId64 "\tx=%" PRIu64 "\ty=%" PRIu64 "\tL=%" PRIu64 "\tsco=%" PRIu64 "\tseqX=%" PRIu64 "\tseqY=%" PRIu64 "\tSIM=%lf\tident=%" PRIu64 "\tstrand=%c\tblock=%" PRIu64 ,
+				frag.diag, frag.xStart, frag.yStart, frag.length, frag.score, frag.seqX, frag.seqY,
+				frag.similarity, frag.ident, frag.strand,frag.block);
+		if (flagT) {
+			if (similarity > up[length])
+				zone = 2;
+			else if (similarity > low[length])
+				zone = 1;
+			else
+				zone = 0;
+			fprintf(stdout, "\tLow=%d\tUp=%d\tZone=%d", low[length], up[length],
+					zone);
+
+		}
+		fprintf(stdout, "\n");
+		nFrags++;
+		readFragment(&frag, fFrags);
+	}
+	fprintf(stdout, "nFrags:%" PRIu64 "\n", nFrags);
+
+	fclose(fFrags);
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/fragmentv2.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,124 @@
+/*
+ * fragmentv2.c
+ *
+ *  Created on: 17/07/2013
+ *      Author: jarjonamedina
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <math.h>
+
+
+#include <inttypes.h>
+#include "structs.h"
+#include "comparisonFunctions.h"
+
+#include "fragmentv2.h"
+
+//#include "error.h"
+
+
+int writeFragments (struct FragFile* f,char* s,int nf, uint64_t xtotal, uint64_t ytotal){
+/*
+	FILE* fs;
+	int n;
+
+	if((fs=fopen(s,"wb"))==NULL){
+		printf("***ERROR Opening output file");
+		exit(-1);
+	}
+
+//
+//	fwrite(&xtotal,sizeof(int),1,fs);
+//	fwrite(&ytotal,sizeof(int),1,fs);
+
+	fwrite(&xtotal,sizeof(uint64_t),1,fs);
+	fwrite(&ytotal,sizeof(uint64_t),1,fs);
+
+	for(n=0;n<nf;n++){
+		fwrite(&f[n],sizeof(Fragment),1,fs);
+//		printf("%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%d\t%c\n",f[n].xIni,f[n].yIni,f[n].xFin,f[n].yFin,f[n].length,f[n].ident,f[n].score,f[n].seqX,f[n].seqY,f[n].block,f[n].strand);
+	}
+	fclose(fs);
+*/
+return 0;
+
+}
+
+//
+struct FragFile* readFragmentsv2(char* s,int* nf,uint64_t *xtotal,uint64_t *ytotal){
+//Fragment* readFragments(char* s,int* nf,int *xtotal,int *ytotal){
+
+	FILE* fe;
+
+	struct FragFile* fs;
+	int n;
+
+	if((fe=fopen(s,"rb"))==NULL){
+		printf("***ERROR Opening input file");
+		exit(-1);
+	}
+
+/*
+	if((fsalida=fopen("fragmentosv2.txt","w"))==NULL){
+		printf("Fallo al leer Fragmentos");
+		exit(-1);
+	}
+
+*/
+	n=0;
+//	fread(xtotal,sizeof( uint64_t),1,fe);
+//	fread(ytotal,sizeof( uint64_t),1,fe);
+
+	readSequenceLength(xtotal, fe);
+	readSequenceLength(ytotal, fe);
+
+	long int longFile;
+	fseek(fe,0,SEEK_END);
+	longFile=ftell(fe);
+	n=(int)(longFile-2*sizeof(uint64_t))/sizeof(struct FragFile);
+//	n=(int)(longFile-2*sizeof(int))/sizeof(Fragment);
+	//printf("\n ReadFragments Complete\nnum: %d\n",n);
+
+	rewind(fe);
+
+	fs=(struct FragFile*)malloc(sizeof(struct FragFile)*(n+1));
+	if(fs==NULL){
+		printf("****ERROR: Out of memory\n");
+		exit(-1);
+	}
+	//*nf=n-1;
+	*nf=n;
+	n=0;
+
+//	fread(xtotal,sizeof( uint64_t),1,fe);
+//	fread(ytotal,sizeof( uint64_t),1,fe);
+
+
+	readSequenceLength(xtotal, fe);
+	readSequenceLength(ytotal, fe);
+
+
+//	while(!feof(fe)){
+	for(n=0;n<*nf;n++){
+		readFragment(&fs[n], fe);
+		//fprintf(stdout,"d=%" PRId64 "\tx=%" PRIu64 "\ty=%" PRIu64 "\tL=%" PRIu64 "\tsco=%" PRIu64 "\tseqX=%" PRIu64 "\tseqY=%" PRIu64 "\tSIM=%f" "\tstrand=%c\n",fs[n].diag, fs[n].xStart, fs[n].yStart, fs[n].length, fs[n].score, fs[n].seqX, fs[n].seqY,fs[n].similarity, fs[n].strand);
+//		fprintf(stdout,"%" PRId64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%c" "\t%" PRIu64 "\n",fs[n].xStart, fs[n].yStart, fs[n].xEnd, fs[n].yEnd, fs[n].length, fs[n].strand, fs[n].ident);
+
+//		n++;
+	}
+
+
+
+	fclose(fe);
+//	fclose(fsalida);
+	return fs;
+}
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/fragmentv2.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,19 @@
+/*
+ * fragmentv2.h
+ *
+ *  Created on: 17/07/2013
+ *      Author: jarjonamedina
+ */
+
+#ifndef FRAGMENTV2_H_
+#define FRAGMENTV2_H_
+#include <inttypes.h>
+#include "structs.h"
+
+
+struct FragFile* readFragmentsv2(char* s,int* nf,uint64_t *xtotal,uint64_t *ytotal);
+
+int writeFragments(struct FragFile* f,char* s,int nf,uint64_t  xtotal, uint64_t ytotal);
+
+
+#endif /* FRAGMENTV2_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/fragmentv3.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,98 @@
+/*
+ * Fragmentv3.c
+ *
+ *  Created on: 23/02/14
+ *      Author: jarjonamedina
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <math.h>
+
+#include "fragmentv3.h"
+//#include "error.h"
+
+
+int writeFragmentsv3 (Fragmentv3* f,char* s,int nf, int xtotal, int ytotal){
+
+	FILE* fs;
+	int n;
+
+	if((fs=fopen(s,"wb"))==NULL){
+		printf("***ERROR Opening output file");
+		exit(-1);
+	}
+
+//
+	fwrite(&xtotal,sizeof(int),1,fs);
+	fwrite(&ytotal,sizeof(int),1,fs);
+
+	for(n=0;n<nf;n++){
+		fwrite(&f[n],sizeof(Fragmentv3),1,fs);
+#ifdef debug
+//		printf("%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%d\t%c\n",f[n].xIni,f[n].yIni,f[n].xFin,f[n].yFin,f[n].length,f[n].ident,f[n].score,f[n].seqX,f[n].seqY,f[n].block,f[n].strand);
+#endif
+	}
+	fclose(fs);
+
+return 0;
+}
+
+//
+Fragmentv3* readFragmentsv3(char* s,int* nf,int *xtotal,int *ytotal){
+
+	FILE* fe;
+
+	Fragmentv3* fs;
+	int n;
+
+	if((fe=fopen(s,"rb"))==NULL){
+		printf("***ERROR Opening input file: %s\n",s);
+		exit(-1);
+	}
+
+
+	n=0;
+	fread(xtotal,sizeof(int),1,fe);
+	fread(ytotal,sizeof(int),1,fe);
+
+#ifdef debug
+//	printf("readFragments\nxIni \tyIni \txFin \tyFin \tlength \tident \tscore \tseqX \tseqY \tblock \tstrand\n");
+#endif
+
+	long int longFile;
+	fseek(fe,0,SEEK_END);
+	longFile=ftell(fe);
+	n=(int)(longFile-2*sizeof(int))/sizeof(Fragmentv3);
+//	printf("\n ReadFragments Complete\nnum: %d\n",n);
+	rewind(fe);
+
+	//Incorporamos fragmento punto y final
+	fs=(Fragmentv3*)malloc(sizeof(Fragmentv3)*(n));
+	if(fs==NULL){
+		printf("****ERROR: Out of memory\n");
+		exit(-1);
+	}
+
+	*nf=n;
+	n=0;
+
+	fread(xtotal,sizeof(int),1,fe);
+	fread(ytotal,sizeof(int),1,fe);
+
+	while(!feof(fe)){
+		fread(&fs[n],sizeof(Fragmentv3),1,fe);
+		n++;
+	}
+
+	fclose(fe);
+
+	return fs;
+}
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/fragmentv3.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,63 @@
+/*
+ * fragmentv3.h
+ *
+ *  Created on: 23/02/2014
+ *      Author: jarjonamedina
+ */
+
+#ifndef FRAGMENTV3_H_
+#define FRAGMENTV3_H_
+/*
+ typedef struct {
+    unsigned long xIni;
+    unsigned long yIni;
+    unsigned long xFin;
+    unsigned long yFin;
+    unsigned long diag;
+    unsigned long length;
+    unsigned long ident;
+    unsigned long score;
+    //float similarity;
+    unsigned long seqX; //sequence number in the 'X' file
+    unsigned long seqY; //sequence number in the 'Y' file
+    unsigned long block;            //synteny block id
+    char strand;        //'f' for the forward strain and 'r' for the reverse
+}Fragment;
+
+*/
+
+
+/* The new one*/
+
+typedef struct {
+    unsigned long diag;
+    unsigned long xIni;
+    unsigned long yIni;
+    unsigned long xFin;
+    unsigned long yFin;
+    unsigned long length;
+    unsigned long ident;
+    unsigned long score;
+    float similarity;
+    unsigned long seqX; //sequence number in the 'X' file
+    unsigned long seqY; //sequence number in the 'Y' file
+    int block;          //synteny block id
+    char strand;        //'f' for the forward strain and 'r' for the reverse
+		// New fields
+		long double pvalue;
+		float COVLocusX;
+		float COVLocusY;
+		// Modelling E.E.
+		int id;
+		int gap;
+		int translocations;
+		int reversions;
+		int duplications;
+		int events;
+}Fragmentv3;
+
+Fragmentv3* readFragmentsv3(char* s,int* nf,int *xtotal,int *ytotal);
+int writeFragmentsv3(Fragmentv3* f,char* s,int nf, int xtotal, int ytotal);
+
+
+#endif /* FRAGMENTV3_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/frags2text.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,311 @@
+/*
+ *
+ * Sintax: ./frags2text fragsFILE.frags fastaX fastaY fragsFILE.txt
+
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "comparisonFunctions.h"
+
+#define TAB_INSERT 70
+
+struct rIndex2 * loadReadsIndex(char * filename, uint64_t * nReads){
+	struct rIndex2 * RR;
+	uint64_t nR=0,i;
+	FILE *f;
+	uint64_t fsize;
+
+    if ((f=fopen(filename,"rb"))==NULL) terror("Could not open index input file");
+
+	fseeko(f, 0, SEEK_END);
+	fsize = ftello(f);
+	rewind(f);
+	nR = fsize/sizeof(struct rIndex2);
+
+    if ((RR =(struct rIndex2*) calloc(nR,sizeof(struct rIndex2)))==NULL) terror("Could not allocate index");
+
+	for (i=0; i<nR; i++){
+		if(0 == fread(&RR[i],sizeof(struct rIndex2),1,f)) break;
+	}
+	fclose(f);
+	(*nReads) = nR;
+	return RR;
+}
+
+void write_headers(FILE * f1, FILE * f2, FILE * fO, uint64_t pos1, uint64_t pos2, struct FragFile * f){
+	fseek(f1, pos1, SEEK_SET);
+	char c = fgetc(f1);
+	while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f1); }
+	fprintf(fO, " ALIGNED WITH ");
+	fseek(f2, pos2, SEEK_SET);
+	c = fgetc(f2);
+	while(c != '\n'){ fprintf(fO, "%c", c); c = fgetc(f2); }
+	fprintf(fO, " LENGTH: %"PRIu64" IDENT: %"PRIu64" STRAND: %c @(%"PRIu64", %"PRIu64")\n", f->length, f->ident, f->strand, f->xStart, f->yStart);
+}
+
+void get_both_seqs(char * fastaX, char * fastaY, uint64_t iniX, uint64_t finX, uint64_t iniY, uint64_t finY, uint64_t posX, uint64_t posY, uint64_t LacX, uint64_t LacY, uint64_t lX, uint64_t lY, FILE * fO){
+	char copyX[TAB_INSERT+1];
+	char copyY[TAB_INSERT+1];
+	memset(copyX, 0x0, TAB_INSERT+1);
+	memset(copyY, 0x0, TAB_INSERT+1);
+	uint64_t atcopyX = 0;
+	uint64_t atcopyY = 0;
+	uint64_t i;
+
+	uint64_t pos_used_x, pos_used_y;
+
+	// The X one
+	//fseek(fastaX, posX, SEEK_SET);
+	pos_used_x = posX;
+	char cX = fastaX[pos_used_x++];
+	while(cX != '\n'){ cX = fastaX[pos_used_x++]; }
+	uint64_t gpX = iniX - LacX;
+	uint64_t currX = 0, tab_counter;
+	while(currX < gpX){
+		cX = fastaX[pos_used_x++];
+		if(cX != '\n') ++currX;
+	}
+
+	// the other Y
+	//fseek(fastaY, posY, SEEK_SET);
+	pos_used_y = posY;
+	char cY = fastaY[pos_used_y++];
+	while(cY != '\n'){ cY = fastaY[pos_used_y++]; }
+	uint64_t gpY = iniY - LacY;
+	uint64_t currY = 0;
+	while(currY < gpY){
+		cY = fastaY[pos_used_y++];
+		if(cY != '\n') ++currY;
+	}
+
+
+
+
+	// Reached the region to show
+	currX = 0;
+	currY = 0;
+	cX = fastaX[pos_used_x++];
+	cY = fastaY[pos_used_y++];
+	//fprintf(fO, "\t");
+	tab_counter = 0;
+	while(currX < lX && currY < lY){
+
+		if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
+		cX = fastaX[pos_used_x++];
+		while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
+
+
+		if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
+		cY = fastaY[pos_used_y++];
+		while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
+
+		while(currX > currY){
+			if(cY == 'A' || cY == 'C' || cY == 'G' || cY == 'T' || cY == 'N'){ copyY[atcopyY++] = cY; ++currY; }
+			cY = fastaY[pos_used_y++];
+			while(cY != 'A' && cY != 'C' && cY != 'G' && cY != 'T' && cY != 'N') cY = fastaY[pos_used_y++];
+		}
+		while(currX < currY){
+			if(cX == 'A' || cX == 'C' || cX == 'G' || cX == 'T' || cX == 'N'){ copyX[atcopyX++] = cX; ++currX; }
+			cX = fastaX[pos_used_x++];
+			while(cX != 'A' && cX != 'C' && cX != 'G' && cX != 'T' && cX != 'N') cX = fastaX[pos_used_x++];
+		}
+
+		++tab_counter;
+
+
+
+		if(tab_counter >= TAB_INSERT && currX == currY){
+
+			copyX[TAB_INSERT] = '\0';
+			copyY[TAB_INSERT] = '\0';
+			fprintf(fO, "X:\t%.*s\n\t", TAB_INSERT, copyX);
+			for(i=0; i<TAB_INSERT; i++){
+				if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
+			}
+			fprintf(fO, "\nY:\t%.*s\n\n", TAB_INSERT, copyY);
+			tab_counter = 0;
+			atcopyX = 0; atcopyY = 0;
+		}
+	}
+	if(atcopyX > 0){
+		copyX[atcopyX] = '\0';
+		copyY[atcopyY] = '\0';
+		fprintf(fO, "X:\t%.*s\n\t", (int)atcopyX, copyX);
+		for(i=0; i<atcopyX; i++){
+			if(copyX[i] == copyY[i]) fprintf(fO, "|"); else fprintf(fO, " ");
+		}
+		fprintf(fO, "\nY:\t%.*s\n\n", (int)atcopyY, copyY);
+		atcopyX = 0; atcopyY = 0;
+	}
+	fprintf(fO, "\n");
+}
+
+
+void get_seq_from_to(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
+	fseek(fasta, pos, SEEK_SET);
+	char c = fgetc(fasta);
+	while(c != '\n'){ c = fgetc(fasta); }
+	uint64_t gp = ini - Lac;
+	uint64_t curr = 0, tab_counter;
+	while(curr < gp){
+		c = fgetc(fasta);
+		if(c != '\n') ++curr;
+	}
+	// Reached the region to show
+	curr = 0;
+	c = fgetc(fasta);
+	fprintf(fO, "\t");
+	tab_counter = 0;
+	while(curr < l){
+		if(c != '\n') fprintf(fO, "%c", c);
+		c = fgetc(fasta);
+		if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
+		if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
+	}
+	fprintf(fO, "\n");
+}
+
+
+void get_seq_from_to_rev(FILE * fasta, FILE * output, uint64_t ini, uint64_t fin, uint64_t pos, uint64_t Lac, uint64_t seqNum, uint64_t l, FILE * fO){
+	fseek(fasta, pos, SEEK_SET);
+	char c = fgetc(fasta);
+	while(c != '\n'){ c = fgetc(fasta); }
+	uint64_t gp = ini - Lac;
+	uint64_t curr = 0, tab_counter;
+	while(curr < gp){
+		c = fgetc(fasta);
+		if(c != '\n') ++curr;
+	}
+	// Reached the region to show
+	curr = 0;
+	c = fgetc(fasta);
+	fprintf(fO, "\t");
+	tab_counter = 0;
+	while(curr < l){
+		if(c != '\n') fprintf(fO, "%c", c);
+		c = fgetc(fasta);
+		if(c != '\n' || feof(fasta)){ ++curr; ++tab_counter; }
+		if(tab_counter == TAB_INSERT){ fprintf(fO, "\n\t"); tab_counter = 0;}
+	}
+	fprintf(fO, "\n");
+}
+
+
+
+int main(int ac, char** av) {
+	FILE* fFrags;
+	uint64_t n1, n2;
+	struct FragFile frag;
+
+	if (ac != 9)
+		terror("USE: ./frags2text fragsFILE.frags fastaX fastaY fastaYrev indexX indexY indexYrev fragsFILE.txt");
+
+	// prepared for multiple files
+	if ((fFrags = fopen(av[1], "rb")) == NULL)
+		terror("Opening Frags binary file");
+
+	// Open fastas
+
+	FILE * fX = NULL, * fY = NULL, * fYrev = NULL, * fO = NULL;
+	fX = fopen(av[2], "rt");
+	if(fX == NULL) terror("Could not open fasta X file");
+	fY = fopen(av[3], "rt");
+	if(fY == NULL) terror("Could not open fasta Y file");
+	fYrev = fopen(av[4], "rt");
+	if(fYrev == NULL) terror("Could not open fasta Y-rev file");
+
+
+	// Get file lengths
+    fseek(fX, 0, SEEK_END);
+    uint64_t aprox_lenX = ftell(fX);
+    rewind(fX);
+	char * strfastaX = (char *) malloc(aprox_lenX*sizeof(char));
+	fseek(fY, 0, SEEK_END);
+    uint64_t aprox_lenY = ftell(fY);
+    rewind(fY);
+	char * strfastaY = (char *) malloc(aprox_lenY*sizeof(char));
+	fseek(fYrev, 0, SEEK_END);
+    uint64_t aprox_lenYrev = ftell(fYrev);
+    rewind(fYrev);
+	char * strfastaYrev = (char *) malloc(aprox_lenYrev*sizeof(char));
+
+	if(strfastaX == NULL || strfastaY == NULL || strfastaYrev == NULL) terror("Could not allocate string sequences");
+
+	if(aprox_lenX != fread(strfastaX, sizeof(char), aprox_lenX, fX)) terror("Read wrong number of chars at X sequence");
+	if(aprox_lenY != fread(strfastaY, sizeof(char), aprox_lenY, fY)) terror("Read wrong number of chars at Y sequence");
+	if(aprox_lenYrev != fread(strfastaYrev, sizeof(char), aprox_lenYrev, fYrev)) terror("Read wrong number of chars at Y reversed sequence");
+
+
+	struct rIndex2 * RI_X, * RI_Y, * RI_Yrev;
+
+
+	uint64_t nReads_X, nReads_Y;
+	RI_X = loadReadsIndex(av[5], &nReads_X);
+	RI_Y = loadReadsIndex(av[6], &nReads_Y);
+	RI_Yrev = loadReadsIndex(av[7], &nReads_Y);
+
+	fO = fopen(av[8], "wt");
+	if(fO == NULL) terror("Could not open output alignments file");
+
+
+	readSequenceLength(&n1, fFrags);
+	readSequenceLength(&n2, fFrags);
+
+
+	readFragment(&frag, fFrags);
+
+	// RI_X 	is the forward index for fasta file X
+	// RI_Y 	is the forward index for fasta file Y
+	// RI_Yrev 	is the reverse index for fasta file Y
+
+
+
+	while (!feof(fFrags)) {
+
+		//RI[id].pos is position in file of >
+		//RI[id].Lac is the sum of the reads length prior
+
+		if(frag.strand == 'f'){
+			write_headers(fX, fY, fO, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, &frag);
+		}else{
+			write_headers(fX, fYrev, fO, RI_X[frag.seqX].pos, RI_Yrev[nReads_Y - frag.seqY - 1].pos, &frag);
+		}
+
+
+		//get_seq_from_to(fX, fO, frag.xStart, frag.xEnd, RI_X[frag.seqX].pos, RI_X[frag.seqX].Lac, frag.seqX, frag.length, fO);
+
+
+
+		if(frag.strand == 'f'){
+			get_both_seqs(strfastaX, strfastaY, frag.xStart, frag.xEnd, frag.yStart, frag.yEnd, RI_X[frag.seqX].pos, RI_Y[frag.seqY].pos, RI_X[frag.seqX].Lac, RI_Y[frag.seqY].Lac, frag.length, frag.length, fO);
+			//get_seq_from_to(fY, fO, frag.yStart, frag.yEnd, RI_Y[frag.seqY].pos, RI_Y[frag.seqY].Lac, frag.seqY, frag.length, fO);
+		}else{
+			uint64_t seqYnew;
+			seqYnew = nReads_Y - frag.seqY - 1;
+			get_both_seqs(strfastaX, strfastaYrev, frag.xStart, frag.xEnd, frag.yStart, frag.yEnd, RI_X[frag.seqX].pos, RI_Yrev[seqYnew].pos, RI_X[frag.seqX].Lac, RI_Yrev[seqYnew].Lac, frag.length, frag.length, fO);
+			//get_seq_from_to_rev(fYrev, fO, frag.yStart, frag.yEnd, RI_Yrev[seqYnew].pos, RI_Yrev[seqYnew].Lac, seqYnew, frag.length, fO);
+		}
+
+		readFragment(&frag, fFrags);
+	}
+
+	fclose(fFrags);
+	fclose(fX);
+	fclose(fY);
+	fclose(fO);
+
+	free(RI_X);
+	free(RI_Y);
+	free(RI_Yrev);
+
+	free(strfastaX);
+	free(strfastaY);
+	free(strfastaYrev);
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/getCSB.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,556 @@
+/* getCSB.c
+
+
+
+*/
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <math.h>
+
+#include "fragmentv3.h"
+#include "fragmentv2.h"
+#include "lista.h"
+#define  MAX_LEVELS  900000
+
+/* Subprograms */
+
+void quickSortX(Fragmentv3 *f, int elements);
+void quickSortScore(Fragmentv3 *f, int elements);
+void quickSortY(Fragmentv3 *f, int elements, int ytotal);
+void reorder( Fragmentv3* f, int nf, int ytotal);
+
+void resetFrags(Fragmentv3* f,int nf);
+
+Lista ordenarXY(Lista *lista,char c);
+void ordenarLista(Lista *lista);
+
+int S(Fragmentv3 f); // It returns '1' for 'f' strand and '-1' for 'r' strand;
+int C(Fragmentv3 a, Fragmentv3 ci,Fragmentv3 cd, Fragmentv3 s); // Collinearity of ant, current, sig.
+int L(Fragmentv3 a, Fragmentv3 c); // Linear of ant and current
+int CSB(Lista *lista,Fragmentv3 *frags); // Join all posible CSB. return the number of CSB
+
+int NewList(Lista* lista,Fragmentv3* f,int nf);
+void saveCSB(Lista lista,int xtotal,int ytotal);
+void saveFragments(Fragmentv3* f,int nf,int xtotal,int ytotal);
+
+/********************/
+
+/* Globals */
+//int dist;
+//int size;
+char** AV;
+
+//int MOSTRAR=1;
+//int CONTADOR=0;
+
+// Constantes extensi�n de gap
+int INI=0;
+float EXT=3;
+
+
+
+
+int main(int ac,char** av){
+
+	AV=av;
+	if(ac<4){
+		printf("Use: getCSB fragsv3.fil csb.fv3 csb.txt \n");
+		exit(-1);
+	}
+	/***********************/
+	/* General vars */
+	int size_of_list;
+	//int i;
+
+
+	/* Read Fragments */
+		Fragmentv3* f;
+		int nf,xtotal,ytotal;
+		f=readFragmentsv3(av[1],&nf,&xtotal,&ytotal);
+
+
+	/* Reset frags */
+	resetFrags(f,nf);
+
+	/* Order frags */
+	reorder(f,nf,ytotal);
+
+	/* Creates a list */
+	Lista lista = NULL;
+	size_of_list=NewList(&lista,f,nf);
+//+++++++++++++++++++++++++++
+/************ START PROGRAM ***************************************************************/
+	int n=1;
+
+
+	// Get CSBs
+	if(size_of_list>1){
+		n=CSB(&lista,f);
+	}
+
+
+    saveFragments(f,nf,xtotal,ytotal);
+
+return 0;
+}
+
+
+
+
+
+
+
+
+/*************************/
+int NewList(Lista* lista,Fragmentv3* f,int nf){
+		int i;
+		int size_of_list=0;
+		int m;
+		m=0;
+		for(i=0;i<nf;i++){
+
+			if(f[i].block>=0){
+				size_of_list++;
+
+				f[i].block=i+1;
+				f[i].id=i;
+				Insertar(lista,m++,f[i],0);
+
+			}else{
+				f[i].id=-1;
+			}
+			//printf("%d\t%ld\t%ld\t%ld\t%d\n",i,f[i].id,f[i].score,f[i].length,f[i].block);
+
+		}
+		return size_of_list;
+	}
+/***************************************************/
+/***************************************************/
+void saveFragments(Fragmentv3 *frags,int nf,int xtotal,int ytotal){
+
+FILE* fs;
+	if((fs=fopen(AV[2],"wb"))==NULL){
+		printf("***ERROR Opening output file %s\n",AV[2]);
+		exit(-1);
+	}
+FILE* ftxt;
+	if((ftxt=fopen(AV[3],"w"))==NULL){
+		printf("***ERROR Opening output file %s\n",AV[3]);
+		exit(-1);
+	}
+//printf("abierto %s\n",av[4]);
+Fragmentv3 fragv3;
+
+	uint64_t xtotal64,ytotal64;
+	xtotal64 = xtotal;
+	ytotal64 = ytotal;
+	fwrite(&xtotal,sizeof(int),1,fs);
+	fwrite(&ytotal,sizeof(int),1,fs);
+//	fwrite(&xtotal,sizeof(int),1,fs);
+//	fwrite(&ytotal,sizeof(int),1,fs);
+
+
+
+
+	int i;
+	for(i=0;i<nf;i++){
+		fragv3=frags[i];
+		fwrite(&fragv3,sizeof(Fragmentv3),1,fs);
+		fprintf(ftxt,"%ld\t%ld\t%ld\t%ld\t%ld\t%c\t%ld\t%d\n",fragv3.xIni,fragv3.yIni,fragv3.xFin,fragv3.yFin,fragv3.length,fragv3.strand,fragv3.score,fragv3.block);
+
+
+	}
+
+	fclose(fs);
+	fclose(ftxt);
+
+	//getchar();
+}
+/****************/
+void saveCSB(Lista lista,int xtotal,int ytotal){
+
+FILE* fs;
+	if((fs=fopen(AV[2],"wb"))==NULL){
+		printf("***ERROR Opening output file %s\n",AV[2]);
+		exit(-1);
+	}
+FILE* ftxt;
+	if((ftxt=fopen(AV[3],"w"))==NULL){
+		printf("***ERROR Opening output file %s\n",AV[3]);
+		exit(-1);
+	}
+//printf("abierto %s\n",av[4]);
+Fragmentv3 fragv3;
+
+	uint64_t xtotal64,ytotal64;
+	xtotal64 = xtotal;
+	ytotal64 = ytotal;
+	fwrite(&xtotal,sizeof(int),1,fs);
+	fwrite(&ytotal,sizeof(int),1,fs);
+//	fwrite(&xtotal,sizeof(int),1,fs);
+//	fwrite(&ytotal,sizeof(int),1,fs);
+
+
+	pNodo nodo;
+	nodo=lista;
+	printf("-------------------------\n");
+	fprintf(ftxt,"xIni\tyIni\txFin\tyFin\tlength\tstrand\tscore\n");
+
+	while(nodo && nodo->siguiente){
+		fragv3 = nodo->f;
+		fwrite(&fragv3,sizeof(Fragmentv3),1,fs);
+		fprintf(ftxt,"%ld\t%ld\t%ld\t%ld\t%ld\t%c\t%ld\t%d\n",fragv3.xIni,fragv3.yIni,fragv3.xFin,fragv3.yFin,fragv3.length,fragv3.strand,fragv3.score,fragv3.block);
+
+		nodo= nodo->siguiente;
+	}
+
+	fclose(fs);
+	fclose(ftxt);
+	printf("------------FINJ-------------\n");
+	//getchar();
+}
+/***************************************************/
+int S(Fragmentv3 f){
+
+	if(f.strand=='f')return 1;
+	if(f.strand=='r')return -1;
+	return 0;
+}
+/***************************************************/
+int C(Fragmentv3 a, Fragmentv3 ci, Fragmentv3 cd, Fragmentv3 s){
+
+	if( L(a,ci) && L(cd,s) )return 1;
+	return 0;
+}
+/***************************************************/
+int L(Fragmentv3 a, Fragmentv3 c){
+
+	int A,C;
+	A=a.seqY;
+	C=c.seqY;
+	if( ( (A-S(a)) == (C-2*S(c)) ) && (S(c) == S(a))  )return 1;
+	return 0;
+}
+/***************************************************/
+int CSB(Lista *lista,Fragmentv3 *frags){
+	int n=1;
+
+
+	int penal=0;
+	int joined=1;
+
+	pNodo nodo;
+	while(joined){
+		joined = 0;
+		nodo = *lista;
+		while(!nodo->anterior)nodo=nodo->siguiente;
+		while(nodo->siguiente) {
+
+
+
+				if( L(nodo->anterior->f,nodo->f) ){
+
+					penal = INI + EXT*(abs( nodo->anterior->f.xFin- nodo->f.xIni)+abs( nodo->anterior->f.yFin- nodo->f.yIni));
+					//printf("penal:%d\n",penal);
+
+					nodo->anterior->f.xFin = nodo->f.xFin;
+					nodo->anterior->f.yFin = nodo->f.yFin;
+					//nodo->anterior->f.score = nodo->anterior->f.score + nodo->f.score;
+					nodo->anterior->f.score = nodo->anterior->f.score + nodo->f.score- penal;
+					nodo->anterior->f.length = abs(nodo->anterior->f.yFin - nodo->anterior->f.yIni);
+
+					nodo->anterior->f.seqY = nodo->f.seqY;
+					nodo->anterior->f.seqX = nodo->f.seqX;
+					//nodo->anterior->f.block=n;
+					//frags[nodo->f.id].block=n;
+					//frags[nodo->anterior->f.id].block=n;
+					frags[nodo->f.id].block=frags[nodo->anterior->f.id].block;
+
+
+
+					joined = 1;
+					Borrar(lista, nodo->valor);
+
+				}else{
+
+
+					nodo = nodo->siguiente;
+					n++;
+
+				}
+		}
+		// Para el ultimo
+		if( L(nodo->anterior->f,nodo->f) ){
+			penal = INI + EXT*(abs( nodo->anterior->f.xFin- nodo->f.xIni)+abs( nodo->anterior->f.yFin- nodo->f.yIni));
+			//printf("penal:%d\n",penal);
+
+			nodo->anterior->f.xFin = nodo->f.xFin;
+			nodo->anterior->f.yFin = nodo->f.yFin;
+			//nodo->anterior->f.score = nodo->anterior->f.score + nodo->f.score;
+			nodo->anterior->f.score = nodo->anterior->f.score + nodo->f.score- penal;
+			nodo->anterior->f.length = abs(nodo->anterior->f.yFin - nodo->anterior->f.yIni);
+
+			nodo->anterior->f.seqY = nodo->f.seqY;
+			nodo->anterior->f.seqX = nodo->f.seqX;
+
+			frags[nodo->f.id].block=frags[nodo->anterior->f.id].block;
+
+			joined = 1;
+			Borrar(lista, nodo->valor);
+		}
+		if(n>=1)ordenarLista(lista);
+	}
+	return n;
+}
+/***************************************************/
+Lista ordenarXY(Lista *lista,char c){
+
+	Lista nueva=NULL;
+	pNodo nodo= *lista;
+	nodo = *lista;
+/*******************************************/
+/* Primero buscamos el valor menor, para que sea el primero en insertarse */
+
+	int valor=0;
+	pNodo primero=nodo;
+
+	//while(nodo->anterior)nodo=nodo->anterior;
+	if(c=='x'){valor=nodo->f.seqX;primero=nodo;}
+	if(c=='y'){valor=nodo->f.seqY;primero=nodo;}
+
+
+	while(nodo){
+		if(c=='x'){
+			if(valor>=nodo->f.seqX && nodo->f.block>=0){
+				valor=nodo->f.seqX;
+				primero=nodo;
+			}
+
+		}else{
+			if(valor>=nodo->f.seqY && nodo->f.block>=0){
+				valor=nodo->f.seqY;
+				primero=nodo;
+			}
+
+		}
+		nodo=nodo->siguiente;
+	}
+//	if(c=='x'){printf("aqui primero->f.seqX: %d valor:%d\n",primero->f.seqX,valor);}
+//	if(c=='y'){printf("aqui primero->f.seqY: %d valor:%d\n",primero->f.seqY,valor);}
+
+
+	//Insertamos el primero
+	if(c=='x'){
+//		printf("X insertamos en la posicion %d\n",primero->f.seqX);
+		Insertar(&nueva,primero->f.seqX,primero->f,primero->n_event);
+		Borrar(lista,primero->valor);
+	}else{
+///		printf("Y insertamos en la posicion %d\n",primero->f.seqY);
+		Insertar(&nueva,primero->f.seqY,primero->f,primero->n_event);
+//		printf("ya hemos insertado\n");
+		Borrar(lista,primero->valor);
+	}
+
+/*******************************************/
+
+	nodo = *lista;
+	while(nodo->anterior)nodo=nodo->anterior;
+	while(nodo){
+		// Insertarmos en el orden de XY
+		if(c=='x'){
+			Insertar(&nueva,nodo->f.seqX,nodo->f,nodo->n_event);
+		}else{
+			Insertar(&nueva,nodo->f.seqY,nodo->f,nodo->n_event);
+
+		}
+		nodo = nodo->siguiente;
+	}
+
+
+	pNodo newNodo = nueva;
+	newNodo = nueva;
+	int i=0;
+	while(newNodo){
+		if(c=='x'){
+
+			newNodo->f.seqX=i++;
+		}else{
+			newNodo->f.seqY=i++;
+		}
+
+		newNodo = newNodo->siguiente;
+	}
+
+	return nueva;
+
+
+}
+/****************************************/
+void reorder( Fragmentv3* f, int nf, int ytotal){
+	int i;
+	int j;
+
+	/* Sort Y*/
+	quickSortY(&f[0],nf,ytotal);
+	j=0;
+	for(i=0;i<nf;i++){
+		if(f[i].block>=0)f[i].seqY=j++;
+	}
+
+
+	/* Sort X*/
+	quickSortX(&f[0],nf);
+	j=0;
+	for(i=0;i<nf;i++){
+		if(f[i].block>=0)f[i].seqX=j++;
+	}
+
+
+}
+/******/
+void resetFrags(Fragmentv3* f, int nf){
+int i;
+for(i=0;i<nf;i++){
+	f[i].gap=0;
+
+	f[i].events=0;
+	f[i].id=i;
+
+	f[i].reversions=0;
+
+}
+
+}
+/**** Ordenar lista ****/
+void ordenarLista(Lista *lista){
+
+			// Ordenamos
+			Lista nueva = NULL;
+			nueva=ordenarXY(lista,'y');
+			BorrarLista(lista);
+			*lista=nueva;
+
+
+
+			// Ahora por X
+			Lista nuevaX = NULL;
+			nuevaX=ordenarXY(lista,'x');
+			BorrarLista(lista);
+			*lista=nuevaX;
+}
+
+/****************************************/
+
+//  quickSort
+//
+//  This public-domain C implementation by Darel Rex Finley.
+//
+//  * This function assumes it is called with valid parameters.
+//
+//  * Example calls:
+//    quickSort(&myArray[0],5); // sorts elements 0, 1, 2, 3, and 4
+//    quickSort(&myArray[3],5); // sorts elements 3, 4, 5, 6, and 7
+void quickSortScore(Fragmentv3 *f, int elements) {
+
+
+
+
+  int  beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R, swap ;
+	Fragmentv3 piv;
+  beg[0]=0; end[0]=elements;
+  while (i>=0) {
+    L=beg[i]; R=end[i]-1;
+    if (L<R) {
+      piv=f[L];
+      while (L<R) {
+        while (f[R].score>=piv.score && L<R) R--; if (L<R) f[L++]=f[R];
+        while (f[L].score<=piv.score && L<R) L++; if (L<R) f[R--]=f[L]; }
+      f[L]=piv; beg[i+1]=L+1; end[i+1]=end[i]; end[i++]=L;
+      if (end[i]-beg[i]>end[i-1]-beg[i-1]) {
+        swap=beg[i]; beg[i]=beg[i-1]; beg[i-1]=swap;
+        swap=end[i]; end[i]=end[i-1]; end[i-1]=swap; }}
+    else {
+      i--; }}}
+/*****************/
+void quickSortX(Fragmentv3 *f, int elements) {
+
+
+
+
+  int  beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R, swap ;
+	Fragmentv3 piv;
+  beg[0]=0; end[0]=elements;
+  while (i>=0) {
+    L=beg[i]; R=end[i]-1;
+    if (L<R) {
+      piv=f[L];
+      while (L<R) {
+        while (f[R].xIni>=piv.xIni && L<R) R--; if (L<R) f[L++]=f[R];
+        while (f[L].xIni<=piv.xIni && L<R) L++; if (L<R) f[R--]=f[L]; }
+      f[L]=piv; beg[i+1]=L+1; end[i+1]=end[i]; end[i++]=L;
+      if (end[i]-beg[i]>end[i-1]-beg[i-1]) {
+        swap=beg[i]; beg[i]=beg[i-1]; beg[i-1]=swap;
+        swap=end[i]; end[i]=end[i-1]; end[i-1]=swap; }}
+    else {
+      i--; }}}
+/*****************/
+void quickSortY(Fragmentv3 *f, int elements,int ytotal) {
+
+
+
+  int  beg[MAX_LEVELS], end[MAX_LEVELS], i=0, L, R, swap ;
+	Fragmentv3 piv;
+
+/*
+// Change Y component
+long ytemp;
+	for(i=0;i<elements;i++){
+		if(f[i].strand=='r'){
+			//f[i].yIni=ytotal-f[i].yIni;
+			//f[i].yFin=ytotal-f[i].yFin;
+
+			ytemp=f[i].yIni;
+			f[i].yIni=f[i].yFin;
+			f[i].yFin=ytemp;
+		}
+	}
+
+*/
+  beg[0]=0; end[0]=elements;
+  while (i>=0) {
+    L=beg[i]; R=end[i]-1;
+    if (L<R) {
+      piv=f[L];
+      while (L<R) {
+        while (f[R].yIni>=piv.yIni && L<R) R--; if (L<R) f[L++]=f[R];
+        while (f[L].yIni<=piv.yIni && L<R) L++; if (L<R) f[R--]=f[L]; }
+      f[L]=piv; beg[i+1]=L+1; end[i+1]=end[i]; end[i++]=L;
+      if (end[i]-beg[i]>end[i-1]-beg[i-1]) {
+        swap=beg[i]; beg[i]=beg[i-1]; beg[i-1]=swap;
+        swap=end[i]; end[i]=end[i-1]; end[i-1]=swap; }}
+    else {
+      i--; }}
+
+// Change Y component
+/*
+	for(i=0;i<elements;i++){
+		if(f[i].strand=='r'){
+			ytemp=f[i].yIni;
+			f[i].yIni=f[i].yFin;
+			f[i].yFin=ytemp;
+
+			//f[i].yIni=ytotal-f[i].yIni;
+			//f[i].yFin=ytotal-f[i].yFin;
+
+
+		}
+	}
+*/
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/getInfoCSB.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,68 @@
+/*
+
+getInfoCSB.c
+
+This program takes a fragment file with CSB marked and extract CSB composition
+
+Use: getInfoCSB file.frags fragment_composition(0 no, 1 yes) \n
+
+example:
+./getInfo S3chr2R-S2chr2R-L200-S40-K8.csb.frags 1
+
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <math.h>
+
+#include "fragmentv3.h"
+#include "fragmentv2.h"
+
+#include "lista.h"
+#define  MAX_LEVELS  900000
+
+
+int main(int ac,char** av){
+
+
+	if(ac<2){
+		printf("Use: getInfo file.frags\n");
+		exit(-1);
+	}
+
+	// Read fragments
+	struct FragFile* f;
+	int nf; // Number of fragments
+	uint64_t xtotal,ytotal;
+	nf=0;
+	f=readFragmentsv2(av[1],&nf,&xtotal,&ytotal);
+
+	/******************************************/
+	int i;
+
+	// Print header. Frags file info
+	printf("Total CSB: 0\n");
+	printf("========================================================\n");
+
+	printf("Type,xStart,yStart,xEnd,yEnd,strand(f/r),block,length,score,ident,similarity,%%ident,SeqX,SeqY\n");
+
+	double likeness;
+
+	for(i=0;i<nf;i++){
+		//similarity=100.0 * (((double)f[i].score)/((double)f[i].length*4.0));
+		likeness=100.0 * (((double)f[i].ident)/((double)f[i].length));
+
+		if(f[i].strand=='r'){
+			f[i].yStart = ytotal - f[i].yStart - 1;
+			f[i].yEnd = ytotal - f[i].yEnd - 1;
+		}
+
+		printf("Frag,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%c,%"PRIu64",%"PRIu64",%"PRIu64",%"PRIu64",%.2f,%.2f,%"PRIu64",%"PRIu64"\n",(uint64_t)f[i].xStart,(uint64_t)f[i].yStart,(uint64_t)f[i].xEnd,(uint64_t)f[i].yEnd,f[i].strand,(uint64_t)f[i].block,(uint64_t)f[i].length,(uint64_t)f[i].score,(uint64_t)f[i].ident,f[i].similarity,likeness,(uint64_t)f[i].seqX,(uint64_t)f[i].seqY);
+
+	}
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/hdStat.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,99 @@
+/* leehd read and displays the hash table from disk
+   Syntax: leehd prefixNameOUT
+
+    prefixNameOUT.h2dW  : index of words-Pos-Ocurrences
+    prefixNameOUT.h2dP  : positions
+    both must be available
+
+    Any char as third argument means "Verbose mode"
+    Feb.2012: computes word frequencies
+
+                              ortrelles@uma.es / Dic.2011
+    ---------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <inttypes.h>
+
+#include "structs.h"
+#include "commonFunctions.h"
+#include "dictionaryFunctions.h"
+
+#define PEQ 1001
+
+int main(int ac, char** av){
+
+	char fname[1024], *W;
+	W=(char *)malloc(33*sizeof(char));
+	FILE *f1, *f2, *f3;
+	hashentry he;
+	uint64_t i=0;
+        location spos;
+	uint64_t nW=0,maxF=0, aveF=0;
+	int flagV=0;
+	int64_t freq[PEQ];
+
+	if(ac<2)terror("USE: leehd  prefixNameOUT [v=verbose]\n");
+	if (ac==3) flagV=1;
+	for (i=0;i<PEQ;i++) freq[i]=0;
+	sprintf(fname,"%s.d2hW",av[1]); // Words file (first level of hash table)
+	if ((f1 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dW file");
+	sprintf(fname,"%s.d2hP",av[1]); // Positions file
+	if ((f2 = fopen(fname,"rb"))==NULL) terror("opening prefix.h2dP file");
+
+	sprintf(fname,"%s.freq",av[1]); // output
+	if ((f3 = fopen(fname,"wt"))==NULL) terror("opening prefix.freq OUT file");
+
+	// kick-off
+	if(fread(&he,sizeof(hashentry),1,f1)!=1)
+		terror("Empty dictionary");
+
+        while(!feof(f1)){
+
+             if (flagV) {showWord(&he.w, W);fprintf(stdout, "%.32s", W);}
+             if (flagV) fprintf(stdout,"  : num=%-7" PRIu64 ":",he.num);
+		if (he.num>=PEQ) {
+			fprintf(f3, "%" PRIu64 "\t", he.num);
+			showWord(&he.w, W);
+			fprintf(f3, "%.32s", W);
+			fprintf(f3, "%" PRIu64 "\n", he.num);
+		}
+		else freq[he.num]++;
+	     	nW++;
+		if (he.num>maxF) maxF=he.num;
+		aveF+=he.num;
+
+             fseek(f2,0, he.pos);
+	     if (flagV) {
+
+             for (i=0;i<he.num;i++){
+	       if(fread(&spos,sizeof(location),1,f2)!=1)
+			terror("Error reading the word occurrences");
+               fprintf(stdout,"(%" PRIu64 ",%" PRIu64 ") ",spos.pos,spos.seq);
+             }
+             fprintf(stdout,"\n");
+	    }
+	     if(fread(&he,sizeof(hashentry),1,f1)!=1)
+		if(ferror(f1))
+			terror("Error reading a dictionary entry");
+        }
+	free(W);
+
+	fclose(f1);
+	fclose(f2);
+	// store PEQ freqs--------
+	fprintf(f3,"freqs of words that appear\nTimes\tnWords\n");
+	for (i=0;i<PEQ;i++)
+		if (freq[i]) fprintf(f3,"%" PRId64 "\t%" PRId64 "\n",i,freq[i]);
+
+	fclose(f3);
+	fprintf(stdout,"Num.Words=%" PRIu64 " MaxFreq=%" PRIu64 " TotRepeat=%" PRIu64 " AveragFreq=%f\n",nW,maxF,aveF, (float)aveF/(float)nW);
+
+	exit(0);
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/hits.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,184 @@
+/* hits : determine HITS between two sequences based on their
+ dictionaries from disk
+ Syntax: hits prefixNameX prefixNameY Outfile
+
+ prefixNameX & Y :refers to *.d2hW : index of words-Pos-Ocurrences
+ and *.d2hP : words positions
+ Outfile is in the form of [Diag][posX][posY]
+
+ Jan.2012: define an I-O buffer to reduce disk activity
+ Feb.2012: - define a function to compare words instead of chars
+ - use static buffer for positions
+ - New parameter: FreqThre (frequency threshold) to avoid high
+ frequency words (word.freq> FreqThr are skipped)
+
+ Feb.6   : new parameter (same meaning as in w2hd): PrefixSize
+
+ May 22  : for long repetitions some of them will be skipped (the step is
+ computed as NREP / MaxREP
+
+ ortrelles@uma.es / Dic.2011
+ ---------------------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "dictionaryFunctions.h"
+#include "comparisonFunctions.h"
+
+#define MAXBUF 10000000
+#define MaxREP 10000
+
+int main(int ac, char** av) {
+
+	char fname[1024];
+	int hitsInBuf = 0;
+	int i, j;
+	int comp;
+	int firstMatch = 0, endMatch = 0;
+	uint64_t freqThr;
+	uint64_t nHits = 0, wordMatches = 0;
+	int wSize;
+	int stepX, stepY;
+	long offsetWordBefore, offsetWordAfter, offsetLocationBefore, offsetLocationAfter;
+	FILE *fX1, *fX2, *fY1, *fY2, *fOut;
+
+	location *posX = NULL, *posY = NULL;
+	hashentry heX, heY;
+	hit *hBuf;
+
+	if (ac != 6)
+		terror("USE: hits prefixNameX prefixNameY  Outfile FreqThre PrefixSize");
+	freqThr = (uint64_t) atoi(av[4]);
+	wSize = atoi(av[5]);
+
+	// I-O buffer
+	if ((hBuf = (hit*) calloc(sizeof(hit), MAXBUF)) == NULL)
+		terror("HITS: memory for I-O buffer");
+	// word positions buffers
+	if ((posX = (location*) calloc(MAXBUF, sizeof(location))) == NULL)
+		terror("memory for posX buffer.. using MAXBUF=10MB");
+	if ((posY = (location*) calloc(MAXBUF, sizeof(location))) == NULL)
+		terror("memory for posY buffer.. using MAXBUF=10MB");
+
+	// Sequence X files
+	sprintf(fname, "%s.d2hW", av[1]);
+	if ((fX1 = fopen(fname, "rb")) == NULL)
+		terror("opening seqX.d2hW file");
+	sprintf(fname, "%s.d2hP", av[1]);
+	if ((fX2 = fopen(fname, "rb")) == NULL)
+		terror("opening seqX.d2hP file");
+
+	// Sequence Y files
+	sprintf(fname, "%s.d2hW", av[2]);
+	if ((fY1 = fopen(fname, "rb")) == NULL)
+		terror("opening seqY.d2hW file");
+	sprintf(fname, "%s.d2hP", av[2]);
+	if ((fY2 = fopen(fname, "rb")) == NULL)
+		terror("opening seqY.d2hP file");
+
+	// OUT file
+	if ((fOut = fopen(av[3], "wb")) == NULL)
+		terror("opening OUT file");
+
+	// kick-off
+	if (readHashEntry(&heX, fX1, freqThr) == -1)
+		terror("no hash (1)");
+	if (readHashEntry(&heY, fY1, freqThr) == -1)
+		terror("no hash (2)");
+
+	while (!feof(fX1) && !feof(fY1)) {
+
+		comp = wordcmp(&heX.w.b[0], &heY.w.b[0], wSize);
+		if (comp < 0) {
+			readHashEntry(&heX, fX1, freqThr);
+			//Save position of first missmatch after matches and rewind
+			if(firstMatch){
+				offsetWordAfter = ftell(fY1) - sizeof(hashentry);
+				offsetLocationAfter = ftell(fY2);
+				fseek(fY1, offsetWordBefore, SEEK_SET);
+				readHashEntry(&heY, fY1, freqThr);
+				fseek(fY2, offsetLocationBefore, SEEK_SET);
+				firstMatch = 0;
+				endMatch = 1;
+			}
+			continue;
+		}
+		if (comp > 0) {
+			//No more matches, go to the next word
+			if(endMatch){
+				fseek(fY1, offsetWordAfter, SEEK_SET);
+				fseek(fY2, offsetLocationAfter, SEEK_SET);
+				endMatch = 0;
+			}
+			readHashEntry(&heY, fY1, freqThr);
+			continue;
+		}
+
+		wordMatches++;
+
+		// Load word position for seqX
+		if(!firstMatch)loadWordOcurrences(heX, &posX, &fX2);
+
+		// Saving the offset of the first match
+		if(wSize < 32 && !firstMatch){
+			offsetWordBefore = ftell(fY1) - sizeof(hashentry);
+			offsetLocationBefore = ftell(fY2);
+			firstMatch = 1;
+		}
+
+		// Load word position for seqY
+		loadWordOcurrences(heY, &posY, &fY2);
+
+		// Hits-----------------------
+		if (heX.num > MaxREP)
+			stepX = heX.num / MaxREP;
+		else
+			stepX = 1;
+		if (heY.num > MaxREP)
+			stepY = heY.num / MaxREP;
+		else
+			stepY = 1;
+
+		for (i = 0; i < heX.num; i += stepX)
+			for (j = 0; j < heY.num; j += stepY) {
+				hBuf[hitsInBuf].diag = posX[i].pos - posY[j].pos;
+				hBuf[hitsInBuf].posX = posX[i].pos;
+				hBuf[hitsInBuf].seqX = posX[i].seq;
+				hBuf[hitsInBuf].posY = posY[j].pos;
+				hBuf[hitsInBuf].seqY = posY[j].seq;
+
+				hitsInBuf++;
+				if (hitsInBuf == MAXBUF - 1) {
+					fwrite(hBuf, sizeof(hit), hitsInBuf, fOut);
+					hitsInBuf = 0;
+				}
+			}
+
+		nHits += ((heX.num / stepX) * (heY.num / stepY));
+
+		if(!firstMatch)readHashEntry(&heX, fX1, freqThr);
+		readHashEntry(&heY, fY1, freqThr);
+
+	}
+
+	//Closing dictionary files
+	fclose(fX1);
+	fclose(fY1);
+	fclose(fX2);
+	fclose(fY2);
+
+	//Checking if there is something still at the buffer
+	if (hitsInBuf != 0) {
+		fwrite(hBuf, sizeof(hit), hitsInBuf, fOut);
+	}
+	fclose(fOut);
+
+	fprintf(stdout, "HITS: matches=%" PRIu64 " Tot HITS=%" PRIu64 "\n", wordMatches, nHits);
+
+	return 0;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/hitsStat.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,47 @@
+/* lee Hits file (diag/posX/posY)
+
+	Syntax: leeHits InputHITSFile
+   --------------------------------------------------------*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+
+int main(int ac, char**av)
+{
+	FILE *f1;
+	hit H;
+	unsigned long nHits;
+	//---------------------------------
+
+	if (ac!=2)
+		terror("Use: LeeHits InputFile]");
+
+	if ((f1=fopen(av[1],"rb"))==NULL)
+		terror("Input file open error");
+
+	fseek(f1,0, SEEK_END);
+	nHits = ftell(f1)/sizeof(hit);
+	fprintf(stdout,"File size=%ld  nHits=%ld\n", nHits * sizeof(hit), nHits);
+	fprintf(stdout,"---------------[RET]\n");fgetc(stdin);
+	fseek(f1,0, SEEK_SET);
+
+	if(fread(&H,sizeof(hit),1,f1)!=1){
+		terror("Empty hits file");
+	}
+	while(!feof(f1)){
+		fprintf(stdout,"d=%-7" PRId64 " pX=%-7" PRIu64 " pY=%-7" PRIu64 " seqX=%-7" PRIu64 " seqY=%-7" PRIu64 "\n",H.diag,H.posX,H.posY,H.seqX,H.seqY);
+		if(fread(&H,sizeof(hit),1,f1)!=1){
+			if(ferror(f1))terror("Empty hits file");
+		}
+	}
+
+	fclose(f1);
+	return 0;
+
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/indexmaker.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,133 @@
+
+/*********************
+File		indexmaker.c
+Author		Bitlab
+Description	Computes a series of statistics and stores them in natural order and sorted order
+
+USAGE		<sequences.fasta>	The fasta sequences
+			<output.index>		The output name for the two indexes
+ * ------------------------------------------*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "comparisonFunctions.h"
+#include "commonFunctions.h"
+
+
+void copyRead(struct rIndex2 *To, struct rIndex2 *From);
+void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y);
+
+int main(int ac, char **av) {
+
+	FILE   *f, *fOut;
+	struct rIndex2 R;//, *RR;
+	//uint64_t *positions;
+	uint64_t  c;
+	uint64_t tLen=0, tLmask=0, tN=0, nReads=0;
+	uint64_t maxRlen=0, maxRlenMasked=0, maxNonACGT=0;
+	uint64_t N=0, i;
+	//char tmp[MAXLID];
+
+	if (ac!=3) terror("USE: indexmaker <sequences.fasta> <output.index>");
+
+
+	if ((f=fopen(av[1],"rt"))==NULL) terror("Could not open sequences fasta file");
+
+	if ((fOut=fopen(av[2],"wb"))==NULL) terror("Could not open output file");
+
+    R.Lac=0;
+    R.rLen=0;
+    c=fgetc(f);
+    while (!feof(f)) {
+       	while(c!='>') c=(char)fgetc(f); // only first time
+		R.pos=ftell(f) - 1;
+		i=0;
+        c=getc(f);
+        if(i==0 && c== ' ') c=getc(f); //Handle sequences with a ' ' after the '>'
+        while(i< MAXLID && c!='\n' && c!=' ') {
+         	R.id[i++] = c;
+          	c=getc(f);
+        }
+        R.id[i]=0x00;
+
+        while(c!='\n') c=getc(f);
+
+		i=0;
+		R.rLen=0;
+		R.rLmasked=0;
+		R.nonACGT=0;
+
+		while(c!='>' && !feof(f)) {
+			if (c=='N' || c=='n') R.nonACGT++;
+			else if (c>96 && c<123) R.rLmasked++;
+		   	c=toupper(c);
+		  	if (c>64 && c<91) R.rLen++;
+		  	if (c>47 && c<58) R.rLen++; //Handling color space reads
+
+			c=(char)fgetc(f);
+		}
+		R.rNumber = N;
+		if (R.rLen > maxRlen) maxRlen=R.rLen;
+		if (R.rLmasked > maxRlenMasked) maxRlenMasked = R.rLmasked;
+		if (R.nonACGT  > maxNonACGT) maxNonACGT = R.nonACGT;
+
+		fwrite(&R,sizeof(struct rIndex2),1,fOut);
+
+		N++;
+
+		R.Lac +=R.rLen;
+		tLen  +=R.rLen;
+		tN    +=R.nonACGT;
+		tLmask+=R.rLmasked;
+
+		nReads++;
+
+	}
+
+	fclose(f);
+	fclose(fOut);
+
+	fprintf(stdout,"....done\n");
+	return 0;
+}
+
+void copyRead(struct rIndex2 *To, struct rIndex2 *From){
+
+    strcpy(To->id, From->id);
+    To->rNumber = From->rNumber;
+    To->rLen    = From->rLen;
+    To->rLmasked= From->rLmasked;
+    To->nonACGT = From->nonACGT;
+    To->pos     = From->pos;      //Start position of read
+    To->Lac     = From->Lac;      // accumulated length
+}
+
+
+void QsortRead(struct rIndex2 *array,uint64_t x, uint64_t y) {
+
+	struct rIndex2 pivote, aux;
+	uint64_t x1, y1;
+
+	copyRead(&pivote, &array[(x+y)/2]);
+	x1 = x;
+	y1 = y;
+
+ 	do{
+		while (strcmp(pivote.id, array[x1].id)>0) x1++;
+		while (strcmp(pivote.id, array[y1].id)<0) y1--;
+    	if (x1 < y1) {
+			copyRead(&aux,&array[x1]);
+			copyRead(&array[x1], &array[y1]);
+			copyRead(&array[y1], &aux);
+			x1++;
+			y1--;
+    	}else if (x1 == y1) x1++;
+   } while (x1 <=y1);
+
+  if (x<y1) QsortRead(array,x,y1);
+  if (x1<y) QsortRead(array,x1,y);
+ }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/lista.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,145 @@
+/* lista.c
+
+functions to handle a list
+
+
+
+*/
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <math.h>
+
+#include "lista.h"
+
+void Insertar(Lista *lista, int v,Fragmentv3 f,int n_event) {
+   pNodo nuevo, actual;
+
+   /* Crear un nodo nuevo */
+   nuevo = (pNodo)malloc(sizeof(tipoNodo));
+   nuevo->valor = v;
+   nuevo->f=f;
+   nuevo->n_event=n_event;
+
+   /* Colocamos actual en la primera posición de la lista */
+   actual = *lista;
+   if(actual) while(actual->anterior) actual = actual->anterior;
+
+   /* Si la lista está vacía o el primer miembro es mayor que el nuevo */
+   if(!actual || actual->valor > v) {
+      /* Añadimos la lista a continuación del nuevo nodo */
+      nuevo->siguiente = actual;
+      nuevo->anterior = NULL;
+      if(actual) actual->anterior = nuevo;
+      if(!*lista) *lista = nuevo;
+   }
+   else {
+      /* Avanzamos hasta el último elemento o hasta que el siguiente tenga
+         un valor mayor que v */
+      while(actual->siguiente && actual->siguiente->valor <= v)
+         actual = actual->siguiente;
+      /* Insertamos el nuevo nodo después del nodo anterior */
+      nuevo->siguiente = actual->siguiente;
+      actual->siguiente = nuevo;
+      nuevo->anterior = actual;
+      if(nuevo->siguiente) nuevo->siguiente->anterior = nuevo;
+   }
+}
+
+void Borrar(Lista *lista, int v) {
+   pNodo nodo;
+
+
+   /* Buscar el nodo de valor v */
+   nodo = *lista;
+   while(nodo && nodo->valor <v) nodo = nodo->siguiente;
+   while(nodo && nodo->valor > v) nodo = nodo->anterior;
+
+   /* El valor v no está en la lista */
+   if(!nodo || nodo->valor != v) return;
+
+   /* Borrar el nodo */
+   /* Si lista apunta al nodo que queremos borrar, apuntar a otro */
+   if(nodo == *lista){
+     if(nodo->anterior) {*lista = nodo->anterior;}
+     else {*lista = nodo->siguiente;}
+   }
+   if(nodo->anterior) /* no es el primer elemento */
+      nodo->anterior->siguiente = nodo->siguiente;
+   if(nodo->siguiente) /* no es el último nodo */
+      nodo->siguiente->anterior = nodo->anterior;
+   free(nodo);
+}
+void BorrarLista(Lista *lista) {
+   pNodo nodo, actual;
+
+   actual = *lista;
+   while(actual->anterior) actual = actual->anterior;
+
+   while(actual) {
+      nodo = actual;
+      actual = actual->siguiente;
+      free(nodo);
+   }
+   *lista = NULL;
+}
+
+void MostrarLista(Lista lista, int orden,char* nombre,int num) {
+   pNodo nodo = lista;
+	int i=0;
+   if(!lista) printf("Lista vacía");
+
+   nodo = lista;
+   if(orden == 1) {
+      while(nodo->anterior) nodo = nodo->anterior;
+     // printf("Orden ascendente: ");
+      while(nodo) {
+        // printf("%d -> ", nodo->valor);
+		printFrag(nodo->f,nombre,i++,num,nodo->n_event);
+         nodo = nodo->siguiente;
+      }
+   }
+   else {
+      while(nodo->siguiente) nodo = nodo->siguiente;
+     // printf("Orden descendente: ");
+      while(nodo) {
+        printFrag(nodo->f,nombre,i++,num,nodo->n_event);
+         nodo = nodo->anterior;
+      }
+   }
+
+   //printf("\n");
+}
+
+void printFrag(Fragmentv3 f,char* nombre,int i,int num,int n_event){
+
+
+	fprintf(NULL,"%d\t%s\t%d\t%ld\t%ld\t%ld\t%ld\t%f\t%d\t%d\t%d\t%d\t%d\t%c\t%d\t%d\t%d\t%d\t%d\n",i,nombre,(int)num,f.xIni,f.xFin,f.yIni,f.yFin,f.similarity,(int)f.id,(int)f.seqY,(int)f.seqX,(int)f.block,(int)f.gap,f.strand,(int)f.reversions,(int)f.translocations,(int)f.duplications,(int)f.events,(int)f.score);
+	printf("%ld\t%ld\t%d\n",f.seqY,f.seqX,n_event);
+
+
+}
+
+
+void CopiarListas(Lista* origen,Lista* dest){ // SUponemos las dos listas mismo tamaño
+
+	pNodo onodo = *origen;
+	pNodo dnodo = *dest;
+	while(onodo->anterior && dnodo->anterior) {onodo = onodo->anterior;dnodo = dnodo->anterior;}
+
+   while(onodo && dnodo) {
+   /*
+	dnodo->f=*(onodo->f);
+	dnodo->valor=*(onodo->valor);
+      */
+
+	  dnodo = dnodo->siguiente;
+      onodo = onodo->siguiente;
+
+   }
+
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/lista.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,33 @@
+/*lista.h
+
+
+*/
+
+
+#ifndef LISTA_H_
+#define LISTA_H_
+
+#include "fragmentv3.h"
+
+typedef struct _nodo {
+	Fragmentv3 f;
+   int valor;
+   int n_event;
+   struct _nodo *siguiente;
+   struct _nodo *anterior;
+} tipoNodo;
+
+typedef tipoNodo *pNodo;
+typedef tipoNodo *Lista;
+
+
+void Insertar(Lista *lista, int v,Fragmentv3 f,int n_event);
+void Borrar(Lista *lista, int v);
+void printFrag(Fragmentv3 f,char* nombre,int i,int num,int n_event);
+void BorrarLista(Lista *lista);
+void CopiarListas(Lista* origen,Lista* dest); // SUponemos las dos listas mismo tamaño
+
+
+
+
+#endif /* LISTA_H_ */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/matrix.mat	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,6 @@
+ACGT
+ACGT
+4	-4	-4	-4
+-4	4	-4	-4
+-4	-4	4	-4
+-4	-4	-4	4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/quicksort.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,392 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <stdint.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "quicksort.h"
+
+#define BUF_SIZE 1024
+#define SWAP(a,b,t) t=a; a=b; b=t;
+#define STRING_SIZE 1024
+
+typedef struct {
+	BaseType* a;
+	int l;
+	int r;
+	int nth;
+} PQSortArgs;
+
+typedef struct {
+	char fin1[STRING_SIZE];
+	char fin2[STRING_SIZE];
+	char fout[STRING_SIZE];
+} PMergeArgs;
+
+void assertNotNull(void* p, char* msg){
+	if(p==NULL){
+		terror(msg);
+	}
+}
+
+void assertIntEQ(int a, int b, char* msg){
+	if(a!=b){
+		terror(msg);
+	}
+}
+
+void assertIntGE(int a, int b, char* msg){
+	if(a<b){
+		terror(msg);
+	}
+}
+
+int bufMerge(BaseType* a1, int n1, BaseType* a2, int n2, BaseType* m){
+	int i1=0,i2=0;
+	int j=0;
+
+	while(i1<n1 && i2<n2){
+		if(GT(a1[i1],a2[i2])){
+			m[j]=a2[i2];
+			i2++;
+			j++;
+		}else{
+			m[j]=a1[i1];
+			i1++;
+			j++;
+		}
+	}
+
+	while(i1<n1){
+		m[j]=a1[i1];
+		i1++;
+		j++;
+	}
+
+	while(i2<n2){
+		m[j]=a2[i2];
+		i2++;
+		j++;
+	}
+
+	return j;
+}
+
+void *PMerge(void* a){
+	PMergeArgs* args = (PMergeArgs*)a;
+
+	FILE* fin1 = fopen(args->fin1,"rb");
+	assertNotNull(fin1,args->fin1);
+
+	FILE* fin2 = fopen(args->fin2,"rb");
+	assertNotNull(fin2,args->fin2);
+
+	FILE* fout = fopen(args->fout,"wb");
+	assertNotNull(fout,args->fout);
+
+	BaseType *a1 = (BaseType*)calloc(BUF_SIZE,sizeof(BaseType));
+	assertNotNull(a1,"calloc");
+
+	BaseType *a2 = (BaseType*)calloc(BUF_SIZE,sizeof(BaseType));
+	assertNotNull(a2,"calloc");
+
+	BaseType *m = (BaseType*)calloc(2*BUF_SIZE,sizeof(BaseType));
+	assertNotNull(m,"calloc");
+
+	int i,j,k;
+	int r1,r2,tmp;
+
+	r1=fread(a1,sizeof(BaseType),BUF_SIZE,fin1);
+	r2=fread(a2,sizeof(BaseType),BUF_SIZE,fin2);
+
+	i=j=k=0;
+	while(r1>0 && r2>0){
+		while(i<r1 && j<r2){
+			if(GT(a1[i],a2[j])){
+				m[k]=a2[j];
+				j++;
+			}else{
+				m[k]=a1[i];
+				i++;
+			}
+			k++;
+			if(k>=2*BUF_SIZE){
+				tmp=fwrite(m,sizeof(BaseType),k,fout);
+		    assertIntEQ(tmp,k,"fwrite");
+				k=0;
+			}
+		}
+
+		if(i>=r1){
+			r1=fread(a1,sizeof(BaseType),BUF_SIZE,fin1);
+			i=0;
+		}
+
+		if(j>=r2){
+			r2=fread(a2,sizeof(BaseType),BUF_SIZE,fin2);
+			j=0;
+		}
+	}
+
+	if(k>0){
+		tmp=fwrite(m,sizeof(BaseType),k,fout);
+		assertIntEQ(tmp,k,"fwrite");
+	}
+
+	if(i<r1){
+		tmp=fwrite(a1+i,sizeof(BaseType),r1-i,fout);
+		assertIntEQ(tmp,r1-i,"fwrite");
+	}
+
+	if(j<r2){
+		tmp=fwrite(a2+j,sizeof(BaseType),r2-j,fout);
+		assertIntEQ(tmp,r2-j,"fwrite");
+	}
+
+	assertIntEQ(fclose(fin1),0,"fclose");
+	assertIntEQ(fclose(fin2),0,"fclose");
+	assertIntEQ(fclose(fout),0,"fclose");
+
+	assertIntEQ(unlink(args->fin1),0,args->fin1);
+	assertIntEQ(unlink(args->fin2),0,args->fin2);
+
+	pthread_exit(NULL);
+}
+
+int partition(BaseType* a, int l, int r) {
+   int i=l;
+   int j=r+1;
+   BaseType t;
+
+   // l sera el pivote
+   // y contendra la mediana de l, r y (l+r)/2
+   int mid = (l+r)/2;
+
+   if(GT(a[mid],a[r])) {
+		 SWAP(a[mid],a[r],t);
+   }
+
+   if(GT(a[mid],a[l])) {
+		 SWAP(a[mid],a[l],t);
+   }
+
+   if(GT(a[l],a[r])) {
+		 SWAP(a[l],a[r],t);
+	 }
+
+	while (1) {
+		do{
+			++i;
+		}while( !GT(a[i],a[l]) && i <= r );
+
+		do{
+			--j;
+		}while( GT(a[j],a[l]) && j >= l);
+
+		if( i >= j ) break;
+
+		SWAP(a[i],a[j],t)
+	}
+
+	SWAP(a[l],a[j],t)
+
+	return j;
+}
+
+int QsortC(BaseType* a, int l,int r) {
+   int j;
+
+	if( l < r ) {
+ 	// divide and conquer
+       j = partition( a, l, r);
+       //  j=(l+r)/2;
+       QsortC( a, l, j-1);
+       QsortC( a, j+1, r);
+   }
+   return 0;
+}
+
+void *PQSort(void* a){
+	PQSortArgs *args=(PQSortArgs*)a;
+	if(args->nth>1){
+		int tmp;
+		int j = partition(args->a,args->l,args->r);
+		int np=1;
+		if(args->r - args->l > 0)
+			np = (args->nth*(j-args->l))/(args->r-args->l);
+		if(np<1) np=1;
+		if(np>=args->nth) np=args->nth-1;
+		//printf("%d\t%d (%d)\t%d (%d)\n",args->r-args->l,j-args->l,np,args->r-j,args->nth-np);
+
+		pthread_t* th = (pthread_t*)calloc(2,sizeof(pthread_t));
+		assertNotNull(th,"calloc");
+
+		PQSortArgs* nargs = (PQSortArgs*)calloc(2,sizeof(PQSortArgs));
+		assertNotNull(args,"calloc");
+
+		nargs[0].a=args->a;
+		nargs[0].l=args->l;
+		nargs[0].r=j-1;
+		nargs[0].nth=np;
+		tmp=pthread_create(th,NULL,PQSort,(void*)(nargs));
+		assertIntEQ(tmp,0,"pthread_create");
+
+		nargs[1].a=args->a;
+		nargs[1].l=j+1;
+		nargs[1].r=args->r;
+		nargs[1].nth=args->nth-np;
+		tmp=pthread_create(th+1,NULL,PQSort,(void*)(nargs+1));
+		assertIntEQ(tmp,0,"pthread_create");
+
+		tmp=pthread_join(th[0],NULL);
+		assertIntEQ(tmp,0,"pthread_join");
+		tmp=pthread_join(th[1],NULL);
+		assertIntEQ(tmp,0,"pthread_join");
+
+		free(th);
+		free(nargs);
+	}else{
+		QsortC(args->a,args->l,args->r);
+	}
+	pthread_exit(NULL);
+}
+
+unsigned long timestart(){
+	struct timeval tv;
+
+	gettimeofday(&tv,NULL);
+
+	return (tv.tv_usec/1000) + (tv.tv_sec*1000);
+}
+
+unsigned long timestop(unsigned long start){
+	struct timeval tv;
+
+	gettimeofday(&tv,NULL);
+
+	return (tv.tv_usec/1000) + (tv.tv_sec*1000) - start;
+}
+
+int psort(int maxsize, int nproc, char* ifile, char* ofile){
+	int tmp;
+	int max=maxsize;
+	int np=nproc;
+	if(np<1) np=1;
+
+	printf("Allocating %lu bytes.\n",max*sizeof(BaseType));
+	BaseType* a = (BaseType*)calloc(max,sizeof(BaseType));
+	assertNotNull(a,"calloc");
+
+	FILE* fin=fopen(ifile,"rt");
+	assertNotNull(fin,ifile);
+
+	printf("Stage1: Quicksorts\n");
+	unsigned long t = timestart();
+	//Read + Quicksort + Write:
+	char fname[strlen(ofile)+10];
+	int nfile=0;
+
+	do {
+		//Read:
+		// printf("Reading...\n");
+		int n=fread(a,sizeof(BaseType),max,fin);
+		if(n==0) break;
+
+		//Quicksort:
+		// printf("Quicksort %d\n",n);
+		pthread_t th;
+		PQSortArgs args;
+
+		args.a=a;
+		args.l=0;
+		args.r=n-1;
+		args.nth=np;
+		tmp=pthread_create(&th,NULL,PQSort,(void*)(&args));
+		assertIntEQ(tmp,0,"pthread_create");
+
+		//Wait:
+		tmp=pthread_join(th,NULL);
+		assertIntEQ(tmp,0,"pthread_join");
+
+		//Write:
+		// printf("Writing...\n");
+		sprintf(fname,"%s.%06d",ofile,nfile);
+		FILE* fout=fopen(fname,"wb");
+		assertNotNull(fout,"fname");
+
+		tmp=fwrite(a,sizeof(BaseType),n,fout);
+		assertIntEQ(tmp,n,"fwrite");
+
+		tmp=fclose(fout);
+		assertIntEQ(tmp,0,"fclose");
+
+		nfile++;
+	}while(!feof(fin));
+
+	tmp=fclose(fin);
+	assertIntEQ(tmp,0,"fclose");
+
+	free(a);
+
+	tmp=unlink(ifile);
+	assertIntEQ(tmp,0,"unlink");
+
+	printf("Stage1: %lu\n\n",timestop(t));
+
+	//Merge:
+	printf("Stage2: Merge\n");
+	t = timestart();
+
+	int curf=0;
+	int i=0;
+	int j=0;
+	pthread_t th[np];
+	PMergeArgs args[np];
+
+	curf=0;
+	while(nfile-curf>1){
+		j=0;
+		while(curf<nfile-1 && j<np){
+			sprintf(args[j].fin1,"%s.%06d",ofile,curf);
+			sprintf(args[j].fin2,"%s.%06d",ofile,curf+1);
+			sprintf(args[j].fout,"%s.%06d",ofile,nfile+j);
+
+			// printf("Merge(%d, %d) -> %d\n",curf,curf+1,nfile+j);
+			tmp=pthread_create(th+j,NULL,PMerge,(void*)(args+j));
+			assertIntEQ(tmp,0,"pthread_create");
+			curf+=2;
+			j++;
+		}
+
+		for(i=0;i<j;i++){
+			tmp=pthread_join(th[i],NULL);
+			assertIntEQ(tmp,0,"pthread_join");
+		}
+
+		nfile+=j;
+	}
+
+	printf("Stage2: %lu\n\n",timestop(t));
+
+	//Bin2Text:
+	printf("Stage3: Write output file\n");
+	t = timestart();
+
+	sprintf(fname,"%s.%06d",ofile,curf);
+	if(nfile>0){
+		tmp=rename(fname,ofile);
+		assertIntEQ(tmp,0,"rename");
+	} else {
+		FILE *fout=fopen(ofile, "wb");
+		assertNotNull(fout,"fopen");
+		fclose(fout);
+	}
+
+	printf("Stage3: %lu\n",timestop(t));
+
+	return 0;
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/quicksort.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,18 @@
+#ifndef QUICKSORT_H
+#define QUICKSORT_H
+
+/**
+ * Function to order the ifile using a buffer size of
+ * 'maxsize' and a thread number of 'nproc'. the result
+ * will be written to ofile
+ */
+int psort(int maxsize, int nproc, char* ifile, char* ofile);
+
+/**
+ * Function to determine if object 1 is strictly greater than 2.
+ * Returns 1 if true, 0 otherwise
+ * Has to be defined in the main procedure
+ */
+int GT(BaseType a1, BaseType a2);
+
+#endif /* QUICKSORT_H */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/reverseComplement.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,120 @@
+/* reverseComplement.c
+   Program reading a FASTA file as input (one or multiple sequence)
+   and then returning the reverse complementary sequence in the output file.
+
+   It is important to note that for multiple sequence file the first input
+   sequence is the last one at the output file and the last one in the input
+   is the first one in the output.
+   oscart@uma.es
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include "commonFunctions.h"
+
+#define SEQSIZE 2000000000
+#define WSIZE 32
+#define NREADS 1000000
+
+int main(int ac, char** av) {
+	FILE *fIn, *fOut;
+	int64_t i, j, nR, seqLen = 0;
+	char *seq, c, toW;
+	long offset[NREADS];
+
+	if (ac != 3)
+		terror("USE: reverseComplement seqFile.IN reverseComplementarySeq.OUT");
+
+	/**
+	 * Allocating memory for the sequence
+	 * Fixed amount of memory, change it to loadSeqDB?
+	 */
+	if ((seq = (char*) malloc(SEQSIZE)) == NULL)
+		terror("memory for Seq");
+
+	if ((fIn = fopen(av[1], "rt")) == NULL)
+		terror("opening IN sequence FASTA file");
+
+	if ((fOut = fopen(av[2], "wt")) == NULL)
+		terror("opening OUT sequence Words file");
+
+	for(i=0;i<NREADS;i++){
+		offset[i]=0;
+	}
+
+	nR = 0;
+	c = fgetc(fIn);
+	while(!feof(fIn)){
+		if(c == '>'){
+			offset[nR++] = ftell(fIn)-1;
+		}
+		c = fgetc(fIn);
+	}
+
+	for(i=nR-1; i>=0; i--){
+		fseek(fIn, offset[i], SEEK_SET);
+		//READ and write header
+		if(fgets(seq, SEQSIZE, fIn)==NULL){
+			terror("Empty file");
+		}
+		fprintf(fOut, "%s", seq);
+		//READ and write sequence
+		c = fgetc(fIn);
+		while(c != '>' && !feof(fIn)){
+			if(isupper(c) || islower(c)){
+				seq[seqLen++]=c;
+			}
+			c = fgetc(fIn);
+		}
+		for(j=seqLen-1; j >= 0; j--){
+			switch(seq[j]){
+				case 'A':
+					toW = 'T';
+					break;
+				case 'C':
+					toW = 'G';
+					break;
+				case 'G':
+					toW = 'C';
+					break;
+				case 'T':
+					toW = 'A';
+					break;
+				case 'U':
+					toW = 'A';
+					break;
+				case 'a':
+					toW = 't';
+					break;
+				case 'c':
+					toW = 'g';
+					break;
+				case 'g':
+					toW = 'c';
+					break;
+				case 't':
+					toW = 'a';
+					break;
+				case 'u':
+					toW = 'a';
+					break;
+				default:
+					toW = seq[j];
+					break;
+			}
+			fwrite(&toW, sizeof(char), 1, fOut);
+		}
+		toW='\n';
+		fwrite(&toW, sizeof(char), 1, fOut);
+		seqLen=0;
+	}
+
+	fclose(fIn);
+	fclose(fOut);
+
+	return 0;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/sortHits.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "quicksort.h"
+
+int GT(BaseType a1, BaseType a2) {
+	if(a1.diag > a2.diag)
+		return 1;
+	else if (a1.diag < a2.diag)
+		return 0;
+	if (a1.posX > a2.posX)
+		return 1;
+
+	return 0;
+}
+
+int main(int ac, char** av) {
+	if (ac < 4) {
+		printf("USE: sortHits <max_size> <num_proc> <input_file> <output_file>\n");
+		exit(1);
+	}
+
+	return psort(atoi(av[1]), atoi(av[2]), av[3], av[4]);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/sortWords.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "quicksort.h"
+
+int GT(BaseType a1, BaseType a2){
+	if(a1.w.b[0] > a2.w.b[0]) return 1;
+	else if(a1.w.b[0] < a2.w.b[0]) return 0;
+
+	if(a1.w.b[1] > a2.w.b[1]) return 1;
+	else if(a1.w.b[1] < a2.w.b[1]) return 0;
+
+	if(a1.w.b[2] > a2.w.b[2]) return 1;
+	else if(a1.w.b[2] < a2.w.b[2]) return 0;
+
+	if(a1.w.b[3] > a2.w.b[3]) return 1;
+	else if(a1.w.b[3] < a2.w.b[3]) return 0;
+
+	if(a1.w.b[4] > a2.w.b[4]) return 1;
+	else if(a1.w.b[4] < a2.w.b[4]) return 0;
+
+	if(a1.w.b[5] > a2.w.b[5]) return 1;
+	else if(a1.w.b[5] < a2.w.b[5]) return 0;
+
+	if(a1.w.b[6] > a2.w.b[6]) return 1;
+	else if(a1.w.b[6] < a2.w.b[6]) return 0;
+
+	if(a1.w.b[7] > a2.w.b[7]) return 1;
+	else if(a1.w.b[7] < a2.w.b[7]) return 0;
+
+	if(a1.seq > a2.seq) return 1;
+	else if(a1.seq < a2.seq) return 0;
+
+	if(a1.pos > a2.pos) return 1;
+	return 0;
+}
+
+int main(int ac, char** av){
+	if(ac<4) {
+		terror("USE: sortWords <max_size> <num_proc> <input_file> <output_file>\n");
+	}
+
+	return psort(atoi(av[1]),atoi(av[2]),av[3],av[4]);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/structs.h	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,130 @@
+#ifndef STRUCTS_H
+#define STRUCTS_H
+
+#include <inttypes.h>
+//Structs required for the dotplot workflow
+#define MAXLID 200
+#define MAXLS 1000000000
+#define READBUF 50000000 //50MB
+
+//Struct for words program
+typedef struct {
+	//Each letter is stored using 2 bits
+	//We have 4 letters per byte and a
+	//maximum of 32 in 'b'
+    unsigned char b[8];
+} word;
+
+//Struct for words and sort program
+typedef struct {
+	//Word compressed in binary format
+    word w;
+    //Ocurrence position in the sequence
+    uint64_t pos;
+    //For multiple sequence files this var
+    //reflects in what sequence occurs the
+    //word
+    uint64_t seq;
+} wentry;
+
+//Struct for w2hd program
+typedef struct {
+	//Word compressed in binary format
+    word w;
+    //Ocurrence position in the sequence
+    uint64_t pos;
+    //Number of ocurrences inside the
+    //sequence. This is used to know the
+    //number of locations stored in the
+    //positions file
+    uint64_t num;
+} hashentry;
+
+//Struct for w2hd program
+typedef struct {
+    //Ocurrence position in the sequence
+    uint64_t pos;
+    //For multiple sequence files this var
+    //reflects in what sequence occurs the
+    //word
+    uint64_t seq;
+} location;
+
+//Struct for hits, sortHits and filterHits programs
+typedef struct {
+	//Diagonal where the hit is located
+	//This value is calculated as:
+	//posX - posY
+	int64_t diag;
+    //Ocurrence position in sequence X
+    uint64_t posX;
+    //Ocurrence position in sequence Y
+    uint64_t posY;
+    //For multiple sequence files this var
+    //reflects in what sequence of X file
+    //occurs the word
+    uint64_t seqX;
+    //For multiple sequence files this var
+    //reflects in what sequence of Y file
+    //occurs the word
+    uint64_t seqY;
+} hit;
+
+//Struct for FragHits, af2png and leeFrag programs
+struct FragFile {
+	//Diagonal where the frag is located
+	//This value is calculated as:
+	//posX - posY
+    int64_t diag;
+    //Start position in sequence X
+    uint64_t xStart;
+    //Start position in Sequence Y
+    uint64_t yStart;
+    //End position in Sequence X
+    uint64_t xEnd;
+    //End position in Sequence Y
+    uint64_t yEnd;
+    //Fragment Length
+    //For ungaped aligment is:
+    //xEnd-xStart+1
+    uint64_t length;
+    //Number of identities in the
+    //fragment
+    uint64_t ident;
+    //Score of the fragment. This
+    //depends on the score matrix
+    //used
+    uint64_t score;
+    //Percentage of similarity. This
+    //is calculated as score/scoreMax
+    //Where score max is the maximum
+    //score possible
+    float similarity;
+    //sequence number in the 'X' file
+    uint64_t seqX;
+    //sequence number in the 'Y' file
+    uint64_t seqY;
+    //synteny block id
+    int64_t block;
+    //'f' for the forward strain and 'r' for the reverse
+    char strand;
+};
+
+//Struct for leeSeqDB function
+struct Sequence{
+    char ident[MAXLID+1];
+    char *datos;
+};
+
+//Struct for reads index tuple
+struct rIndex2 {
+    char id[MAXLID];
+    uint64_t  rNumber;
+    uint64_t  rLen;
+    uint64_t  rLmasked; //Masked positions
+    uint64_t  nonACGT;  //N's
+    uint64_t  pos;      //Start position of sequence
+    uint64_t  Lac;      //Accumulated length
+};
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/w2hd.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,81 @@
+/* creates a hash table in disk from a set of ordered words
+   Syntax: w2hd  wordsSort.In  prefixNameOUT
+
+    wordsSort is a bin file with Word-Pos-Seq
+    prefixNameOUT.h2dW  : index of words-Pos-Ocurrences
+    prefixNameOUT.h2dP  : positions(Pos+seq)
+
+    Feb.2011: add a new parameter: PrefixSize
+
+    PrefixSize: defines the word-prefix size to be used to identify when two
+		words are the "same"
+
+                              ortrelles@uma.es / Dic.2011
+    ---------------------------------------------------------*/
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "dictionaryFunctions.h"
+
+int main(int ac, char** av){
+
+	char fname[1024];
+	uint64_t nWords=0;
+	FILE* fw, *fOut1, *fOut2;
+
+	wentry we;
+	hashentry he;
+	location loc;
+
+	if(ac!=3)terror("USE: w2hd  wordsSort.In  prefixNameOUT\n");
+
+	if ((fw = fopen(av[1],"rb"))==NULL) terror("opening IN file");
+
+	sprintf(fname,"%s.d2hW",av[2]);
+	if ((fOut1 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hW file");
+	sprintf(fname,"%s.d2hP",av[2]);
+	if ((fOut2 = fopen(fname,"wb"))==NULL) terror("opening prefix.d2hP file");
+
+	if(fread(&we,sizeof(wentry),1,fw)!=1){
+		terror("empty words file");
+	}
+	memcpy(&he.w.b[0],&we.w.b[0],8);
+	he.pos=0;
+	he.num=0;
+
+	while(!feof(fw)){
+		  loc.pos=we.pos;
+		  loc.seq=we.seq;
+		  if (wordcmp(&he.w.b[0],&we.w.b[0],32)!=0) {
+			 fwrite(&he,sizeof(hashentry),1,fOut1);
+			 memcpy(&he.w.b[0],&we.w.b[0],8);
+			 he.pos=ftell(fOut2);
+			 he.num=0;
+		  }
+
+		  fwrite(&loc,sizeof(location),1,fOut2);
+		  he.num++;
+		  nWords++;
+
+		  if(fread(&we,sizeof(wentry),1,fw)!=sizeof(wentry)){
+			if(ferror(fw))terror("error reading words file");
+		  }
+	}
+
+	fwrite(&he,sizeof(hashentry),1,fOut1);
+
+	fprintf(stdout,"\nw2hd: %s tot words=%" PRIu64 "\n",av[1],nWords);
+
+	fclose(fOut1);
+	fclose(fOut2);
+	fclose(fw);
+
+	return 0;
+}
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/words.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,123 @@
+/* words.c
+ It is recommended to mask the Low-Complexity regions before using this program
+
+ This program generates a set of 32-mers for the given input sequence.
+
+ Usage: "./words seq.IN words.OUT
+ where seq.IN is a plain-text sequence
+ words.OUT is a binary file of "wentry" structures with the 32-mers
+ -----------------------------------------------------
+ oscart@uma.es
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "dictionaryFunctions.h"
+
+#define WORD_SIZE 32
+#define BYTES_IN_WORD 8
+
+void shift_word(word * w){
+	int i;
+	for(i=0;i<BYTES_IN_WORD-1;i++){
+		w->b[i]<<=2;
+		w->b[i]|=(w->b[i+1]>>6);
+	}
+	w->b[BYTES_IN_WORD-1]<<=2;
+}
+
+void main_FILE(char * inFile, char * outFile){
+	FILE *f;
+	FILE *f2;
+	char c;
+	char *seq = NULL;
+	uint64_t i = 0, r = 0;
+
+	if ((f=fopen(inFile,"rt"))==NULL){
+	perror("opening sequence file");
+	}
+	if ((f2=fopen(outFile,"wb"))==NULL) {
+		terror("opening OUT sequence Words file");
+	}
+	if ((seq = calloc(READBUF, sizeof(char))) == NULL) {
+		terror("not enough memory for read buffer");
+	}
+
+	//To force the read
+	i = READBUF + 1;
+
+	c = buffered_fgetc(seq, &i, &r, f);
+	while (c != '\n') {
+		c = buffered_fgetc(seq, &i, &r, f);
+	}
+
+	wentry WE;
+	WE.seq=0;
+	unsigned long index=0;
+	unsigned long inEntry=0;
+	unsigned long NW=0;
+	unsigned long Tot=0;
+	unsigned long NoACGT=0;
+	unsigned long NoC=0;
+
+	c = buffered_fgetc(seq, &i, &r, f);
+	while (!feof(f) || (feof(f) && i < r)){
+		if (!isupper(toupper(c))){
+			if(c=='>'){
+				c = buffered_fgetc(seq, &i, &r, f);
+				while (c != '\n')
+					c = buffered_fgetc(seq, &i, &r, f);
+				WE.seq++;
+				inEntry=0;
+				index++;
+			}
+			NoC++;
+			c = buffered_fgetc(seq, &i, &r, f);
+			continue;
+		}
+		shift_word(&WE.w);
+		switch (c) {
+			case 'A': inEntry++; break;
+			case 'C':
+				WE.w.b[BYTES_IN_WORD-1]|=1;
+				inEntry++;
+				break;
+			case 'G':
+				WE.w.b[BYTES_IN_WORD-1]|=2;
+				inEntry++;
+				break;
+			case 'T':
+				WE.w.b[BYTES_IN_WORD-1]|=3;
+				inEntry++;
+				break;
+			default :
+				inEntry=0; NoACGT++; break;
+		}
+		index++;
+		Tot++;
+		if(inEntry>=(unsigned long)WORD_SIZE){
+			WE.pos=index-WORD_SIZE;
+			NW++;
+			fwrite(&WE,sizeof(wentry),1,f2);
+		}
+		c = buffered_fgetc(seq, &i, &r, f);
+	}
+
+	fclose(f);
+
+}
+
+int main(int ac, char** av){
+	if(ac!=3){
+		terror("USE: words seqFile.IN words.OUT");
+	}
+	main_FILE(av[1], av[2]);
+	return 0;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gecko/src/wordsStat.c	Thu Dec 13 07:59:25 2018 -0500
@@ -0,0 +1,47 @@
+/* wordsStat.c
+
+ This program shows the position of each word and the
+ sequence it corresponds to
+ -----------------------------------------------------12.Nov.2012
+ oscart @ uma.es
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "structs.h"
+#include "commonFunctions.h"
+#include "dictionaryFunctions.h"
+
+int main(int ac, char** av) {
+	FILE *f;
+	uint64_t i = 0;
+	wentry wordEntry;
+	char wordString[33];
+	wordString[32] = '\0';
+
+	if (ac != 2)
+		terror("USE: wordsStat words");
+
+	if ((f = fopen(av[1], "rt")) == NULL)
+		terror("opening words file");
+
+	if(fread(&wordEntry, sizeof(wentry), 1, f)!=1){
+		terror("Empty words file");
+	}
+	while (!feof(f)) {
+		showWord(&wordEntry.w, wordString);
+		fprintf(stdout, "Words(%" PRIu64 "):%s Pos: %" PRIu64 " Seq: %" PRIu64 "\n", i, wordString,
+				wordEntry.pos, wordEntry.seq);
+		if(fread(&wordEntry, sizeof(wentry), 1, f)!=1){
+			if(ferror(f))terror("Error reading words file");
+		}
+		i++;
+	}
+
+	fclose(f);
+
+	return 0;
+}
+