# HG changeset patch
# User drosofff
# Date 1432761555 14400
# Node ID b996480cd604701f39e60f728376602d4de52a06
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
diff -r 000000000000 -r b996480cd604 sRbowtieParser.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sRbowtieParser.py Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+# python parser module to analyse sRbowtie alignments
+# version 1.0.2 - argparse implementation
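+# Usage: sRbowtieParser.py --IndexSource <path> --ExtractDirective <fastaSource|bowtieIndex> --output <path> --polarity <forward|reverse|both> --alignmentSource <file> [<file> ...] --alignmentFormat <format> [<format> ...] --alignmentLabel <label> [<label> ...]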
+
+import sys, argparse
+from smRtools import *
+
+def masterListGenerator(data_source):
+    '''Lazily build one HandleSmRNAwindows object per alignment dataset.
+
+    data_source is an iterable of (filePath, FileExt, FileLabel) triples.
+    The label is unpacked but not forwarded; the reference path and its
+    format are read from the module-level IndexSource and genomeRefFormat.
+    '''
+    for alignment_path, alignment_format, _label in data_source:
+        handler = HandleSmRNAwindows (alignment_path, alignment_format, IndexSource, genomeRefFormat)
+        yield handler
+
+def Parser():
+    '''Parse the command line of sRbowtieParser.py and return the argparse namespace.
+
+    Every option is marked as required: the script dereferences all of them
+    unconditionally, so a missing option is now reported by argparse itself
+    instead of failing later with an unrelated error.
+    The three --alignment* options each take one or more values.
+    '''
+    the_parser = argparse.ArgumentParser()
+    the_parser.add_argument('--IndexSource', action="store", type=str, required=True, help="Path to the index source")
+    the_parser.add_argument('--ExtractDirective', action="store", type=str, required=True, choices=["fastaSource", "bowtieIndex"], help="Extract info from fasta or bowtie index")
+    the_parser.add_argument('--output', action="store", type=str, required=True, help="path to the output")
+    the_parser.add_argument('--polarity', required=True, choices=["forward", "reverse", "both"], help="forward, reverse or both forward an reverse reads are counted")
+    the_parser.add_argument('--alignmentSource', nargs='+', required=True, help="paths to alignments files")
+    the_parser.add_argument('--alignmentFormat', nargs='+', required=True, help="Format of the bowtie alignment (tabular, sam or bam)")
+    the_parser.add_argument('--alignmentLabel', nargs='+', required=True, help="Label of the alignment")
+    return the_parser.parse_args()
+
+args = Parser()
+
+# module-level names: IndexSource and genomeRefFormat are read by masterListGenerator
+IndexSource = args.IndexSource
+genomeRefFormat = args.ExtractDirective
+Output = args.output
+Polarity = args.polarity
+
+FileLabelList = list(args.alignmentLabel)
+header = ["gene"] + FileLabelList
+# explicit checks instead of assert, which is stripped when python runs with -O
+if len(FileLabelList) != len(set(FileLabelList)):
+    sys.exit("You have supplied a non-unique label. Please make sure that your input files have unique names")
+# zip() stops at the shortest list, which would silently drop datasets
+if not (len(args.alignmentSource) == len(args.alignmentFormat) == len(FileLabelList)):
+    sys.exit("--alignmentSource, --alignmentFormat and --alignmentLabel must receive the same number of values")
+
+data_source = zip (args.alignmentSource, args.alignmentFormat, args.alignmentLabel)
+master_generator = masterListGenerator(data_source)
+
+# gene -> list of read counts (as strings), one entry per alignment, in input order
+gene_count_dict = {}
+for window in master_generator:
+    for gene, item in window.instanceDict.items():
+        gene_count_dict.setdefault(gene, []).append(str(item.readcount(polarity=Polarity)))
+
+# the with-statement closes the table even if writing fails
+with open (Output, "w") as F:
+    F.write("\t".join(header) + "\n")
+    for gene in sorted(gene_count_dict):
+        F.write("\t".join([gene] + gene_count_dict[gene]) + "\n")
diff -r 000000000000 -r b996480cd604 sRbowtieParser.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sRbowtieParser.xml Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,87 @@
+
+
+
+ bowtie
+ samtools
+ pysam
+ numpy
+ scipy
+
+
+ sRbowtieParser.py
+ #if $refGenomeSource.genomeSource == "history":
+ --IndexSource $refGenomeSource.ownFile
+ --ExtractDirective fastaSource
+ #else:
+ #silent reference= filter( lambda x: str( x[0] ) == str( $input_list.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
+ --IndexSource $reference
+ --ExtractDirective bowtieIndex
+ #end if
+ --output $output
+ --polarity $polarity
+ --alignmentSource
+ #for $i in $refGenomeSource.input_list
+ $i
+ #end for
+ --alignmentFormat
+ #for $i in $refGenomeSource.input_list
+ $i.ext
+ #end for
+ --alignmentLabel
+ #for $i in $refGenomeSource.input_list
+ "$i.name"
+ #end for
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+Parses read counts from one or several sRBowtie alignments (in tabular, Sam or Bam format).
+A bowtie alignment made against an index composed of a set of items is parsed and reported as a table of hit counts for the corresponding items
+
+Sense, antisense or both sense and antisense alignments can be counted
+
+The library labels are inferred from the input dataset names in the galaxy history.
+
+**It is thus essential that input datasets are appropriately renamed**
+
+**It is preferable that you do not put any space in the input dataset names. You may edit these names in the history**
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r b996480cd604 smRtools.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/smRtools.py Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,755 @@
+#!/usr/bin/python
+# version 1 7-5-2012 unification of the SmRNAwindow class
+
+import sys, subprocess
+from collections import defaultdict
+from numpy import mean, median, std
+from scipy import stats
+
+def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):
+    '''Return a dictionary {identifier: sequence} for the items of a bowtie index.
+
+    index is the path (basename) of a bowtie index; its content is dumped with
+    "bowtie-inspect -a 0", which prints every sequence on a single line.
+    Only the first word of each header is kept as identifier because bowtie
+    splits headers at spaces. An empty dictionary is returned when the
+    output holds no header line.
+    '''
+    # universal_newlines makes the pipe deliver text; stderr stays merged into stdout as before
+    p = subprocess.Popen(args=["bowtie-inspect","-a", "0", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
+    outputlines = p.communicate()[0].splitlines()
+    item_dic = {}
+    current_item = None
+    stringlist = []
+    for line in outputlines:
+        if line.startswith(">"):
+            if current_item is not None:
+                item_dic[current_item] = "".join(stringlist) # dump the sequence of the previous item
+            current_item = line[1:].rstrip().split()[0] # first word before space because bowtie splits headers
+            item_dic[current_item] = ""
+            stringlist = []
+        else:
+            stringlist.append(line.rstrip())
+    if current_item is not None:
+        item_dic[current_item] = "".join(stringlist) # for the last item
+    return item_dic
+
+def get_fasta_headers (index):
+    '''Return a dictionary {identifier: 1} for the items of a bowtie index.
+
+    Names are listed with "bowtie-inspect -n"; only the first word of each
+    name is kept because bowtie splits headers at spaces. Blank output
+    lines are skipped instead of raising IndexError.
+    '''
+    p = subprocess.Popen(args=["bowtie-inspect","-n", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
+    output = p.communicate()[0]
+    item_dic = {}
+    for line in output.splitlines():
+        words = line.split()
+        if words:
+            item_dic[words[0]] = 1
+    return item_dic
+
+
+def get_file_sample (file, numberoflines):
+    '''Return numberoflines lines of file, drawn at random without replacement.
+
+    Lines are returned without their end-of-line character. When the file
+    holds fewer lines than requested, the string
+    "sample size exceeds file size" is returned instead of a list.
+    '''
+    import random # imported here: the module does not import random at top level
+    with open(file) as F:
+        fullfile = F.read().splitlines()
+    if len(fullfile) < numberoflines:
+        return "sample size exceeds file size"
+    return random.sample(fullfile, numberoflines)
+
+def get_fasta_from_history (file):
+    '''Return a dictionary {identifier: sequence} read from a fasta file.
+
+    Only the first word of each header is kept as identifier, to match the
+    way bowtie splits headers. Multi-line sequences are concatenated.
+    End-of-line characters are stripped explicitly, so a last line without
+    a newline keeps its final base and CRLF files do not leave a carriage
+    return in the sequences. An empty file yields an empty dictionary.
+    '''
+    item_dic = {}
+    current_item = None
+    stringlist = []
+    with open (file, "r") as F:
+        for line in F:
+            line = line.rstrip("\r\n")
+            if line.startswith(">"):
+                if current_item is not None:
+                    item_dic[current_item] = "".join(stringlist) # dump the sequence of the previous item
+                current_item = line[1:].split()[0] # first word before space because bowtie splits headers
+                item_dic[current_item] = ""
+                stringlist = []
+            else:
+                stringlist.append(line)
+    if current_item is not None:
+        item_dic[current_item] = "".join(stringlist) # for the last item
+    return item_dic
+
+def antipara (sequence):
+    '''Return the reverse complement of sequence.
+
+    Only the upper-case symbols A, T, G, C and N are known; any other
+    character raises KeyError.
+    '''
+    complement = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}
+    return "".join(complement[base] for base in reversed(sequence))
+
+def RNAtranslate (sequence):
+    '''Return sequence with every character other than A, G, C or N replaced by U.'''
+    kept = "AGCN"
+    converted = []
+    for base in sequence:
+        converted.append(base if base in kept else "U")
+    return "".join(converted)
+def DNAtranslate (sequence):
+    '''Return sequence with every character other than A, G, C or N replaced by T.'''
+    kept = "AGCN"
+    converted = []
+    for base in sequence:
+        converted.append(base if base in kept else "T")
+    return "".join(converted)
+
+def RNAfold (sequence_list):
+    '''Fold sequences with the external "RNAfold --noPS" program.
+
+    sequence_list is a list of sequences, sent one per line on stdin.
+    Returns a dictionary pairing each echoed sequence (converted with
+    DNAtranslate) with its folding energy, kept as a string. The energy is
+    the text found after the last "(" of the structure line.
+    Blank output lines are ignored wherever they occur.
+    '''
+    thestring = "\n".join(sequence_list)
+    p = subprocess.Popen(args=["RNAfold","--noPS"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
+    output = p.communicate(thestring)[0] # communicate() already waits for the process to end
+    parsed = [] # alternating sequence, energy, sequence, energy, ...
+    for line in output.split("\n"):
+        if not line:
+            continue
+        if line[0] in "NATUGC":
+            parsed.append(DNAtranslate(line))
+        elif line[0] in "(.)":
+            energy = line.split("(")[-1]
+            energy = float(energy[:-1]) # remove the closing parenthesis
+            parsed.append(str(energy))
+    return dict(zip(parsed[::2], parsed[1::2]))
+
+def extractsubinstance (start, end, instance):
+    '''Return a new SmRNAwindow restricted to coordinates start..end of instance.
+
+    The sub-window takes the matching slice of the sequence, start as its
+    windowoffset, and a gene name made of the original name followed by its
+    first and last coordinates. The read lists stored under the forward
+    (+i) and reverse (-i) keys of that range are shared with instance,
+    not copied. Kept outside the class, as the original note says, to test
+    whether this saves memory.
+    '''
+    subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)
+    subinstance.gene = "%s %s %s" % (subinstance.gene, subinstance.windowoffset, subinstance.windowoffset + subinstance.size - 1)
+    for coordinate in range(start, end+1):
+        for key in (coordinate, -coordinate):
+            # membership test: does not create entries in the defaultdict
+            if key in instance.readDict:
+                subinstance.readDict[key] = instance.readDict[key]
+    return subinstance
+
+class HandleSmRNAwindows:
+    '''Load one alignment file and dispatch its reads to one SmRNAwindow per reference item.
+
+    instanceDict maps each reference identifier to its SmRNAwindow and
+    alignedReads counts the reads that were stored.
+    '''
+
+    def __init__(self, alignmentFile="~", alignmentFileFormat="tabular", genomeRefFile="~", genomeRefFormat="bowtieIndex", biosample="undetermined", size_inf=None, size_sup=1000, norm=1.0):
+        '''Read the reference items, create one window per item, then parse the alignment.
+
+        alignmentFileFormat can be "tabular", "sam" or "bam".
+        genomeRefFormat can be "bowtieIndex" or "fastaSource"; any other
+        value raises ValueError.
+        When size_inf is set, only reads with size_inf <= size <= size_sup are kept.
+        '''
+        self.biosample = biosample
+        self.alignmentFile = alignmentFile
+        self.alignmentFileFormat = alignmentFileFormat
+        self.genomeRefFile = genomeRefFile
+        self.genomeRefFormat = genomeRefFormat
+        self.alignedReads = 0
+        self.instanceDict = {}
+        self.size_inf = size_inf
+        self.size_sup = size_sup
+        self.norm = norm
+        if genomeRefFormat == "bowtieIndex":
+            self.itemDict = get_fasta (genomeRefFile)
+        elif genomeRefFormat == "fastaSource":
+            self.itemDict = get_fasta_from_history (genomeRefFile)
+        else:
+            raise ValueError("genomeRefFormat must be 'bowtieIndex' or 'fastaSource', got %r" % (genomeRefFormat,))
+        for item in self.itemDict:
+            # create as many instances as there are items
+            self.instanceDict[item] = SmRNAwindow(item, sequence=self.itemDict[item], windowoffset=1, biosample=self.biosample, norm=self.norm)
+        self.readfile()
+
+    def _register_read (self, gene, polarity, offset, size):
+        '''Store one read (offset already 1-based) unless the size filter rejects it.'''
+        if self.size_inf and not (self.size_inf <= size <= self.size_sup):
+            return
+        self.instanceDict[gene].addread (polarity, offset, size)
+        self.alignedReads += 1
+
+    def readfile (self):
+        '''Parse self.alignmentFile and return instanceDict.
+
+        Offsets read from the file are shifted by +1 to obtain the 1-based
+        coordinates used by SmRNAwindow. A read aligned to an item that is
+        not in the reference raises KeyError. Nothing is parsed, and None
+        is returned, when the format is not tabular, sam or bam.
+        '''
+        if self.alignmentFileFormat == "tabular":
+            with open (self.alignmentFile, "r") as F:
+                for line in F:
+                    fields = line.split()
+                    if not fields:
+                        continue # tolerate blank lines
+                    polarity, gene, offset, size = fields[1], fields[2], int(fields[3]), len(fields[4])
+                    self._register_read (gene, polarity, offset+1, size)
+            return self.instanceDict
+        elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam":
+            import pysam
+            samfile = pysam.Samfile(self.alignmentFile)
+            try:
+                for read in samfile:
+                    if read.tid == -1:
+                        continue # filter out unaligned reads
+                    polarity = "-" if read.is_reverse else "+"
+                    gene = samfile.getrname(read.tid)
+                    self._register_read (gene, polarity, read.pos+1, read.qlen)
+            finally:
+                samfile.close()
+            return self.instanceDict
+
+    def size_histogram (self): # in HandleSmRNAwindows
+        '''Sum the per-item size histograms.
+
+        Returns {"F": ..., "R": ..., "both": ...}, each a defaultdict(float)
+        keyed by read size. "both" accumulates F minus R of each item.
+        '''
+        size_dict = {}
+        size_dict['F'] = defaultdict (float)
+        size_dict['R'] = defaultdict (float)
+        size_dict['both'] = defaultdict (float)
+        for item in self.instanceDict:
+            buffer_dict = self.instanceDict[item].size_histogram()
+            for polarity in ["F", "R"]:
+                for size in buffer_dict[polarity]:
+                    size_dict[polarity][size] += buffer_dict[polarity][size]
+            for size in buffer_dict["both"]:
+                size_dict["both"][size] += buffer_dict["F"][size] - buffer_dict["R"][size]
+        return size_dict
+
+    def CountFeatures (self, GFF3="path/to/file"):
+        '''Return {feature type: read count} for the features of a GFF3 file.
+
+        Reads of both polarities are counted with method="destructive", so
+        counted offsets are removed from the windows. Comment lines and
+        blank lines are skipped.
+        '''
+        featureDict = defaultdict(int)
+        with open (GFF3, "r") as F:
+            for line in F:
+                if line.startswith("#"):
+                    continue
+                fields = line.split()
+                if not fields:
+                    continue
+                chrom, feature, leftcoord, rightcoord = fields[0], fields[2], fields[3], fields[4]
+                featureDict[feature] += self.instanceDict[chrom].readcount(upstream_coord=int(leftcoord), downstream_coord=int(rightcoord), polarity="both", method="destructive")
+        return featureDict
+
+class SmRNAwindow:
+
+ def __init__(self, gene, sequence="ATGC", windowoffset=1, biosample="Undetermined", norm=1.0):
+     '''Create an empty read container for one reference item (or a slice of it).
+
+     windowoffset is the coordinate given to the first base of sequence.
+     '''
+     # identity of the window
+     self.gene = gene
+     self.biosample = biosample
+     self.norm = norm
+     # geometry of the window
+     self.sequence = sequence
+     self.size = len(sequence)
+     self.windowoffset = windowoffset
+     # reads, stored as {+/-offset: [size1, size2, ...], ...}, and per-strand totals
+     self.readDict = defaultdict(list)
+     self.matchedreadsUp = 0
+     self.matchedreadsDown = 0
+
+ def addread (self, polarity, offset, size):
+     '''Record one read of length size.
+
+     ATTENTION: offset is no longer converted from 0-based to 1-based here;
+     that conversion is done while parsing the reads.
+     A "+" read is stored under its offset; any other polarity is stored
+     under the negated value of offset + size - 1.
+     '''
+     if polarity != "+":
+         self.readDict[-(offset + size -1)].append(size)
+         self.matchedreadsDown += 1
+         return
+     self.readDict[offset].append(size)
+     self.matchedreadsUp += 1
+
+ def barycenter (self, upstream_coord=None, downstream_coord=None):
+     '''Return (Fbarycenter, Rbarycenter), the relative barycenters of forward and reverse reads.
+
+     For each strand, the read-count-weighted average of the offsets lying
+     in upstream_coord..downstream_coord (default: the whole window) is
+     shifted by upstream_coord and divided by the window length.
+     A strand without reads uses an average of 0, as before; the division
+     is now always a float division, so the result no longer depends on
+     Python 2 integer floor division in that case.
+     '''
+     upstream_coord = upstream_coord or self.windowoffset
+     downstream_coord = downstream_coord or self.windowoffset+self.size-1
+     window_size = float(downstream_coord - upstream_coord + 1)
+     # strand sign -> [sum of position * read count, sum of read counts]
+     sums = {1: [0, 0], -1: [0, 0]}
+     for key, sizes in self.readDict.items():
+         position = abs(key)
+         if key == 0 or position < upstream_coord or position > downstream_coord:
+             continue
+         strand = sums[1 if key > 0 else -1]
+         strand[0] += position * len(sizes)
+         strand[1] += len(sizes)
+     def relative_center (weighted_sum, weight):
+         average = weighted_sum / float(weight) if weight > 0 else 0.0
+         return (average - upstream_coord) / window_size
+     Fbarycenter = relative_center (*sums[1])
+     Rbarycenter = relative_center (*sums[-1])
+     return Fbarycenter, Rbarycenter
+
+ def correlation_mapper (self, reference, window_size):
+     '''Map the correlation with a reference profile along a sliding window.
+
+     reference is the path of a whitespace-separated file whose second and
+     third columns hold the forward and reverse reference profiles.
+     For every window start, the Spearman correlation between the local
+     read counts and the reference is computed per strand. Windows with no
+     forward or no reverse read are skipped. A row
+     [start (1-based), forward rho, reverse rho] is reported when either
+     rho exceeds 0.2. Returns [] when window_size is larger than the window.
+     '''
+     if window_size > self.size:
+         return []
+     reference_forward = []
+     reference_reverse = []
+     with open(reference, "r") as F:
+         for line in F:
+             fields = line.split()
+             reference_forward.append(int(float(fields[1])))
+             reference_reverse.append(int(float(fields[2])))
+     # .get() instead of indexing: indexing the defaultdict would insert an
+     # empty list for every coordinate of the window
+     local_object_forward = [len(self.readDict.get(i, ())) for i in range(1, self.size+1)]
+     local_object_reverse = [len(self.readDict.get(-i, ())) for i in range(1, self.size+1)]
+     results = []
+     # NOTE(review): the last possible window start (self.size - window_size) is not visited - confirm intent
+     for coordinate in range(self.size - window_size):
+         local_forward = local_object_forward[coordinate:coordinate + window_size]
+         local_reverse = local_object_reverse[coordinate:coordinate + window_size]
+         if sum(local_forward) == 0 or sum(local_reverse) == 0:
+             continue
+         try:
+             forward_cor = stats.spearmanr(local_forward, reference_forward)[0]
+             reverse_cor = stats.spearmanr(local_reverse, reference_reverse)[0]
+         except Exception:
+             # best effort, as before: a window that cannot be correlated is ignored
+             continue
+         if forward_cor > 0.2 or reverse_cor > 0.2:
+             results.append([coordinate+1, forward_cor, reverse_cor])
+     return results
+
+ def readcount (self, size_inf=0, size_sup=1000, upstream_coord=None, downstream_coord=None, polarity="both", method="conservative"):
+     '''Count the reads of the window.
+
+     Only reads with size_inf <= size <= size_sup whose stored offset lies in
+     upstream_coord..downstream_coord are counted. The coordinates default
+     to the whole window; None is used as default because the real bounds
+     depend on the instance.
+     polarity can be "both", "forward" or "reverse"; any other value returns None.
+     With polarity "both" and a method other than "conservative", the counted
+     offsets are deleted from readDict, which precludes re-using them.
+     The pre-computed totals are used only for a whole-window request that
+     keeps the default size bounds, so that an explicit size filter is
+     always honoured.
+     '''
+     upstream_coord = upstream_coord or self.windowoffset
+     downstream_coord = downstream_coord or self.windowoffset+self.size-1
+     whole_window = (upstream_coord == 1 and downstream_coord == self.windowoffset+self.size-1)
+     default_sizes = (size_inf == 0 and size_sup == 1000)
+     if whole_window and default_sizes:
+         if polarity == "both":
+             return self.matchedreadsUp + self.matchedreadsDown
+         if polarity == "forward":
+             return self.matchedreadsUp
+         if polarity == "reverse":
+             return self.matchedreadsDown
+     if polarity not in ("both", "forward", "reverse"):
+         return None
+     n = 0
+     # walk the stored offsets rather than every coordinate of the window;
+     # list() because keys may be deleted during the loop
+     for key in list(self.readDict):
+         position = abs(key)
+         if position < upstream_coord or position > downstream_coord:
+             continue
+         if (polarity == "forward" and key < 0) or (polarity == "reverse" and key > 0):
+             continue
+         n += sum(1 for read in self.readDict[key] if size_inf <= read <= size_sup)
+         if polarity == "both" and method != "conservative":
+             del self.readDict[key] # careful: the reads of this offset cannot be counted again
+     return n
+
+ def readsizes (self):
+     '''Return a dictionary of number of reads by size (the keys).
+
+     Every size between the smallest and the largest observed size is
+     present, with 0 for sizes that were not observed. An empty dictionary
+     is returned when the window holds no read.
+     '''
+     dicsize = {}
+     for offset in self.readDict:
+         for size in self.readDict[offset]:
+             dicsize[size] = dicsize.get(size, 0) + 1
+     if not dicsize:
+         return dicsize
+     for size in range (min(dicsize), max(dicsize)+1):
+         dicsize.setdefault(size, 0) # fill missing sizes with null values
+     return dicsize
+
+# def size_histogram(self):
+# norm=self.norm
+# hist_dict={}
+# hist_dict['F']={}
+# hist_dict['R']={}
+# for offset in self.readDict:
+# for size in self.readDict[offset]:
+# if offset < 0:
+# hist_dict['R'][size] = hist_dict['R'].get(size, 0) - 1*norm
+# else:
+# hist_dict['F'][size] = hist_dict['F'].get(size, 0) + 1*norm
+# ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate !
+# if not (hist_dict['F']) and (not hist_dict['R']):
+# hist_dict['F'][21] = 0
+# hist_dict['R'][21] = 0
+# ##
+# return hist_dict
+
+ def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow
+     '''Return the normalised size distribution of the reads of the window.
+
+     The result is {"F": ..., "R": ..., "both": ...}, each a
+     defaultdict(float) keyed by read size: "F" holds +norm per forward
+     read, "R" holds -norm per reverse read and "both" holds F minus R.
+     "F" and "R" end up with the same set of sizes. When the window has no
+     read, size 21 is reported with 0 so that downstream plotting still
+     gets a row. When minquery and maxquery are both given, every size of
+     minquery..maxquery is present in the three tables.
+     '''
+     norm = self.norm
+     size_dict = {}
+     size_dict['F'] = defaultdict (float)
+     size_dict['R'] = defaultdict (float)
+     size_dict['both'] = defaultdict (float)
+     for offset in self.readDict:
+         strand = 'R' if offset < 0 else 'F'
+         step = -1*norm if offset < 0 else 1*norm
+         for size in self.readDict[offset]:
+             size_dict[strand][size] = size_dict[strand][size] + step
+     ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate !
+     if not size_dict['F'] and not size_dict['R']:
+         size_dict['F'][21] = 0
+         size_dict['R'][21] = 0
+     ##
+     # set union instead of keys() + keys(), which only works with Python 2 lists
+     allSizeKeys = set(size_dict['F']) | set(size_dict['R'])
+     for size in allSizeKeys:
+         # indexing also creates the missing size in the other strand table
+         size_dict['both'][size] = size_dict['F'][size] - size_dict['R'][size]
+     if minquery and maxquery:
+         for polarity in size_dict:
+             for size in range(minquery, maxquery+1):
+                 if size not in size_dict[polarity]:
+                     size_dict[polarity][size] = 0
+     return size_dict
+
+ def statsizes (self, upstream_coord=None, downstream_coord=None):
+     '''Return (mean, median, standard deviation) of the read sizes.
+
+     Only reads whose stored offset lies in upstream_coord..downstream_coord
+     are used; both bounds default to the limits of the window.
+     '''
+     first = upstream_coord or self.windowoffset
+     last = downstream_coord or self.windowoffset+self.size-1
+     sizes = [size
+              for offset in self.readDict
+              if first <= abs(offset) <= last
+              for size in self.readDict[offset]]
+     return mean(sizes), median(sizes), std(sizes)
+
+ def foldEnergy (self, upstream_coord=None, downstream_coord=None):
+     '''Return the folding energy, as a float, of a stretch of the window sequence.
+
+     The stretch is self.sequence[upstream_coord-1:downstream_coord]; both
+     bounds default to the limits of the window. The energy is obtained
+     from RNAfold and looked up under the stretch itself.
+     '''
+     first = upstream_coord or self.windowoffset
+     last = downstream_coord or self.windowoffset+self.size-1
+     stretch = self.sequence[first-1:last]
+     energies = RNAfold ([stretch])
+     return float(energies[stretch])
+
+ def Ufreq (self, size_scope, upstream_coord=None, downstream_coord=None):
+     '''Return the percentage of reads starting with a T, or "." when no read qualifies.
+
+     Only reads whose size is in size_scope (an iterable) and whose stored
+     offset lies in upstream_coord..downstream_coord are used. The start
+     base is read from the window sequence at the stored offset and is
+     complemented for reverse reads.
+     '''
+     first = upstream_coord or self.windowoffset
+     last = downstream_coord or self.windowoffset+self.size-1
+     complement = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
+     counts = {"A":0,"T":0,"G":0,"C":0, "N":0}
+     for offset, sizes in self.readDict.items():
+         if not (first <= abs(offset) <= last):
+             continue
+         hits = sum(1 for size in sizes if size in size_scope)
+         if not hits:
+             continue
+         startbase = self.sequence[abs(offset)-self.windowoffset]
+         if offset < 0:
+             startbase = complement[startbase]
+         counts[startbase] += hits
+     total = float ( sum( counts.values()) )
+     if total == 0:
+         return "."
+     return counts["T"] / total * 100
+
+ def Ufreq_stranded (self, size_scope, upstream_coord=None, downstream_coord=None):
+     '''Return "forward | reverse" percentages of reads starting with a T.
+
+     Same read selection as Ufreq (size in size_scope, stored offset in
+     upstream_coord..downstream_coord), but each strand is reported
+     separately; a strand without qualifying read is reported as ".".
+     '''
+     first = upstream_coord or self.windowoffset
+     last = downstream_coord or self.windowoffset+self.size-1
+     complement = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
+     counts = {"Afor":0,"Tfor":0,"Gfor":0,"Cfor":0, "Nfor":0,"Arev":0,"Trev":0,"Grev":0,"Crev":0, "Nrev":0}
+     for offset, sizes in self.readDict.items():
+         if not (first <= abs(offset) <= last):
+             continue
+         hits = sum(1 for size in sizes if size in size_scope)
+         if not hits:
+             continue
+         startbase = self.sequence[abs(offset)-self.windowoffset]
+         if offset < 0:
+             counts[complement[startbase]+"rev"] += hits
+         else:
+             counts[startbase+"for"] += hits
+     forward_sum = float (sum(counts[base+"for"] for base in "ATGCN"))
+     reverse_sum = float (sum(counts[base+"rev"] for base in "ATGCN"))
+     forward_text = "." if forward_sum == 0 else "%s" % (counts["Tfor"] / forward_sum * 100)
+     reverse_text = "." if reverse_sum == 0 else "%s" % (counts["Trev"] / reverse_sum * 100)
+     return forward_text + " | " + reverse_text
+
+
+ def readplot (self):
+     '''Return the read map of the window as a list of tab-separated rows.
+
+     Each row is "gene, coordinate, normalised count, strand". Forward
+     counts are positive and tagged "F", reverse counts are negated and
+     tagged "R". Rows are ordered by coordinate, forward before reverse.
+     A single null "F" row at coordinate 1 is returned when there is nothing
+     to plot, to avoid missing graphs when parsed by R-lattice.
+     '''
+     scale = self.norm
+     positions = set(abs(offset) for offset in self.readDict.keys())
+     rows = []
+     for position in sorted(positions):
+         forward = len(self.readDict.get(position, [])) * scale
+         reverse = len(self.readDict.get(-position, [])) * scale
+         if forward != 0:
+             rows.append("%s\t%s\t%s\t%s" % (self.gene, position, forward, "F") )
+         if reverse != 0:
+             rows.append("%s\t%s\t%s\t%s" % (self.gene, position, -reverse, "R") )
+     if not rows:
+         rows.append("%s\t%s\t%s\t%s" % (self.gene, 1, 0, "F") )
+     return rows
+
+ def readcoverage (self, upstream_coord=None, downstream_coord=None, windowName=None):
+     '''Return the per-position coverage of the major strand as tab-separated rows (used by the MirParser tool).
+
+     The major strand is "forward" when the window holds strictly more
+     forward than reverse reads, "reverse" otherwise. Each row holds:
+     biosample, window name, relative position, position divided by the
+     window length, coverage, coverage divided by the maximum coverage
+     (or by 1 when the maximum is 0), major strand.
+     '''
+     upstream_coord = upstream_coord or 1
+     downstream_coord = downstream_coord or self.size
+     windowName = windowName or "%s_%s_%s" % (self.gene, upstream_coord, downstream_coord)
+     span = downstream_coord - upstream_coord + 1
+     # NOTE(review): positions run from 1 to span-1, so the last base of the window has no entry - confirm intent
+     coverage = dict ((position, 0) for position in xrange(1, span))
+     totalforward = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="forward")
+     totalreverse = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="reverse")
+     forward_is_major = totalforward > totalreverse
+     majorcoverage = "forward" if forward_is_major else "reverse"
+     for offset in self.readDict.keys():
+         if forward_is_major:
+             if offset <= 0:
+                 continue
+             anchor = offset - upstream_coord + 1
+             if anchor not in coverage:
+                 continue
+             for read in self.readDict[offset]:
+                 for i in xrange(read):
+                     # a sense read may span over the downstream limit
+                     if anchor + i in coverage:
+                         coverage[anchor + i] += 1
+         else:
+             if offset >= 0:
+                 continue
+             anchor = -offset - upstream_coord
+             if anchor + 1 not in coverage:
+                 continue
+             # NOTE(review): the membership test uses anchor+1 but counting starts at anchor - confirm intent
+             for read in self.readDict[offset]:
+                 for i in xrange(read):
+                     # an antisense read may span over the upstream limit
+                     if anchor - i in coverage:
+                         coverage[anchor - i] += 1
+     maximum = max (coverage.values()) or 1
+     rows = []
+     for n in sorted (coverage):
+         rows.append("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.biosample, windowName, n, float(n)/span, coverage[n], float(coverage[n])/maximum, majorcoverage))
+     return "\n".join(rows)
+
+
+ def signature (self, minquery, maxquery, mintarget, maxtarget, scope, zscore="no", upstream_coord=None, downstream_coord=None):
+     '''Return {relative offset: number of query/target read pairs}.
+
+     Query reads have a size in minquery..maxquery and target reads a size
+     in mintarget..maxtarget; only stored offsets lying in
+     upstream_coord..downstream_coord are used. scope must be a python
+     iterable that can be walked several times; it defines the *relative*
+     offsets to be computed. For a query offset and a relative offset i,
+     the target is looked up at -offset - i + 1 and the pair count is the
+     smaller of the two read numbers. When query and target ranges are
+     identical the counts are halved. With zscore="yes" the counts are
+     replaced by their z-scores (all 0 when the standard deviation is 0).
+     '''
+     first = upstream_coord or self.windowoffset
+     last = downstream_coord or self.windowoffset+self.size-1
+     query_sizes = range (minquery, maxquery+1)
+     target_sizes = range (mintarget, maxtarget+1)
+     query_counts = defaultdict(int)
+     target_counts = defaultdict(int)
+     frequency_table = dict ((shift, 0) for shift in scope)
+     for offset in self.readDict:
+         if not (first <= abs(offset) <= last):
+             continue
+         for size in self.readDict[offset]:
+             if size in query_sizes:
+                 query_counts[offset] += 1
+             if size in target_sizes:
+                 target_counts[offset] += 1
+     for offset in query_counts:
+         for shift in scope:
+             # .get() so that absent targets are not inserted
+             frequency_table[shift] += min(query_counts[offset], target_counts.get(-offset - shift + 1, 0))
+     if minquery==mintarget and maxquery==maxtarget:
+         # division by 2 incorporated in the method (26/11/2013), see signature_options.py and lattice_signature.py;
+         # the counts are integers, so // is the Python 2 meaning of /
+         frequency_table = dict ((shift, frequency_table[shift] // 2) for shift in frequency_table)
+     if zscore == "yes":
+         z_mean = mean(frequency_table.values() )
+         z_std = std(frequency_table.values() )
+         if z_std == 0:
+             frequency_table = dict ((shift, 0) for shift in frequency_table)
+         else:
+             frequency_table = dict ((shift, (frequency_table[shift] - z_mean)/z_std) for shift in frequency_table)
+     return frequency_table
+
+ def hannon_signature (self, minquery, maxquery, mintarget, maxtarget, scope, upstream_coord=None, downstream_coord=None):
+     '''Return {relative offset: weighted pairing score}.
+
+     Reads are selected as in the signature method. scope must be an
+     iterable (a list or a tuple) that can be walked several times; it
+     gives the relative offsets to compute. For each query offset, the
+     product query count x target count at every relative offset is divided
+     by the number of targets found over the whole scope and by the total
+     number of query reads, then added to the general table. Query offsets
+     without any target contribute nothing.
+     '''
+     first = upstream_coord or self.windowoffset
+     last = downstream_coord or self.windowoffset+self.size-1
+     query_sizes = range (minquery, maxquery+1)
+     target_sizes = range (mintarget, maxtarget+1)
+     query_counts = defaultdict(int)
+     target_counts = defaultdict(int)
+     total_queries = 0
+     general_frequency_table = dict ((shift, 0) for shift in scope)
+     ## filtering the appropriate reads for the study
+     for offset in self.readDict:
+         if not (first <= abs(offset) <= last):
+             continue
+         for size in self.readDict[offset]:
+             if size in query_sizes:
+                 query_counts[offset] += 1
+                 total_queries += 1
+             if size in target_sizes:
+                 target_counts[offset] += 1
+     for offset in query_counts:
+         targets_by_shift = dict ((shift, target_counts.get(-offset - shift + 1, 0)) for shift in scope)
+         number_of_targets = sum(targets_by_shift.values())
+         if number_of_targets == 0:
+             continue # nothing to pair with this query offset
+         weight = 1. / number_of_targets / total_queries
+         for shift in scope:
+             general_frequency_table[shift] += weight * (query_counts[offset] * targets_by_shift[shift])
+     return general_frequency_table
+
+ def phasing (self, size_range, scope):
+     '''Return {relative offset: autocorrelation-like phasing score}.
+
+     Only reads whose size is in size_range are used. scope must be a python
+     iterable that can be walked several times. For each occupied offset,
+     the partner is looked up at offset + i for positive offsets and at
+     offset - i otherwise; the product of the two read numbers is divided
+     by the number of partners found over the whole scope and by the total
+     number of selected reads. Offsets without partner contribute nothing.
+     '''
+     read_counts = defaultdict(int)
+     total_read_number = 0
+     general_frequency_table = dict ((shift, 0) for shift in scope)
+     ## read input filtering
+     for offset in self.readDict:
+         for size in self.readDict[offset]:
+             if size in size_range:
+                 read_counts[offset] += 1
+                 total_read_number += 1
+     ## per offset read phasing computing
+     for offset in read_counts:
+         direction = 1 if offset > 0 else -1
+         partners = dict ((shift, read_counts.get(offset + direction * shift, 0)) for shift in scope)
+         number_of_targets = sum(partners.values())
+         if number_of_targets == 0:
+             continue
+         ## inclusion of the local frequencies in the general frequency table (all offsets average)
+         weight = 1. / number_of_targets / total_read_number
+         for shift in scope:
+             general_frequency_table[shift] += weight * (read_counts[offset] * partners[shift])
+     return general_frequency_table
+
+
+
+ def z_signature (self, minquery, maxquery, mintarget, maxtarget, scope):
+     '''Return the signature of the window expressed as z-scores.
+
+     Relies on mean and std from numpy. scope must be a python iterable and
+     defines the relative offsets to compute. Every offset gets 0 when the
+     standard deviation of the counts is 0.
+     '''
+     frequency_table = self.signature (minquery, maxquery, mintarget, maxtarget, scope)
+     ordered_offsets = sorted (frequency_table)
+     counts = [frequency_table[offset] for offset in ordered_offsets]
+     spread = std(counts)
+     if not spread:
+         return dict ((offset, 0) for offset in ordered_offsets)
+     center = mean(counts)
+     return dict ((offset, (frequency_table[offset] - center)/spread) for offset in ordered_offsets)
+
+ def percent_signature (self, minquery, maxquery, mintarget, maxtarget, scope):
+     '''Return the signature of the window as a percentage of the eligible reads.
+
+     The denominator is the number of reads whose size belongs to
+     minquery..maxquery or mintarget..maxtarget, bounds included, as in the
+     signature method. Every offset of scope gets 0 when no read is eligible.
+     '''
+     frequency_table = self.signature (minquery, maxquery, mintarget, maxtarget, scope)
+     eligible_sizes = set(range(minquery, maxquery+1)) | set(range(mintarget, maxtarget+1))
+     # counted directly from readDict, in a single pass
+     total = float(sum(1 for offset in self.readDict for size in self.readDict[offset] if size in eligible_sizes))
+     if total == 0:
+         return dict( [(i,0) for i in scope])
+     return dict( [(i, frequency_table[i]/total*100) for i in scope])
+
+ def pairer (self, overlap, minquery, maxquery, mintarget, maxtarget):
+ queryhash = defaultdict(list)
+ targethash = defaultdict(list)
+ query_range = range (int(minquery), int(maxquery)+1)
+ target_range = range (int(mintarget), int(maxtarget)+1)
+ paired_sequences = []
+ for offset in self.readDict: # selection of data
+ for size in self.readDict[offset]:
+ if size in query_range:
+ queryhash[offset].append(size)
+ if size in target_range:
+ targethash[offset].append(size)
+ for offset in queryhash:
+ if offset >= 0: matched_offset = -offset - overlap + 1
+ else: matched_offset = -offset - overlap + 1
+ if targethash[matched_offset]:
+ paired = min ( len(queryhash[offset]), len(targethash[matched_offset]) )
+ if offset >= 0:
+ for i in range (paired):
+ paired_sequences.append("+%s" % RNAtranslate ( self.sequence[offset:offset+queryhash[offset][i]]) )
+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-targethash[matched_offset][i]+1:-matched_offset+1]) ) )
+ if offset < 0:
+ for i in range (paired):
+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-queryhash[offset][i]+1:-offset+1]) ) )
+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+targethash[matched_offset][i]] ) )
+ return paired_sequences
+
+ def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget): # returns the sequences of every query read and every target read found at matching offsets (pairer instead emits min-count pairs)
+ queryhash = defaultdict(list) # offset -> sizes falling in the query size range
+ targethash = defaultdict(list) # offset -> sizes falling in the target size range
+ query_range = range (int(minquery), int(maxquery)+1) # inclusive bounds
+ target_range = range (int(mintarget), int(maxtarget)+1) # inclusive bounds
+ paired_sequences = []
+
+ for offset in self.readDict: # selection of data
+ for size in self.readDict[offset]:
+ if size in query_range:
+ queryhash[offset].append(size)
+ if size in target_range:
+ targethash[offset].append(size)
+
+ for offset in queryhash:
+ matched_offset = -offset - overlap + 1 # offset at which target reads are looked up
+ if targethash[matched_offset]: # defaultdict: an offset without target yields an empty (falsy) list
+ if offset >= 0: # queries are labelled "+", their targets "-"
+ for i in queryhash[offset]:
+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )
+ for i in targethash[matched_offset]:
+ paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )
+ if offset < 0: # queries are labelled "-", their targets "+"
+ for i in queryhash[offset]:
+ paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )
+ for i in targethash[matched_offset]:
+ paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )
+ return paired_sequences
+
+ def newpairable_bowtie (self, overlap, minquery, maxquery, mintarget, maxtarget):
+ ''' revision of pairable on 3-12-2012, with focus on the offset shift problem (bowtie is 1-based coordinates whereas python strings are 0-based coordinates). Returns a list of tab-separated lines "counter, strand, self.gene, start index of the slice, slice of self.sequence", one per query read that has at least one target read at the matched offset; the target reads themselves are not emitted. '''
+ queryhash = defaultdict(list)
+ targethash = defaultdict(list)
+ query_range = range (int(minquery), int(maxquery)+1)
+ target_range = range (int(mintarget), int(maxtarget)+1)
+ bowtie_output = []
+
+ for offset in self.readDict: # selection of data
+ for size in self.readDict[offset]:
+ if size in query_range:
+ queryhash[offset].append(size)
+ if size in target_range:
+ targethash[offset].append(size)
+ counter = 0 # running line number, written as the first field of each output line
+ for offset in queryhash:
+ matched_offset = -offset - overlap + 1
+ if targethash[matched_offset]: # targets are only used as a filter here
+ if offset >= 0:
+ for i in queryhash[offset]:
+ counter += 1
+ bowtie_output.append("%s\t%s\t%s\t%s\t%s" % (counter, "+", self.gene, offset-1, self.sequence[offset-1:offset-1+i]) ) # mind the 1-based versus 0-based offset
+ if offset < 0:
+ for i in queryhash[offset]:
+ counter += 1
+ bowtie_output.append("%s\t%s\t%s\t%s\t%s" % (counter, "-", self.gene, -offset-i, self.sequence[-offset-i:-offset])) # mind the 1-based versus 0-based offset
+ return bowtie_output
+
+
+def __main__(bowtie_index_path, bowtie_output_path):
+ sequenceDic = get_fasta (bowtie_index_path)
+ objDic = {}
+ F = open (bowtie_output_path, "r") # F is the bowtie output taken as input
+ for line in F:
+ fields = line.split()
+ polarity = fields[1]
+ gene = fields[2]
+ offset = int(fields[3])
+ size = len (fields[4])
+ try:
+ objDic[gene].addread (polarity, offset, size)
+ except KeyError:
+ objDic[gene] = SmRNAwindow(gene, sequenceDic[gene])
+ objDic[gene].addread (polarity, offset, size)
+ F.close()
+ for gene in objDic:
+ print gene, objDic[gene].pairer(19,19,23,19,23)
+
+if __name__ == "__main__" : __main__(sys.argv[1], sys.argv[2]) # run as a script: argv[1] is passed as bowtie_index_path, argv[2] as bowtie_output_path
diff -r 000000000000 -r b996480cd604 test-data/Read_Count_Lists.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Read_Count_Lists.tab Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,239 @@
+gene matchedSample_1 matchedSample_2
+dme-bantam 1 1
+dme-let-7 0 0
+dme-mir-1 72 0
+dme-mir-10 0 20
+dme-mir-100 0 1
+dme-mir-1000 0 0
+dme-mir-1001 0 0
+dme-mir-1002 0 0
+dme-mir-1003 0 0
+dme-mir-1004 0 0
+dme-mir-1005 0 0
+dme-mir-1006 0 0
+dme-mir-1007 0 0
+dme-mir-1008 0 0
+dme-mir-1009 0 0
+dme-mir-1010 0 0
+dme-mir-1011 0 0
+dme-mir-1012 0 0
+dme-mir-1013 0 0
+dme-mir-1014 0 0
+dme-mir-1015 0 0
+dme-mir-1016 0 0
+dme-mir-1017 0 0
+dme-mir-11 1 2
+dme-mir-12 2 0
+dme-mir-124 0 0
+dme-mir-125 0 1
+dme-mir-133 0 0
+dme-mir-137 0 0
+dme-mir-13a 0 0
+dme-mir-13b-1 0 0
+dme-mir-13b-2 0 0
+dme-mir-14 7 7
+dme-mir-184 146 111
+dme-mir-190 0 0
+dme-mir-193 0 0
+dme-mir-210 0 0
+dme-mir-219 0 0
+dme-mir-2279 0 0
+dme-mir-2280 0 0
+dme-mir-2281 0 0
+dme-mir-2282 0 0
+dme-mir-2283 0 0
+dme-mir-2489 0 0
+dme-mir-2490 0 0
+dme-mir-2491 0 0
+dme-mir-2492 0 0
+dme-mir-2493 0 0
+dme-mir-2494 0 0
+dme-mir-2495 0 0
+dme-mir-2496 0 0
+dme-mir-2497 0 0
+dme-mir-2498 0 0
+dme-mir-2499 0 0
+dme-mir-2500 0 0
+dme-mir-2501 0 0
+dme-mir-252 0 0
+dme-mir-2535b 0 0
+dme-mir-263a 12 2
+dme-mir-263b 0 0
+dme-mir-274 0 0
+dme-mir-275 7 0
+dme-mir-276a 7 5
+dme-mir-276b 1 0
+dme-mir-277 0 1
+dme-mir-278 0 0
+dme-mir-279 8 0
+dme-mir-280 0 0
+dme-mir-281-1 0 3
+dme-mir-281-2 1 28
+dme-mir-282 0 0
+dme-mir-283 0 0
+dme-mir-284 0 0
+dme-mir-285 0 0
+dme-mir-286 0 0
+dme-mir-287 0 0
+dme-mir-288 0 0
+dme-mir-289 0 0
+dme-mir-2a-1 1 0
+dme-mir-2a-2 0 0
+dme-mir-2b-1 0 0
+dme-mir-2b-2 0 1
+dme-mir-2c 0 0
+dme-mir-3 0 0
+dme-mir-303 0 0
+dme-mir-304 0 0
+dme-mir-305 10 2
+dme-mir-306 0 0
+dme-mir-307a 0 0
+dme-mir-307b 0 0
+dme-mir-308 0 0
+dme-mir-309 0 0
+dme-mir-310 0 0
+dme-mir-311 0 0
+dme-mir-312 1 0
+dme-mir-313 0 0
+dme-mir-314 0 0
+dme-mir-315 0 0
+dme-mir-316 1 0
+dme-mir-317 0 0
+dme-mir-318 1 0
+dme-mir-31a 2 22
+dme-mir-31b 1 0
+dme-mir-33 0 0
+dme-mir-34 0 0
+dme-mir-3641 0 0
+dme-mir-3642 0 0
+dme-mir-3643 0 0
+dme-mir-3644 0 0
+dme-mir-3645 0 0
+dme-mir-375 0 0
+dme-mir-4 0 0
+dme-mir-4908 0 0
+dme-mir-4909 0 0
+dme-mir-4910 0 0
+dme-mir-4911 0 0
+dme-mir-4912 0 0
+dme-mir-4913 0 0
+dme-mir-4914 0 0
+dme-mir-4915 0 0
+dme-mir-4916 0 0
+dme-mir-4917 0 0
+dme-mir-4918 0 0
+dme-mir-4919 0 0
+dme-mir-4939 0 0
+dme-mir-4940 0 0
+dme-mir-4941 0 0
+dme-mir-4942 0 0
+dme-mir-4943 0 0
+dme-mir-4944 0 0
+dme-mir-4945 0 0
+dme-mir-4946 0 0
+dme-mir-4947 0 0
+dme-mir-4948 0 0
+dme-mir-4949 0 0
+dme-mir-4950 0 0
+dme-mir-4951 0 0
+dme-mir-4952 0 0
+dme-mir-4953 0 0
+dme-mir-4954 0 0
+dme-mir-4955 0 0
+dme-mir-4956 0 0
+dme-mir-4957 0 0
+dme-mir-4958 0 0
+dme-mir-4959 0 0
+dme-mir-4960 0 0
+dme-mir-4961 0 0
+dme-mir-4962 0 0
+dme-mir-4963 0 0
+dme-mir-4964 0 0
+dme-mir-4965 0 0
+dme-mir-4966 0 0
+dme-mir-4967 0 0
+dme-mir-4968 0 0
+dme-mir-4969 0 0
+dme-mir-4970 0 0
+dme-mir-4971 0 0
+dme-mir-4972 0 0
+dme-mir-4973 0 0
+dme-mir-4974 0 0
+dme-mir-4975 0 0
+dme-mir-4976 0 0
+dme-mir-4977 0 0
+dme-mir-4978 0 0
+dme-mir-4979 0 0
+dme-mir-4980 0 0
+dme-mir-4981 0 0
+dme-mir-4982 0 0
+dme-mir-4983 0 0
+dme-mir-4984 0 0
+dme-mir-4985 0 0
+dme-mir-4986 0 0
+dme-mir-4987 0 0
+dme-mir-5 0 0
+dme-mir-6-1 0 0
+dme-mir-6-2 0 0
+dme-mir-6-3 0 0
+dme-mir-7 0 0
+dme-mir-79 7 0
+dme-mir-8 1 4
+dme-mir-87 0 0
+dme-mir-927 0 0
+dme-mir-929 0 0
+dme-mir-92a 1 0
+dme-mir-92b 2 32
+dme-mir-932 0 0
+dme-mir-954 0 0
+dme-mir-955 0 0
+dme-mir-956 0 51
+dme-mir-957 0 0
+dme-mir-958 0 0
+dme-mir-959 0 1
+dme-mir-960 1 0
+dme-mir-961 0 0
+dme-mir-962 0 0
+dme-mir-963 0 0
+dme-mir-964 0 0
+dme-mir-965 0 1
+dme-mir-966 0 0
+dme-mir-967 0 0
+dme-mir-968 0 0
+dme-mir-969 0 0
+dme-mir-970 0 0
+dme-mir-971 0 0
+dme-mir-972 0 0
+dme-mir-973 0 0
+dme-mir-974 0 0
+dme-mir-975 0 0
+dme-mir-976 0 0
+dme-mir-977 0 0
+dme-mir-978 0 0
+dme-mir-979 0 0
+dme-mir-980 0 0
+dme-mir-981 0 0
+dme-mir-982 1 0
+dme-mir-983-1 0 0
+dme-mir-983-2 0 0
+dme-mir-984 0 0
+dme-mir-985 0 0
+dme-mir-986 0 0
+dme-mir-987 0 0
+dme-mir-988 0 0
+dme-mir-989 2 1
+dme-mir-990 0 0
+dme-mir-991 0 1
+dme-mir-992 0 0
+dme-mir-993 0 0
+dme-mir-994 2 0
+dme-mir-995 0 1
+dme-mir-996 1 0
+dme-mir-997 0 0
+dme-mir-998 0 0
+dme-mir-999 0 1
+dme-mir-9a 0 0
+dme-mir-9b 0 0
+dme-mir-9c 0 0
+dme-mir-iab-4 0 0
+dme-mir-iab-8 0 0
diff -r 000000000000 -r b996480cd604 test-data/dme-mir-v20
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dme-mir-v20 Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,476 @@
+>dme-bantam
+ATTTGACTACGAAACCGGTTTTCGATTTGGTTTGACTGTTTTTCATACAAGTGAGATCATTTTGAAAGCTGATTTTGTCAA
+>dme-let-7
+TCTGGCAAATTGAGGTAGTAGGTTGTATAGTAGTAATTACACATCATACTATACAATGTGCTAGCTTTCTTTGCTTGA
+>dme-mir-1
+TTCAGCCTTTGAGAGTTCCATGCTTCCTTGCATTCAATAGTTATATTCAAGCATATGGAATGTAAAGAAGTATGGAGCGAAATCTGGCGAG
+>dme-mir-2a-1
+GCTGGGCTCTCAAAGTGGTTGTGAAATGCATTTCCGCTTTGCGCGGCATATCACAGCCAGCTTTGATGAGCTTAGC
+>dme-mir-2a-2
+ATCTAAGCCTCATCAAGTGGTTGTGATATGGATACCCAACGCATATCACAGCCAGCTTTGATGAGCTAGGAT
+>dme-mir-2b-1
+CTTCAACTGTCTTCAAAGTGGCAGTGACATGTTGTCAACAATATTCATATCACAGCCAGCTTTGAGGAGCGTTGCGG
+>dme-mir-2b-2
+TTGTGTCATTCTTCAAAGTGGTTGTGAAATGTTTGCCTTTTTATGCCTATTCATATCACAGCCAGCTTTGAGGAGCGACGCGA
+>dme-mir-2c
+TCGTATCTTACTTTCAATGTCATCAAAAAGGGCTGAAGAAAGATATTTCTGCATTTGAATCGTATCACAGCCAGCTTTGATGGGCATTGCAATGAGCAGCGA
+>dme-mir-3
+GATCCTGGGATGCATCTTGTGCAGTTATGTTTCAATCTCACATCACTGGGCAAAGTGTGTCTCAAGATC
+>dme-mir-4
+TTGCAATTAGTTTCTTTGGTCGTCCAGCCTTAGGTGATTTTTCCGGTCATAAAGCTAGACAACCATTGAAGTTCGTTGTGG
+>dme-mir-5
+GCTAAAAGGAACGATCGTTGTGATATGAGTTGTTTCCTAACATATCACAGTGATTTTCCTTTATAACGC
+>dme-mir-6-1
+TTTAATGTAGAGGGAATAGTTGCTGTGCTGTAAGTTAATATACCATATCTATATCACAGTGGCTGTTCTTTTTGTACCTAAA
+>dme-mir-6-2
+TAACCCAAGGGAACTTCTGCTGCTGATATATTATTGAAAAACTACTATATCACAGTGGCTGTTCTTTTTGGTTG
+>dme-mir-6-3
+CAAAAAGAAGGGAACGGTTGCTGATGATGTAGTTTGAAACTCTCACAATTTATATCACAGTGGCTGTTCTTTTTTGTTTG
+>dme-mir-7
+GAGTGCATTCCGTATGGAAGACTAGTGATTTTGTTGTTTGGTCTTTGGTAATAACAATAAATCCCTTGTCTTCTTACGGCGTGCATTT
+>dme-mir-8
+AAGGACATCTGTTCACATCTTACCGGGCAGCATTAGATCCTTTTTATAACTCTAATACTGTCAGGTAAAGATGTCGTCCGTGTCCTT
+>dme-mir-9a
+GCTATGTTGTCTTTGGTTATCTAGCTGTATGAGTGATAAATAACGTCATAAAGCTAGCTTACCGAAGTTAATATTAGC
+>dme-mir-9b
+TGCATATTATTTGCTCTTTGGTGATTTTAGCTGTATGGTGTTTATGTATATTCCATAGAGCTTTATTACCAAAAACCAAATGGTTTCTGCA
+>dme-mir-9c
+ATTTTTGCTGTTTCTTTGGTATTCTAGCTGTAGATTGTTTCACGCACATTGTATATCATCTAAAGCTTTTATACCAAAGCTCCAGCTTAAAT
+>dme-mir-10
+CCACGTCTACCCTGTAGATCCGAATTTGTTTTATACTAGCTTTAAGGACAAATTCGGTTCTAGAGAGGTTTGTGTGG
+>dme-mir-11
+GCACTTGTCAAGAACTTTCTCTGTGACCCGCGTGTACTTAAAAGCCGCATCACAGTCTGAGTTCTTGCTGAGTGC
+>dme-mir-12
+TACGGTTGAGTATTACATCAGGTACTGGTGTGCCTTAAATCCAACAACCAGTACTTATGTCATACTACGCCGTG
+>dme-mir-13a
+TACGTAACTCCTCAAAGGGTTGTGAAATGTCGACTATTATCTACTCATATCACAGCCATTTTGATGAGTTTCGTG
+>dme-mir-13b-1
+CCATGTCGTTAAAATGTTTGTGAACTTATGTATTCACAATCATATCACAGCCATTTTGACGAGTTTGG
+>dme-mir-13b-2
+TATTAACGCGTCAAAATGACTGTGAGCTATGTGGATTTGACTTCATATCACAGCCATTTTGACGAGTTTG
+>dme-mir-14
+TGTGGGAGCGAGACGGGGACTCACTGTGCTTATTAAATAGTCAGTCTTTTTCTCTCTCCTATA
+>dme-mir-31a
+TCCGTTGGTAAATTGGCAAGATGTCGGCATAGCTGACGTTGAAAAGCGATTTTGAAGAGCGCTATGCTGCATCTAGTCAGTTGTTCAATGGA
+>dme-mir-31b
+CAAATAATGAATTTGGCAAGATGTCGGAATAGCTGAGAGCACAGCGGATCGAACATTTTATCGTCCGAAAAAATGTGATTATTTTTGAAAAGCGGCTATGCCTCATCTAGTCAATTGCATTACTTTG
+>dme-mir-33
+CTCTTCCTCTGGAGATGACACGAAGGTGCATTGTAGTCGCATTGTCTGTCCCAATTGCTTCAGGCAATACAACTTCAGTGCAAGCTCTGTGCATTTCAC
+>dme-mir-34
+AATTGGCTATGCGCTTTGGCAGTGTGGTTAGCTGGTTGTGTAGCCAATTATTGCCGTTGACAATTCACAGCCACTATCTTCACTGCCGCCGCGACAAGC
+>dme-mir-79
+TGAAGCTGACTTGCCATTGCTTTGGCGCTTTAGCTGTATGATAGATTTAAACTACTTCATAAAGCTAGATTACCAAAGCATTGGCTTCTGCAGGTCA
+>dme-mir-87
+AACACATTTCATTCGCGCCTGTATCTTGCTGAACCGCTGCCATTATGGCCAACGATCCGGTTGAGCAAAATTTCAGGTGTGTGAGAAATGTGTTTAGCA
+>dme-mir-92a
+AATATGAATTTCCCGTAGGACGGGAAGGTGTCAACGTTTTGCATTTCGAATAAACATTGCACTTGTCCCGGCCTATGGGCGGTTTGTAATAAACA
+>dme-mir-92b
+TAAAACGTCACCTGATGTAGGCCGTGCCCAGTGCTTATTTGTTGCATTTTCGAAATACAAATTGCACTAGTCCCGGCCTGCAATGAGTGTCGCAGTCGAC
+>dme-mir-100
+CCATTAACAGAAACCCGTAAATCCGAACTTGTGCTGTTTTATATCTGTTACAAGACCGGCATTATGGGAGTCTGTCAATGCAAACAACTGGTTTTTGGCA
+>dme-mir-124
+TCATTTGGTACGTTTTTCTCCTGGTATCCACTGTAGGCCTATATGTATTTCCACCATAAGGCACGCGGTGAATGCCAAGAGCGAACGCAGTTCTACAAAT
+>dme-mir-125
+GACATGTGCAAATGTTTGTATGGCTGATTCCCTGAGACCCTAACTTGTGACTTTTAATACCAGTTTCACAAGTTTTGATCTCCGGTATTGGACGCAAACTTGCTGATGTT
+>dme-mir-133
+ACCTGCAACACTGTGTGTAGCTGGTTGACATCGGGTCAGATCTGTTTTTCAAGCATTTGGTCCCCTTCAACCAGCTGTAGCCAGTGGTTGATGACAAC
+>dme-mir-137
+CAATCTCCAATGGCCACGTGTATGCTCGTAGCTATAACCTGAAATCCAAATGTTATTGCTTGAGAATACACGTAGTTCACCGAGATTTGTT
+>dme-mir-184
+GGTTGGCCGGTGCATTCGTACCCTTATCATTCTCTCGCCCCGTGTGCACTTAAAGACAACTGGACGGAGAACTGATAAGGGCTCGTATCACCAATTCATC
+>dme-mir-190
+CGAACTAATTGATGGTTCCAGTGAGATATGTTTGATATTCTTGGTTGTTTCATTCAAAAGTTCACCCAGGAATCAAACATATTATTACTGTGACCCTCGC
+>dme-mir-193
+TGTGTGCCCTTATTATGGTTGGGATTTTTTAGATCAGCAGTTATTGCTATATAGCCATATTTATAAATCTTCTACTGGCCTACTAAGTCCCAACATAATGAGAGTAAA
+>dme-mir-210
+AAAGGTGCTTATTGCAGCTGCTGGCCACTGCACAAGATTAGACTTAAGACTCTTGTGCGTGTGACAGCGGCTATTGTAAGAGGCCATAGAAGCAACAGCC
+>dme-mir-219
+TAATTCGATTTTTAGCTATGATTGTCCAAACGCAATTCTTGTTGATATTCAATATTCAAGGGTTGCGACTGGGCATCGCGGCTCGAAATAAGAATACAAC
+>dme-mir-252
+ACCAAGTTCGCTTTCCTAAGTACTAGTGCCGCAGGAGTTAGGTTCGTGTCCGCAATACCTCCTGCTGCCCAAGTGCTTATTAAAGCGGCGAGT
+>dme-mir-263a
+TAGATCTCGGCACAGTTAATGGCACTGGAAGAATTCACGGGGTAATTTTTATACAACCCGTGATCTCTTAGTGGCATCTATGGTGCGAGAATAA
+>dme-mir-263b
+TTGCTGACTTTGAGTCTTGGCACTGGGAGAATTCACAGTTGACTTTATTATTCTGTGGTTCTGCGGGTGCCAAAACTTAAAAACCGGCTT
+>dme-mir-274
+TCCTGTGTTGCAGTTTCGTTTTGTGACCGACACTAACGGGTAATTGTTTGGCCGCCAGGATTACTCGTTTTTGCGATCACAAATTATGAAATTGCAGCAA
+>dme-mir-275
+TGTAAAGTCTCCTACCTTGCGCGCTAATCAGTGACCGGGGCTGGTTTTTTATATACAGTCAGGTACCTGAAGTAGCGCGCGTGGTGGCAGACATATAT
+>dme-mir-276a
+CCTGGTTTTTGCCATCAGCGAGGTATAGAGTTCCTACGTTCATTATAAACTCGTAGGAACTTCATACCGTGCTCTTGGAAGACCAAAAAACAACCAAG
+>dme-mir-276b
+AAAACCGAAGTCTTTTTACCATCAGCGAGGTATAGAGTTCCTACGTTCCTATATTCAGTCGTAGGAACTTAATACCGTGCTCTTGGAGGACTGTCGACC
+>dme-mir-277
+TTGAAGGTTTTGGGCTGCGTGTCAGGAGTGCATTTGCACTGAAACTATCTGAAGCATGTAAATGCACTATCTGGTACGACATTCCAGAACGTACAATCTT
+>dme-mir-278
+GTAATGGTACGGTGCGACCGGATGATGGTTCACAACGACCGTGTCATTTAAACGGGTCGGTGGGACTTTCGTCCGTTTGTAACGCCATTTGTCAACGA
+>dme-mir-279
+GGAATTCATACTACTGTTTTTAGTGGGTGGGGGTCCAGTGTTTCACATTGATTTTCTTAGTATTTGTGACTAGATCCACACTCATTAATAACGGTAGTTC
+>dme-mir-280
+TGGCTTTTATGTATTTACGTTGCATATGAAATGATATTTATAGTAAACAGATTATTTTATATGCAGGTATATGCAAGTCGAGGTCCTCCACACTG
+>dme-mir-281-1
+CGAATAAGTGAATAAAGAGAGCTGTCCGTCGACAGTCCAGAAACTATTTAATATCACTGTCATGGAATTGCTCTCTTTGTATAATATTCG
+>dme-mir-281-2
+CGAATTGTGAAATGAAGAGAGCTATCCGTCGACAGTCAAGTTAAGACCGATTGTAATACTGTCATGGAATTGCTCTCTTTGTATAACATTCG
+>dme-mir-282
+AGTTTCCTTCTAAATCTAGCCTCTACTAGGCTTTGTCTGTGCATTCGAAAGCCGATCAGACATAGCCTATAAGAGGTTAGGTGTACCAAGGCGAACA
+>dme-mir-283
+CTCACACGATTCTCAAAGGTAAATATCAGCTGGTAATTCTGGGAGCTAAGCCTAAATATGAAACACTCGGAATTTCAGTTGGTATCGACTTTTTTGAATT
+>dme-mir-284
+GTTGCAGTTCCTGGAATTAAGTTGACTGTGTAGCCTGTGAGGGCAAGGCTTGAATAATGCTCCTGAAGTCAGCAACTTGATTCCAGCAATTGCGGCCCGA
+>dme-mir-285
+TCGAATCGAAGAACTGAGATCGATTGGTGCATAGATATCAGGAGAACCCACTCAATTTAACTCTAGCACCATTCGAAATCAGTGCTTTTGATAAGAAAC
+>dme-mir-286
+TTAAAATTGAATGGCGAATGTCGGTATGGTCTCTTTTTCAAAGAAAGGTTTCGATTAAGCGAAGTGACTAGACCGAACACTCGTGCTATAATTTTAAAAT
+>dme-mir-287
+GGACGCCGGGGATGTATGGGTGTGTAGGGTCTGAAATTTTGCACACATTTACAATAATTGTAAATGTGTTGAAAATCGTTTGCACGACTGTGA
+>dme-mir-288
+CGGCCATGTCGTAATTAGCGGAGCACGGCATCGCCGGCGATAATTAATGACGGTGGTCACGTTGGTTTCATGTCGATTTCATTTCATGACACGGCCG
+>dme-mir-289
+GAGTTTACAGTAAAATAAATATTTAAGTGGAGCCTGCGACTTCAGTCCCTCTGACTGACTGGGGTAAGTCACTTGAGCGTTTGTTGGCACGTAAAAGAC
+>dme-mir-303
+TCTTGGTTTAGGTTTCACAGGAAACTGGTTTAATAACGAAAACTAGTTTCCTCTAAAATCCTAATCAAGA
+>dme-mir-304
+GCAGCATTGAATAATCTCAATTTGTAAATGTGAGCGGTTTAAGCCATTTGACGCACTCACTTTGCAATTGGAGATTGCTCGAGACTGC
+>dme-mir-305
+CATGTCTATTGTACTTCATCAGGTGCTCTGGTGTGTCTCGTAACCCGGCACATGTTGAAGTACACTCAATATG
+>dme-mir-306
+GTCCACTCGATGGCTCAGGTACTTAGTGACTCTCAATGCTTTTGACATTTTGGGGGTCACTCTGTGCCTGTGCTGCCAGTGGGAC
+>dme-mir-307a
+TGTCTTGCTTTGACTCACTCAACCTGGGTGTGATGTTATTTCGATATGGTATCCATCACAACCTCCTTGAGTGAGCGATAGCAGGACA
+>dme-mir-307b
+TAGTTTCGTGGATACTCTGTCCTGCTATCGCTCACTCAAGGAGGTTGTGATGGATACCATATCGAAATAACATCACACCCAGGTTGAGTGAGTCAAAGCAAGACAAAATGCTGCTAACTT
+>dme-mir-308
+CTCGCAGTATATTTTTGTGTTTTGTTTCGTTTTGCAATCCAAATCACAGGATTATACTGTGAG
+>dme-mir-309
+ATTATACGACAAACCTTGTTCGGTTTTGCCAATTTCCAAGCCAGCACTGGGTAAAGTTTGTCCTATAAT
+>dme-mir-310
+AACATAAACATTTGCAGGGCGGGTCGTGTGTCAGTGTATTTATATCTTAGCTATATTGCACACTTCCCGGCCTTTAAATGTCCAATGTT
+>dme-mir-311
+TCTAGATCATTTTTCGGACGGTATATGGGTTAATATTTCATTTGTCGAATATATTGCACATTCACCGGCCTGAAAATATCAAGA
+>dme-mir-312
+GATTTGGTTCGTCACAAGGGCAATTCTGCATTTTTTAACTAGTATTGCACTTGAGACGGCCTGATT
+>dme-mir-313
+ATTTTCTGCTGCGGATGGGGGCAGTACTGTTTTTTTAACATTGAGTATTGCACTTTTCACAGCCCGAAAAT
+>dme-mir-314
+TCGTAACTTGTGTGGCTTCGAACTTACCTAGTTGAGGAAAACTCCCATGTCGGATTTTGTTACCTCTGGTATTCGAGCCAATAAGTTCGG
+>dme-mir-315
+CACTTATATAATTTTGATTGTTGCTCAGAAAGCCCTCATTGTTTACCAGTTGGCTTTCGAGCAATAATTGAAACCAGATAAGTG
+>dme-mir-316
+AAATTCTAGTCGATTTGTCTTTTTCCGCTTACTGGCGTTTCAATTCCACAACGACAGGAAAGGGAAAAAGGCGTATTTACTATGAGTTT
+>dme-mir-317
+ATGCAACTGCCATTGGGATACACCCTGTGCTCGCTTTGAATGAAATGCAAGCAAGTGAACACAGCTGGTGGTATCCAGTGGCCGTTTGGCAT
+>dme-mir-318
+TTTATGGGATACACACAGTTCAGTTTTGTCACACTTCAAGCATCACTGGGCTTTGTTTATCTCATGAG
+>dme-mir-375
+CCGGGCAGCGAATTACTTGGGCCAAGGGAATGCAAACTGTGATCATCCCGAAAGTTTGTTCGTTTGGCTTAAGTTATTTTCATGTCCGACT
+>dme-mir-927
+TGGTTGCTGTAGAGTTTTAGAATTCCTACGCTTTACCGTGGCATACGAAATTCGGCAAAGCGTTTGGATTCTGAAACCCTACCGATCCATTA
+>dme-mir-929
+AGTCCTGGTGGAGCTCAAATTGACTCTAGTAGGGAGTCCTTTAATGAGCGACTCCCTAACGGAGTCAGATTGAGCTGCAAAGGAGCGA
+>dme-mir-932
+TTGGTTTTGAAGTTTTCAATTCCGTAGTGCATTGCAGTGTGTTTCATTATTTACTGCAAGCGCTGCGGATTTGGCAACTTTGACGACCTTC
+>dme-mir-954
+TACAAACACAAGATTTTCTGGGTGTTGCGTTGTGTGTACCTGTGTACAGGCGTATTCACATGCAACATCCCTTACATCTTGCTTGAT
+>dme-mir-955
+GGCCAGCTAATCAACTCCATCGTGCAGAGGTTTGAGTGTCCTGTGTTTTGCCTAATCGCATTCAATTTCTGAACGGTAGAGATGGTACGCTTAGAAA
+>dme-mir-956
+GATCGTTATCGTGTTTGGAATGGTCTCGTTAGCTAACGGATGAGCAAGTGCTCGGCTCACTGGCCCAAATGCAGTTTGCCCGGAGACGCCGGTTAACCCAGCACTGAAATGTGTAGTTTCGAGACCACTCTAATCCATTGCAGCATTT
+>dme-mir-957
+TGTCCACAATAGACCTTAGTTTTCGACGTGTTTTGGTGTGCTGGGGAGTTCTATTCCGATTGAAACCGTCCAAAACTGAGGCCAACTGTGAGGCGC
+>dme-mir-958
+GGCGTGTCTATGGCAAGTAGAATAGCAGGCTTATCACATGTTTAATTCAATCTGCTGTGAGATTCTTCTATTCTACTTTCGACAACACCCGT
+>dme-mir-959
+TTAACTTTGTTCTATATTCTTAGTACTCGGGTTGATAAAGACCTTTTCTTCAGGGAGCCTTTGTCATCGGGGGTATTATGAAATATAGTTTAAAGAAA
+>dme-mir-960
+GGGTTTTGTACCACATTCTGAGTATTCCAGATTGCATAGCTTTGTGCTACTATTGCTATACGGTCTGGGACACTTTTAACATGGTATCAAATC
+>dme-mir-961
+TCAAGGGCCGAGTTACCTTTGATCACCAGTAACTGAGATTGTTTCTGATACGGTTTCGTTTTCTGGCAATCAAAAGAACTTGGACTCGA
+>dme-mir-962
+GATGGGGCACTCAGGCTATAAGGTAGAGAAATTGATGCTGTCTACACTATTCAGACTTCAGTTTCATTACCTTTCAATTTGTTTGCCCCCAT
+>dme-mir-963
+TTAGTCTAATCTAAAACAAGGTAAATATCAGGTTGTTTCCTGTATTCGATCGAAACATCTGTATATACCTTTGTTCCGATTGGACAAAA
+>dme-mir-964
+CAATAACATATTGGTCCAACTTGCCTTAGAATAGGGGAGCTTAACTTATGTTTTTGATGTTTAAGTTAAAAGCCTCTGTTCTAAGACAATTTGATGATCA
+>dme-mir-965
+GACAATATTGCTCAACATTTTGGGGGGTAAAACTGTACGTTATATGTGCCCTTCTGTGATATTCATAAGCGTATAGCTTTTCCCCTTAAAAGTTAGAGCTATTGCAA
+>dme-mir-966
+AACCTGATCCGCTGCTGTGGGTTGTGGGCTGTGTGGCTGTGGTATAGGTGCCGCCAGTTGATAACCCCCAGCGTGGGCACGGACCCA
+>dme-mir-967
+CTTGGAGAGCAGAGATACCTCTGGAGAAGCGCGTGACCTGACCCCAGCAGGAGAACCCAACCCGCTTTTCCACCTAGGTGTCTCTCTCTCTCTTTAT
+>dme-mir-968
+TGATGGAAGCTTCCTTAAGTAGTATCCATTAAAGGGTTGTTCTCAACATGCAAATCAACCTTTTGATGTACTACTTTAAGAATCTCCAGTTA
+>dme-mir-969
+AGTCTCTGTCCTACGTCCGAGTTCCACTAAGCAAGTTTTGAGATCGTTTTAAAAACAAAAACTTGACACGTTGAGCTCGTTCGTGGGATGGACT
+>dme-mir-970
+GAAAGGCATCTGTTGCAGCTAGCGGGTGTTTTATTTGGTAGCTGTAATGATTTGAATCTATCATAAGACACACGCGGCTATAACCGTTGTCTAAG
+>dme-mir-971
+TCCGTGGCTGGCATCGCTCGCTGTAAATTGTAATCATCAAAGCGTTTTCTCAGAGCCGCTTGGTGTTACTTCTTACAGTGAGTGTGCCAGTCCGTA
+>dme-mir-972
+AGAATGATAGGGAAATTGCTAAATATTTTTTTTGTATAAATAACTTTTAACTTTTGTACAATACGAATATTTAGGCATTTCTCAAATCAAA
+>dme-mir-973
+ACCGTCGTCGACTTTTCGTGGTTGGTGGTTGAACTTCGATTTTAAGTATTTAAATAAAATGAAATCTGTTCATTCTCCGACACAAGAAGTTCACGCAAAGG
+>dme-mir-974
+CAATTGTCACCGGTCATGTCCTCCAAGCGAGCAAAGAAGTAGTATTTGTGTTTCCAAGAGCAAATATAACTTCATTGGAAGCTAAGTGGATTTGCCCAAAT
+>dme-mir-975
+TTGAATTTTTGATTTTAAACACTTCCTACATCCTGTATGTGTTTTGCATCCGGTACAGATGTGGGAGTCGTTTGCACTCAGAGATTTCACA
+>dme-mir-976
+CATCGCCATGCAGTGCCGCGGCATTGGTGAGGCCATCTCCAATGGATTAGTTCTCAACATTGGATTAGTTATCATCAATGCCGGTGCACTGCACCTA
+>dme-mir-977
+CGAATCAACAAACAAGGTATGCTTTAGATAACTCGAATATCACATCTTCAGTGTTCGAAATCTGATGAGATATTCACGTTGTCTAAATCATGTTTTGTA
+>dme-mir-978
+GTTGGCGGCACAATCTGCAATCTACGCCACTGGCTTACGTTGCAATCGAAAATCGTGTCCAGTGCCGTAAATTGCAGTTGTGTGAACGCAAA
+>dme-mir-979
+TACATGTGAGGATGTCACAAATACACTGAATTTGGGGGGAATTCTTATGTATATACAAATTCTTCCCGAACTCAGGCTAATTTTGTGGCATCCGT
+>dme-mir-980
+AGTTGATTGTATGTCAGTTTTTCATTTGGCCTGGCTAGCTTACTCCTTTTTAAATATTGCTAGCTGCCTTGTGAAGGGCTTACGTGTAATTGCAGTTC
+>dme-mir-981
+AAACATCCTCACTGAAGTCGGGTTTCGTTAGCAGCGGGCTGTTTTAATAAATTCAACAAGTTCGTTGTCGACGAAACCTGCATGCTGTGTGGAAAAT
+>dme-mir-982
+CGAAATCATGTTAGATCCTGGACAAATATGAAGTAAATTGTTTTTATGCATCAATTACTTGATATTCATCCTTGAACTAAATGGTTTTAGAGC
+>dme-mir-983-1
+TATATTGCAATAATTAAATAATACGTTTCGAACTAATGATTTTCAGTTCATTCATTAGGTAGTTACGCATTATCTAGTTGTTGTAAACATT
+>dme-mir-983-2
+TATTATATTGCAATAATTAAATAATACGTTTCGAACTAATGATTTTCAGTTCATTCATTAGGTAGTTACGCATTATCTAGTTGTTGTAAACATTCAACT
+>dme-mir-984
+AGAAACAAATTTCATTGAGGTAAATACGGTTGGAATTTTGTCTTTTAACTATAAATCCAACCGAATTTGGCTCGGCGAAATTTTTCAGTT
+>dme-mir-985
+TATATAATAGCACTGCTGGCTCATTGGTACATTTCATAAGTACCTTATCAAATGTTCCAATGGTCGGGCAGAGCTATTATTTGTCC
+>dme-mir-986
+CACACCTGAAATTACCCATCTCGAATAGCGTTGTGACTGAGGTAACTGCGCATCGAATCTACTCAGCGGCGAGGCTATTCAAGTAAGGTTATTTTGGGCC
+>dme-mir-987
+TGTTGGACTGTGTTTAAAGTAAATAGTCTGGATTGATGAAAGTTGCATTCGAGAATTCATCAACAGGCATTTACTTCAACTGCAGTTTGAACAA
+>dme-mir-988
+GACGGCGGTACCGGGCATTTTGGGTGTGTGATTTGTAGCAAAGTGATATGTATTTGATCATCCCCTTGTTGCAAACCTCACGCCAAAGATGATCTGCGA
+>dme-mir-989
+AAAGATTTTGGGAATCGGCCACTACCTTGCAGTCACGTGATGAAAAGACACAGGTGACACTGATCCGGATTTGGTAGTTGACAAATCCTCCATGCCGAGATTAGTTTCATTTTGCGTCTTTTGAATTCGAATAGTTCATGTGATGTGACGTAGTGGAACATACCTGAAATTACA
+>dme-mir-990
+TCTGCTCTGCGACATTCACCGTTCTGAGTTGGCCCCAAGTGCACGTGGGCCAGCTTTCAGCTTCGGTGCCATTTCACCCCGAGCAC
+>dme-mir-991
+TATCACTGCAGTTTCAGGCTTTTCCCAACTACACCTATTAATACATATTTTAACGTCCTATTAAAGTTGTAGTTTGGAAAGTTTTGGTTTTGCATT
+>dme-mir-992
+ATTTTCCCAAGTGCCTGGTATCAGCAAAGTGTTATTTTTTATGTTTATGTAAAGTACACGTTTCTGGTACTAAGTACTTCGAGAAAGTTACC
+>dme-mir-993
+AACGCTCCCGTGACCTACCCTGTAGTTCCGGGCTTTTGTTTAAATGGCGTTCGGCACATTGTCGGACTGCTGGCTCGATTATCAGAAGCTCGTCTCTACAGGTATCTCACAGGGTAGAA
+>dme-mir-994
+TATCGAGTTATCTAAGGAAATAGTAGCCGTGATTTTACCCAAGAATTTTTCACATATCACAGTTGCTGTTTCTTTTAGATAGCTCTTTTGT
+>dme-mir-995
+CACCTGCACCCCGCAGCCCGAATTATGTGGGAGCTGCGCCGTTTCCGTAATCCGTAGCACCACATGATTCGGCTTCGTGGTACAGGATAT
+>dme-mir-996
+TCTGACTCTATTTTGTCGGCGAACATGGATCTAGTGCACGGTGGTTCATGATTAAGTTCGTGACTAGATTTCATGCTCGTCTATTAAGTTGGGTCAG
+>dme-mir-997
+TTATGGATCCTCTTTCAATGAATTTAGTATGCCCAAACTCGAAGGAGTTTCACCTCCATAAGAGCGACAGTCCTGGAGAAGTTATCAGAGCCAAAAAAATTCATATGATGATGCATTTTCCGTCTCTGAAAACGTCTTCAGCAGAAGTTGTTTTTAGCGAAGTGAAACTCATTCGATTTTGATCATACTAACGACATTGGATGCTTGGATCGGCA
+>dme-mir-998
+CCTCGTGTCAAATTCATTTTGGAACTGAATTCTCGTGGGTCTGCACTGACAACACTGACCGCTCCAGGGCAAATTGTTCATTTTGAAATTGAAATTCTGTAGCACCATGAGATTCAGCTCTGGCGTGAATTTCAAACATGCAT
+>dme-mir-999
+AAGGATGCCGCTCAATTACCCCGACATAGTCATACGGTGAATGTTGTGTATTGGAGACCAATGTTAACTGTAAGACTGTGTCTCGGTGGTTGCCAGCCCAGCCAC
+>dme-mir-1000
+GACGCTTGCCATTGATATTGTCCTGTCACAGCAGTATTGTAACACTATATTATAGTTTACTGCTGGGTCGGGGCATTAACATTGTTGAGCGTCATTAGCA
+>dme-mir-1001
+AAGCTGGCCTGTCCCTGGGTAAACTCCCAAGGATCAGGTGGAGATTGAATCCCGATCCTTGGGTTTCTGCTCTCGGGCAAGGTCAGTAGT
+>dme-mir-1002
+TAGAAATTATATATTTAAGTAGTGGATACAAAGGGCGATTTGATATAAAAGTGTCGCATTGTATGACCTACTTAACTAGCTGATTTTGT
+>dme-mir-1003
+GTGGGTATCTGGATGTGGTTGGCTCTGGCGGTCCTCTCACATTTACATATTCACAG
+>dme-mir-1004
+GTTGGGGGACATTGATCTCGGAGACGGCGGTTTAACTGATCCATTCTCTCACATCACTTCCCTCACAG
+>dme-mir-1005
+GTGAGTTGATCGATTTCGAGGTTTTGGCACACGAATATAATCTGGAATCTTTAATTCGCAG
+>dme-mir-1006
+GTGAGTTTGAAATTGAAATGCGTAAATTGTTTGGTACAATTTAAATTCGATTTCTTATTCATAG
+>dme-mir-1007
+GTAAGCAGTGTTTGAACTCGATCTTGGTTCTTGGACTCTTGATAAGCTCAATTAACTGTTTGCAG
+>dme-mir-1008
+GTAAATATCTAAAGTTGAACTTGGCCAATGGCAAGTCACAGCTTTTTGTGTTTACAG
+>dme-mir-1009
+GTAAGTGTAAGACTTTCTTGAGTTACCCGCGATGAGTATCTCAAAAATTGTTACATTTCAG
+>dme-mir-1010
+GTAAGTGGTGTAGATGAAACAAATTTACCAACAATTTTGTTGGATTGTTTCACCTATCGTTCCATTTGCAG
+>dme-mir-1011
+GTGAGTTTTTGAGCCAGGAATATAGTTCTTATTATTGGTTCAAATCGCTCGCAG
+>dme-mir-1012
+GTGGGTAGAACTTTGATTAATATTGCTTGAAAAATATTAGTCAAAGATTTTCCCCATAG
+>dme-mir-1013
+GTGAGTTTCGTACACTTAATTAATAGGATCGGCCGTTAATAAAAGTATGCCGAACTCGCAG
+>dme-mir-1014
+GTATAATGGAAATAGATTTTAATCGCAGGCGCGTCAGTGGTTGAATTAAAATTCATTTTCATTTGCAG
+>dme-mir-1015
+GTGAGTGATGCTCCAGTTAGCTTGGCTGAGTGAGGATTTAAGTCCTGGGACATCTCTCTTGCAG
+>dme-mir-1016
+GTAAGTATAGAGAGGATGTGATTGGTAAATTCCAAAGTTCACCTCTCTCCATACTTAG
+>dme-mir-1017
+GTGAGTTTAGTGGAGTTTAAAGCTTCCCATCGCCAGCAATTACGCGAAAGCTCTACCCAAACTCATCCCCC
+>dme-mir-2279
+CGGGAGAGGATTATGAAAGACAATTTGAAATTAAATATCTGTGTGTGAAATTATTTAAAAGATTAATTTCACGCGAAGATATTTATTTTTACATTTGTCCCTATAATAACCGG
+>dme-mir-2280
+TCGTTTAAGATGCTGTGAATATCCCGTTTTCTTAGCTTGGCAATAAAATATTATTCACAGAAACTAGTAATTTACTTCCAAGCTAACTGAAGGGGATATTCAGTGCAACCACGGA
+>dme-mir-2281
+GGATGTCGAGCAGTACACGTATTATCTGCAGCTGCAGATGCAAATGTATATGTATCTGTATCTGTATCTGCAGTATTGCAGTATCGCTGCTTTATTCATATCC
+>dme-mir-2282
+GCCTGCATAACGCTGTGGCTTTCCGCTTCTATTTTTCGTTCACTGCCTTGTGCTTCGTTGGAAAATCGGTGAGCTAAAAATAGAATTCGGTTGCCACGCTTCAAGGG
+>dme-mir-2283
+AGCTGCTCCGAAAATATCATGAATACGACAATTATGTGCACATCATTTAGTATACGTGATATTTTAGGAGCAGCTA
+>dme-mir-2489
+GCAATAAGTTAGACGTGAGTATGGCTATACAAATCATCTGGAACGTAAGTTCTAATGTATGTATTGTATGTTGTATTTGCAGTTTTATGATAGCA
+>dme-mir-2490
+GAGCGAGTTGAAGCGATAAAGCAGGTTGCAAACTGCTCTCTCGCTTTGCTCTTACTTTATCGCTTAAACTCGCAA
+>dme-mir-2491
+CGCGAACCACTTTGTGCCGCTGTTGCCTTTGCAGTTGCTGTTTTCCATGTTGCTACTGCGGCCAATTGTTGCGCATCAGCAACATCAACAACATCAACAACAGCAGCAGCAACTGCAGTAGCAGCACTCGTGATTCACTTTCGA
+>dme-mir-2492
+GAATTTTTTTGGGATCTCTTTATAAAGCGTTTATTATTTATTTATTATGTTTTGTCGAGAGCTTTCAAAAAATTCA
+>dme-mir-2493
+CGCTATTATTAAAACCCCCAACGCTCACACACACACACAGCCATGTGCATGGAGCAAAAAACTATGGCCGTGTGTATTTGTGCGAGGCGTTGGCGTTCTGACCAGC
+>dme-mir-2494
+TCCATCTTCCGACCGGCGATCAGTAAGTGCACCAGATTGGGAATAACCTTCAAAAGCTGCAACTTAAATATTCCCAGTAATGTCCACTTACAAGGAGCCCCTGGTTGGATGGGTGGA
+>dme-mir-2495
+TGTATGATCACATTGGCTTGTGGGCGTGGCACTTCAATTGGGCTGACTTTGCCTGGCTGGAAAAGCGGCACCTAATTGAAATGCCCGCCGATCAGTGCTCGATTACGATCGACAC
+>dme-mir-2496
+GTGCGCCGGTGTGCGTGTGTGAGAGGCCATCGTAAAAGCCTTGTTTGCGTGCGGAATTTAACTGTTTTCTCGCCGTCTGCGAATTGCGGGGCACTCACACACGCATATTCAGCCGCAC
+>dme-mir-2497
+GCACGTCATCGTAAGCATTGCAAAATGTAGTTGTTAAATTCGCTCTTCAATCATTAACAACATTTACAATGTTTGCAGGCTGTCAACGTTGATGGC
+>dme-mir-2498
+CACATTTGCCATAATCTCCTTAAAATAGTAAAGGCTGTTATGCTTATTTTTGTGTTTTTTACAAAACTGGCATTCCCGTCTTACTTATTTTCAGAAAACTTACACCAAAGTG
+>dme-mir-2499
+TCCTTGCTGTGGGTGTGGTTGCGTGCGTAGACGTCAGCGCAATGCCTTTGTTTGGCGGCATGCGGATGACATTCACGTCCGTATCCGCACACACAT
+>dme-mir-2500
+GTAAGGAACACACATATACAATACCGTTTGAATTTCAAATAATCAGGATTTTGTGTGTGGACCTCAGA
+>dme-mir-2501
+GTAAGTTTGGAGTGCCTGGGCTAAGAACTTCCATTCTGAAACTTAACTCTTGCAATCAAACTTTCTTTCAG
+>dme-mir-2535b
+GTAAGTGACTATGCCGGGATTGTGGTTATTGAACTTAGCGTCTCCACTCACGGCATTTCATTCACTTGCAG
+>dme-mir-3641
+GTGAGTGTCCTGTTTACAGCTGGTATATTTTTCGAAAAGCCCTCTAAACTGGTCACTTCCACTCTACAG
+>dme-mir-3642
+GTGAGTGGGTCGCACGGAGCGTCATTTGGAATTCCCATGGCTGACGAAACGTTCACTCTGCTCTTCTCGATTACAG
+>dme-mir-3643
+GTGAGTTTATCTAAATATGCCCATTTATCAATAGTCATACTTATTTATGCTCCTTTAACTCCTTGCAG
+>dme-mir-3644
+GTGAGTGATTATGGTTTAAGTAATACGAATTACATAAACTATATTTAACCACTATTCTTAG
+>dme-mir-3645
+GTGAGTGCAGGCTGTTTATTGCGGCGCTGTTTACTGTTTACGATCGCACTGGAAGTGCGCTTTCCGGCGCCTTTGGAAGCCAGCACTCAAGTTTCCTTCATTGTTTTCCTTTCTTTCCCCTCTCTCTCTCCCCGCAG
+>dme-mir-4908
+GTAGGATTTTAAGCATTAAGCATGTATATAGTTTTTGCTGTGCTCATAAAGTTCTATAG
+>dme-mir-4909
+GTAGGTTGAAATCGGATACAACTAAAAGTTATTTTTAATTGTATCCGGTCTTCATTTTAG
+>dme-mir-4910
+GTTTGTATGGAATTCCATGAGCTTCAAAGTAATGCTTCTAACTCTGTGGTTTTTCATATTTCCCAG
+>dme-mir-4911
+GTGAGGAGGAGCAAAGTGGCGCCACAATTTACTAGAAACTAATTGCATCTTTTTGTCCCCCCGCGTAG
+>dme-mir-4912
+GTGAGTAGTCGTATGTTTATTAAGATTTACTTTCTATAAATTAATGTCCACTACTTTTAG
+>dme-mir-4913
+GTGAGTTCGATTGGAAAATTTAAATTTAATGTTTAATTAACCAATTCGAATTGCTTAG
+>dme-mir-4914
+GTGAGTGAGAGCGAGTTCCAATGTGGTCCTCGAAACCAGTTGTGCTTCGATCAGATTCCAAAATTGACACATTCCGACCTTGAATCACCCACAG
+>dme-mir-4915
+GTGAGAAAAAGTAGTAAATTAAGCATATCACGCAAGGACAAACATTAAGTTAAGGCCTAGTTCCTATTATACTAGGAATACTCAATCCTACTTATGCCTCTTATTCTTCTTTTCTCAG
+>dme-mir-4916
+GTGGGTGTTCTTGGAAATTCGCCTCGATCGGTAGCTAAATTTCTGATTACTCCTAAAAG
+>dme-mir-4917
+GTGAGTAGCGATGGGGGTCAGATGGCCGTGGAAGATATGTGACGATTGCCCCCTTACTACTATCGTCTGCTCAACAG
+>dme-mir-4918
+GTGAGCACGATGGCCACATTTATTGGGCAACCAATGAATTCCCGCCTGCCCACAG
+>dme-mir-4919
+GTGAGTCTCTAGGGGATTAGCAAGCTATAATTTCTCACTTAATCCCTGAACGACTTGCAG
+>dme-mir-4939
+GCATCATGCTTCTGCGATCTCTACCAATTTTCCTTGCACTCTTTGTCCTTTCCAAAAGTGACACGGGTGCAGGTGAAGATTCGGAAGAGGACGACGAAAACGACTGCAA
+>dme-mir-4940
+TTATCGCAACTTATCGATCGGGTGGCGGATATAGATCATTGGTTATCCCTTAATTAAAGCAAACAAGATTTGTTTCGGCCGCCCGATTTTACATACAACGACACG
+>dme-mir-4941
+ATGACCGGCGAGGAGATGCGATTCCTCGAGCTGCTCACCGCCTTTGGCTTGGACATCGAGGCGGTGGCCGCCGCTCTGGGAGTCGATATGCACACGCTCAACA
+>dme-mir-4942
+TATGAATACATATATTTTTATGTTTGAGTCAACATCGAGTTGCCGATGTTTGGAAAACATCGATGACATCGCTGTTGGCATCGAAATATGTCCATATCTAATATATAGTT
+>dme-mir-4943
+TTCGGATCGTTTGAATGACCAATTTTGTCTTTTATTTTCTCTGTGTTTTCAGTTAATGAAAGATAGTGAACACAGTGCTAAATGAAAGATAATGAGACCCAGTCGACAGGACCCAAG
+>dme-mir-4944
+AAAAGTAACTTTCGTAATCGAAAAAGTTTCGCTTTTTTTTCTGCTTCTGCCGCTGAGCAAGTTTTAAATATTACCGCTTAGTTACAGCTGCAGGAAGAAAAGTTATAATTATGGACTACGTACATAAAT
+>dme-mir-4945
+GTGATGGTGCTGGTGGTGGTGCCTTCATTGTTTTTCTCTTCTTTTTGTTGTTTGACTTTTACACTTTTGAACAACAAGCCGAAAGGAAAAAGCATGAAGGAATGCAAAGTATTCG
+>dme-mir-4946
+GCTCGATTGCTGCAACTAAGAAACAAATATTTTCAGTTGTTCATTGCAAATTAATTTATGTACATTGCTATGAAATCAATTGAAAATATAATTGTATCCTAGGCTTATCAGCTGCA
+>dme-mir-4947
+AAAGCGACCCACCCATTCCCCTTCCGCCCATATTTATCCTTCGTCTACTGTTTCATCGCAGGATTTAAGTGGGCGGAGCACGGAATTCGGGGGCGTGGCG
+>dme-mir-4948
+GTCGTAGATGTGGGCGTAGGTGGCGGCGGGCGTGGCGTGCGTGATCCTGGTTGTGGTGACCACGCCGGTGCGCTTGCCCTCCTTCTGGGCCCACTCCATGACGCTC
+>dme-mir-4949
+ATAATGCGCCGCCATCTTGCGGCACACTCGCACACACACACGTGTGCCATTCTGGTTTTGCAGTGGTGTGCGTGTGTGCCTCGAGTCGTTGCGTCACTG
+>dme-mir-4950
+TAATGCACATAATAAATGCCAACCATTCATCATGAAATTATAAATAGTATCATTTCAAGATGCATGGTAAGGCTTTATTTTGTTTTGCAATC
+>dme-mir-4951
+TTTTCGTACGTACAAAAAAGCATCAGGTTTTCGGGTGAGGTTTCTGATGCTTTGTCTCTTAAGTTTTTCCTAACAATTAAACCCTATGTCGGTATTACGAATACCATCAAAAAACCTCACCCACCAAATGTATGACCAGAGAGATAAAAA
+>dme-mir-4952
+TCCGGGTGCGCTGGACGATCTGATCTTCTCGGGAGCCGGATCCGGATGTCGCGGCGACGACGAGGACGAGTGCACGCCGCCATTCGAATCGGGTAGCGGGGATGATCTCATAACACCGGTCTACGT
+>dme-mir-4953
+TAAAGTTCACAGGCGACCATGAATGTTAATAAAGAATATTTACCAATAAAGTTTTATTTTTTATTGAAGGTCATGGACATCCTTGAACCGATT
+>dme-mir-4954
+TTGGCGCTTGGAATCGATACCCGAGCCATGATAGATTGAAGTCAACCCAATCGATCGCGGTTCGAGTGCTCGAGTCTTGTGCGCCGGCATG
+>dme-mir-4955
+CACCGATTTGCTTCCCCTTTTTCCGTGCGCGGAGAAAAAAATCCCCAGAAACATAATATCAGGATGCTCTCTATCTCTGTCTCTGTCTAACGCTTAATTGTTGCTCAATTGCGAATTAATGTCTGGGGCTTTTTTTCTCCGTGCAGTGCCATAGCCGTAACCACAACCT
+>dme-mir-4956
+AGGGGGCGGCAACATGTGCCACCTGCCGCCGATGGCCAGCAACAACTCGCTGAATAATCTCTGCGGATTGTCGCTGGGCAGCGGTGGTAGTGATGATCTCATGAACGATCCTCGGGC
+>dme-mir-4957
+TCGTTGTATTTTACGCACTCTGCTGGCAGCGCTGTCGGCGTCGCTGCCAGCTGCCAGCAGCGAGCGGTACACTTGCAACTATCC
+>dme-mir-4958
+TCTCTGGCAGCTGCAGTTCCGTTTCCGGAGCGGGATCTGGAACGGGCTCTGGTTCTAGATCAGGATCCAATCATGCACCCGGTCCAGGTACCGCTCCAGGTCCCGTTCCCGGAAACGGTGCCACCGCCAATGCA
+>dme-mir-4959
+TGTCCGGTTTGAACCTTCTGTGCCGGCTGGCCATGATTTGCAGCAGGTATCTCCGAGAGCAGATGATACGCAGCCAGTTGCGTTTCATGACCAAACGGCATGTGAAGCGTGCCCTGCG
+>dme-mir-4960
+ACACAAGGCCTGCCGCCCAAGAAACGCAAGCCCAGAAGTCCGGCAGTCGAGAATCACGAGGGACTGGGACCTTGGGCTTACGGTTCTCAGTTTGGCTTGAC
+>dme-mir-4961
+CGATATCCAATAGTAGCCAACTCTCTCGCTCTCTATGTGTATGTATGTATCTTGCTATCCATATATGTATATCCATATCAGAGAGCCAGGATTGGTTGAGACCATACGATATAACCCGAAACCACACTGGCCA
+>dme-mir-4962
+TCCTTTCTTTTCTCTCCGTCTCTCTCTTTCCCTCTCTCTGTTTACATTGAGCTGTTTTGTCCGTGCACGTGCTAGACGATGTAGAGAGTGAGGGAGAGAGCGAATAGAATAACAGCTCATTGTG
+>dme-mir-4963
+CTGTAGGAAATTTTTTGGGGTACTTCTGGTTTCATAACGCAGAGAATCATCAGCGTGAAGGAATGATTCTCTGCGGGCTGGAACCAGAAGTCCTGCAAATGTTCTTTTA
+>dme-mir-4964
+ATTTTTGAATGGTCCTTTTCATGCCCAAGGCGAATAAATATGTATTTATGTATATTTTTTTATATGGTACGTATTTATTTGTCTGCAAATGGCTGCCAAGGAGTGCTACATA
+>dme-mir-4965
+CACAAGGTGATCGATGGGAGATGAGCGCCTGATGTGATGATGAAGTAGATCGATCACACCACACACTCACTCACCCAGGTTTCATCC
+>dme-mir-4966
+GCTGTGTGACAACAAACTTGTCGGCACTGGAGTACTAAATATATTGCACATGTTCATGCATGTTCAACATATTTAGCACTCAAGTACCGAAAAATGAACAACGAAACGAAAAATAACACGAACATCA
+>dme-mir-4967
+CTATAGCGTTGTCTCTTGCGCAAATTGCATTTTCTCTCTGTTAACATGTACACATAATTCCTGTAACAAAGGGCATTTGCAATGTGTGAGTCAGCAAACCGCATCCG
+>dme-mir-4968
+TTTTCCTCAGAGCCGCGACTGCTGGCGTGTTATTGTTGCTGTTGCAGCTAGCTGCCACGGTTTGTTGCTTCACTGATTACGCGCAATGCAGCAACAGCAGCAGCAGCAGAAGCAACAGCAACTAACTGCAGAGGAGCAATAGCAGCTGAGAGTTC
+>dme-mir-4969
+GTATGGATTTTTCGGCGTGGCGGTAAATTGAATGATGAGTGGGAGGATGTCGCTGCCATTGTTTGGTTGATAAGGGAGCCTGGCGCACTATTACCTGGCTCTCTGAAACGAGAGCCCACCTGTCAATCAATACCACCCCGCCGATGCGACATCATCAT
+>dme-mir-4970
+CCAAAGGTGGGCGACAAGGGCTGGGAGCAGCAGTTGCTGGCGGTATTCTGAAGCTGCTCTTCCGTGGGTTGCTCTTGCTCCTGACCCTCCTCTTCCTCCGCTT
+>dme-mir-4971
+TCGGCCCTCGACACGGTGGATGAGCTTGTCAGTGGCGGATCCAGGAGCAATTCTGCCGCTGGCGACAGATCATCGGCCACTGACAATGCCCATTCACTGTTCGAGGAGATC
+>dme-mir-4972
+AATCAAAAGCGCCGTAGGCCTCCACCTCGGCTTTACAGATATATATATATATATCGTGTATATATCTGTGCGATCGGGATTGAGACTGAGGTTGGTCCGAGTACTTT
+>dme-mir-4973
+CTCGTTCAGTCGTCGTTGCGTGTTGTAGGTGGCCTGGTCTACTGAGCTCCGTTTGGACTGCTCCTCCTCCAACTACTACTGCGGCCGCTGCTGCTGCTGCTGCGGCTGCTGCATCCGCCTGCAACCTGCAACACTCACACCACGGCAACAG
+>dme-mir-4974
+TTCTTTGTTTTTATTCCCTGTCAGAGAGTATATTGATTTTAGGTGGACATTTGCGACCATATAAAATATATATATTCCTGATAAAGAATATTTAATATATCT
+>dme-mir-4975
+GGGCAAATAAAGCACGTTGCTCCCTGTTTACTTGGGTTTTCTGTTCATTTCGCCTATCGAACGTTGAACTTAAAAAGCCACGATCAGTTGTTTAACATACAGATATCTCTG
+>dme-mir-4976
+GTCGATGCCGGCGTTTCCCGCTCCAGGCTGTCGTCCATGGAGTTTGTGTTGTCAGCATTCAGCTCGGTGGATGTCAGCCAGCGGTGGAAGGCGTCGTCTCGCT
+>dme-mir-4977
+TAACCCTCCGCTCGAGAACGTGGTTTCTTTTGCGACGTGCAATACTCTGTGGCAGAAGTCTTGCACCGTTCCAAAAGAGAGCAGCACGTGGAGCGACGAGAACGG
+>dme-mir-4978
+GGACTTTCCACTTTTCGTTTGGCTGGCCTGGAAAAGCTCTCTTTCTCTATTGCCTCTATGGCTCGGCAGCGCTGGGCGCCTAATCCGAATTGCGAGAAAGATCCGCTTTTCGAGCCAAAAACCATCGAAAAGCCTGGCAAGCACACA
+>dme-mir-4979
+CATAGGTGTTATATCTAGGCCAAATATATATACTTGTTCTATGTATTAGTTATTTGCAAAGGAATACATGTGCCTGTATATATATTCGGTAAAATCACATCTTTGAATT
+>dme-mir-4980
+GAAGAGGCTGAGAGCGGGAAAGTGAGAAGTAGAGAGCGCAGATAGGGAGTGGGAGCTTATACCGTTATAAAGCCAACTTCCGTTCTGCGTTTCGTTCCAACCCCCCACCTCTCTTTCCCTCT
+>dme-mir-4981
+TGGCCACGTGCCCGCAAGACGCCTTCGGCCGGCGCAAAGTTTCAATTTGATATTCCTTGGCCGGTCGAGCAGCGTCTTGGGGTTGTTCGTGGATT
+>dme-mir-4982
+CCAACTTTGCTGGCATTCGGTGGCCAATAATGCAATTGCATGTCCTTGTTGCTGTTGCAATGTTGCAGCTGGAATTGCAATTGCATTTCTAGTTGCCGCTGCTGCTGCTCGAGTGGCAA
+>dme-mir-4983
+GGCTGGCTGTCTTGACCCACTTGCTCGTTTGCATTTCTGAAAAATGTATACGCAAAAACCAAAATCAGAAATTCTTTTGAGCAAGTGTGTCATGAAATTGCCATTCGT
+>dme-mir-4984
+CTTCGCTCGAGGTGTGAAAACCTTTTGGCCAGCGAATACGCCTCGGATCCAATCACGTTTCAAGATCGAGGTGAATTCTTTGACGTATTCGCTGGCCAAATGACACGTTCGCCATGTTTTGCG
+>dme-mir-4985
+TTGGGGGCGCTGCACTGGCATTGAAAAGTGAATTACATTGATCGTGACATGGGAATGGAAAATGTCGCCGAGCCATGTAAATCGTTTGGCCTACCCGTCTA
+>dme-mir-4986
+TTCTGCCGCTTTTGCTGTGGCTTCTCTGCATGGGATTCCCCATTCTGCATGGCGCCGATCTCTGCCAGCCCATCGGATGGCGGAACTTCCAGTGCAGCGAGGTCGCTTCTCTGCAGGATCTG
+>dme-mir-4987
+GCGGTGCCGTTGATGATGACACAGCGCGCTTGCAACAGCGTGCGGCACGATTCTCACAGCAGGGCTCCAGCTCGGCCAAAAAATCGGTCGTCGCCATTGCAAGCTCACCGTTTGGTCTCACCACGGCCAAGA
+>dme-mir-iab-4
+TCGTAAACGTATACTGAATGTATCCTGAGTGTATCCTATCCGGTATACCTTCAGTATACGTAACACGA
+>dme-mir-iab-8
+TCGTGTTACGTATACTGAAGGTATACCGGATAGGATACACTCAGGATACATTCAGTATACGTTTACGA
diff -r 000000000000 -r b996480cd604 test-data/matchedSample_1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/matchedSample_1 Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,300 @@
+58 + dme-mir-305 45 CGGCACATGTTGAATTACACTCA
+109 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+156 + dme-mir-14 40 TCAGTCTTTTTTTCTCTCCTA
+158 + dme-mir-184 61 GGACGGAGAACTGATAAGGGCA
+170 + dme-mir-79 58 ATAAAGCTAGATTACCAAAGCAT
+199 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+217 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+228 + dme-mir-184 61 GGACGGAGAACTGATAATGGC
+276 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+295 + dme-mir-982 15 TCCTGGACAAATATGAATTAAATT
+294 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+315 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+341 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+340 + dme-mir-31a 13 TGGCAAGATGTCGGCATATCT
+359 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+392 + dme-mir-184 60 TGGACGGAGAACTGATAAGG
+398 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+408 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+417 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+425 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+451 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+464 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+460 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+481 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+500 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCTT
+516 + dme-mir-279 66 TGACTAGATCCACACTCATTA
+521 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+522 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+537 + dme-mir-275 58 TCAGGTACCTGAAGTAGCGCGCG
+567 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+570 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+573 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+544 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+557 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+603 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+601 + dme-mir-79 59 TAAAGCTAGATTACCAAAGCAT
+591 + dme-mir-996 60 TGACTAGATTTCATGCTCTTC
+612 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+620 + dme-mir-276a 53 TAGGAACTTCATACCGTTCTCT
+628 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+656 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+653 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAGT
+663 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+666 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+647 + dme-mir-184 60 TGGACGGAGAACTGATAAGG
+668 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACG
+681 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+683 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+684 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+688 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+689 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGA
+702 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+706 + dme-mir-184 60 TGGACGGAGAACTTATAAGGGC
+715 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+719 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+730 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+738 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+752 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+753 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+765 + dme-mir-14 3 GGGAGCGAGACGGGGACTCAC
+766 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+773 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+797 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+795 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+803 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+804 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+816 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+808 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+821 + dme-mir-12 6 TGAGTATTACATCAGGTACT
+837 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+842 + dme-mir-92a 16 AGGACGGGAAGGTGTCAACGT
+859 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+863 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+864 + dme-mir-279 66 TGACTAGATCCACACTCATTAAA
+862 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+873 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+885 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+883 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+893 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+902 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+903 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+906 + dme-mir-279 66 TGACTAGATCCACACTCATTAA
+922 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+940 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+944 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+951 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+957 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+953 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCTA
+964 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+968 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+974 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+979 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+983 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+990 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+1030 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1058 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1037 + dme-mir-1 55 TGGAATGTAAAGAAGGATGGAG
+1061 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1065 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1076 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1075 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1077 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1082 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+1098 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1101 + dme-mir-279 66 TGACTAGATCCACACTCATTAA
+1091 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+1110 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAGT
+1123 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+1150 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCC
+1153 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCT
+1164 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1171 + dme-mir-279 66 TGACTAGATCCACACTCATTAA
+1187 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1185 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1190 + dme-mir-92b 60 ATTGCACTAGTCCCGGCCTG
+1197 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1206 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+1194 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1219 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1213 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1221 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1233 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGA
+1239 + dme-mir-184 60 TGGACGGAGAACTGATAAGG
+1252 + dme-mir-279 66 TGACTAGATCCACACTCATTA
+1259 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1262 + dme-mir-316 53 ACAGGAAAGGGAAAAAGGCGTA
+1265 + dme-mir-275 58 TCAGGTACCTGAAGTAGCGCGCG
+1273 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1275 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1296 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1298 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1302 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGA
+1305 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1306 + dme-mir-14 3 GGGAGCGAGACGGGGACTCACT
+1317 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1321 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1324 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1337 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1340 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1352 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1357 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+1356 + dme-mir-184 60 TAGACGGAGAACTGATAAGGGC
+1360 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1366 + dme-mir-184 60 TGGACGGAGAACGGATAAGGGC
+1374 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGA
+1383 + dme-mir-184 60 TGGACGGAGAACTTATAAGG
+1381 + dme-mir-14 3 GGGAGCGAGACGGGGACTCACT
+1389 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1401 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1417 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCT
+1420 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1426 + dme-mir-275 58 TCAGGTACCTGAAGTAGCGCGCG
+1427 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1444 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+1438 + dme-mir-14 3 GGGAGCGAGACGGGGACTCACT
+1449 + dme-mir-318 42 TCACTGGGCTTTGTTTATCTC
+1455 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1454 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1473 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1505 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGG
+1510 + dme-mir-994 11 CTAAGGAAATAGTAGCCGTGA
+1512 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1517 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+1526 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1522 + dme-bantam 51 TGAGATCATTTTGAAAGCTGATT
+1530 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+1524 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+1533 + dme-mir-79 59 TAAAGCTAGATTACCAAAGCAT
+1538 + dme-mir-989 138 TGTGATGTGACGTAGTGGAA
+1550 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1554 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1555 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1558 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1563 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1564 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+1570 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1587 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1591 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAGA
+1601 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1609 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1610 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+1625 + dme-mir-79 59 TAAAGCTAGATTACCAAAGCAT
+1630 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1657 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1655 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1664 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1670 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1689 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1705 + dme-mir-275 58 TCAGGTACCTGAAGTAGCGCGCG
+1704 + dme-mir-276b 23 AGCGAGGTATAGAGTTCCTACG
+1721 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1715 + dme-mir-276a 53 TAGGAACTTCATACCGTTCTCTT
+1733 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1732 + dme-mir-279 66 TGACTAGATCCACACTCATTAA
+1729 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGA
+1750 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGA
+1747 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGG
+1757 + dme-mir-994 12 TAAGGAAATAGTAGTCGTGATT
+1760 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1762 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1770 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGG
+1780 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1800 + dme-mir-12 6 TGAGTATTACATCAGGTACTGGT
+1795 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1799 + dme-mir-960 56 CATACGGTCTGGGACACTTTTA
+1807 + dme-mir-184 60 TGGACGGAGAACTGATAAGGTC
+1821 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1823 + dme-mir-2a-1 48 TATCACAGCCAGCTTTTATGAGCT
+1866 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1879 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+1914 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1928 + dme-mir-279 66 TGACTAGATTCACACTCATTA
+1938 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1946 + dme-mir-8 52 TAATACTGTCAGGTAAAGATGTC
+1955 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+1961 + dme-mir-184 60 TGGACGGAGAACTGATAATGGC
+1965 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+1971 + dme-mir-275 58 TCAGGTACCTGAAGTAGCGCGCGA
+1974 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+1978 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1988 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2011 + dme-mir-14 3 GGGAGCGAGACGGGGACTCACT
+2023 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2021 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2033 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+2040 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2045 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2048 + dme-mir-989 138 TGTGATGTGACGTAGTGGAA
+2061 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2053 + dme-mir-14 3 GGGAGCGAGACGGGGACTCAC
+2062 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2070 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2072 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2098 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2104 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2123 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+2131 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2177 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2173 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2172 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2179 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2186 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2190 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2191 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2198 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2194 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2223 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2227 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+2254 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2259 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2278 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCT
+2274 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2297 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2301 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2307 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2299 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2337 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2359 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2382 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2391 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2399 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2400 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2413 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2437 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2445 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2448 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2461 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+2463 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+2473 + dme-mir-275 58 TCAGGTACCTTAAGTAGCGCG
+2483 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGGG
+2491 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2520 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2550 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAG
+2556 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2568 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTG
+2591 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2602 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2598 + dme-mir-79 59 TAAAGCTAGATTACCAAAGCAT
+2601 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGA
+2614 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2613 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2625 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+2652 + dme-mir-312 42 TATTGCACTTGAGACGGCCTTA
+2662 + dme-mir-1 55 TGGAATGTAAAGAAGTATGGAGT
+2671 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2675 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2672 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2693 + dme-mir-31b 13 TGGCAAGATGTCGGAATAGCT
+2705 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+2711 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2716 + dme-mir-79 59 TAAAGCTAGATTACCAAAGCAT
+2725 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+2735 + dme-mir-79 59 TAAAGCTAGATTACCAAAGCAT
+2739 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+2749 + dme-mir-275 58 TCAGGTACCTGAAGTAGCGCGCG
+2748 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+2756 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2771 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+2790 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+2811 + dme-mir-11 47 CATCACAGTCTGAGTTCTTGCT
diff -r 000000000000 -r b996480cd604 test-data/matchedSample_2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/matchedSample_2 Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,300 @@
+22 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+16 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+14 + dme-mir-281-1 13 AAAGAGAGCTGTCCGTCGACA
+26 + dme-mir-281-2 15 AGAGAGCTATCCGTCGACAGT
+28 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+32 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+34 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+41 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+42 + dme-mir-14 3 GGGAGCGAGACGGGGACTC
+43 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+61 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+65 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+67 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+69 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTC
+79 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+84 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+89 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+96 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+103 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+108 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+110 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+112 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+117 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+120 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+123 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+124 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+125 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+129 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTG
+140 + dme-mir-100 11 AACCCGTAAATCCGAACTTG
+141 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+143 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+147 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+157 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+159 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+164 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+166 + dme-mir-8 52 TAATACTGTCAGGTAAAGATGTC
+181 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+17 + dme-mir-92b 59 AATTGCACTAGACCCGGCCTGC
+190 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTG
+189 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+194 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+197 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+203 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+205 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+215 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+206 + dme-mir-956 117 TTCGAGACCACTCTAATCCAT
+230 + dme-mir-956 117 TTCGAGACCACTCTAATCCAT
+218 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+236 + dme-mir-14 40 TCAGTCTTTTTCTCTCTCCT
+245 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+248 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+247 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCT
+253 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGT
+251 + dme-mir-281-2 59 TGTCATGGAATTGCTCTCTTTG
+264 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+270 + dme-mir-92b 59 AATTGCACTAGTCCCGGCC
+275 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+274 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+280 + dme-mir-10 48 CAAATTCGGTTCTAGAGAGGTT
+278 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTC
+287 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+291 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+306 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+310 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+315 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+322 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+323 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+324 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+331 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+342 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+346 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+350 + dme-mir-995 54 TAGCACCACATGATTCGGCTT
+347 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+352 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+358 + dme-mir-305 45 CGGCACATGTTGAAGTACACTCA
+363 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTG
+369 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+392 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+395 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+400 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+401 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+406 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+414 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTC
+419 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+425 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+435 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+432 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+437 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+439 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+442 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+453 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+450 + dme-mir-989 138 TGTGATGTGACGTAGTGGAACA
+451 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+463 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+466 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+467 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+470 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCT
+483 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+484 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+485 + dme-mir-11 47 CATCACAGTCTGAGTTCTTGCT
+487 + dme-bantam 51 TGAGATCATTTTGAAAGCTGATT
+494 + dme-mir-125 28 TCCCTGAGACCCTAACTTGTG
+502 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+507 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+510 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+518 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+520 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+526 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+528 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+529 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAG
+540 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+542 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+547 + dme-mir-14 40 TCAGTCTTTTTCTCTCTCCT
+544 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+552 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+553 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+563 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+566 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+568 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+564 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACA
+569 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+575 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+584 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+586 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+589 + dme-mir-14 40 TCAGTCTTTTTCTCTCTCCT
+592 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+598 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+3 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+6 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+608 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+614 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+618 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+641 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+642 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+646 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+648 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGT
+651 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+650 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+658 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+673 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACA
+683 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+696 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+694 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+692 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAG
+697 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTTT
+713 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+718 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGT
+709 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+700 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTC
+744 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+737 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+720 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+750 + dme-mir-8 52 TAATACTGTCAGGTAAAGATGTC
+756 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGT
+751 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+761 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+760 + dme-mir-956 116 TTTCGAGACCACACTAATCCATT
+769 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+763 + dme-mir-8 52 TAATACTGTCAGGTAAAGATGTC
+771 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGC
+770 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+783 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+792 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+805 + dme-mir-184 60 TGGACGGAGAACTGATAAGG
+806 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+810 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+818 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+815 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+823 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGG
+822 + dme-mir-281-1 13 AAAGAGAGCTGTCCGTCGACA
+836 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+842 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+848 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+843 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+865 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+863 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+864 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTT
+876 + dme-mir-277 58 TAAATGCACTATCTGGTACGAC
+866 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+881 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+880 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+889 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+901 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+899 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGCA
+904 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+912 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+910 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+917 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+914 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+924 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+927 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+919 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+922 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+931 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGT
+930 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+944 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+946 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+950 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+948 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+963 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+960 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+965 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+968 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+964 + dme-mir-184 60 TGGACAGAGAACTGATAAGGG
+969 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+978 + dme-mir-965 65 TAAGCGTATAGCTTTTCCCCTT
+971 + dme-mir-959 60 TTGTCATCGGGGGTATTATGAA
+985 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+983 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+993 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+995 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+1002 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1013 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1011 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1004 + dme-mir-281-1 13 AAAGAGAGCTGTCCGTCGACA
+1022 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1033 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1035 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+1046 + dme-mir-276a 53 TAGGAACTTCATACCGTGCTCT
+1061 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1057 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1070 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1067 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+1066 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1073 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+1071 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1072 + dme-mir-11 8 CAAGAACTTTCTCTGTGACCCG
+1065 + dme-mir-2b-2 53 TATCACAGCCAGCTTTGAGGAG
+1081 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1103 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1108 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1106 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+1115 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+1128 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1127 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1135 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+1138 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1139 + dme-mir-14 40 TCAGTCTTTTTCTCTCTCCT
+1151 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+1158 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1170 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1177 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+1175 + dme-mir-956 117 TTCGAGACCACTCTAATCCAT
+1197 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGT
+1186 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1199 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1201 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTC
+1216 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+1221 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1230 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1234 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+1233 + dme-mir-14 40 TCAGTCTTTTTCTCTCTCCT
+1243 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1235 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+1240 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTG
+1246 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1248 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+1241 + dme-mir-8 52 TAATACTGTCAGGTAAAGATGTC
+1254 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+1255 + dme-mir-956 117 TTCGAGACCACTCTAATCCATT
+1266 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1262 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAG
+1272 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+1281 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1277 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTG
+1288 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1291 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1297 + dme-mir-305 45 CGGCACATGTTGAAGTACACT
+1302 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+1300 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+1304 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1314 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1326 + dme-mir-184 60 TGGACGGAGAACTGATAAGGG
+1334 + dme-mir-92b 59 AATTGCACTAGTCCCGGCCTGC
+1343 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1342 + dme-mir-956 116 TTTCGAGACCACTCTAATCCAT
+1349 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1337 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1352 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+1361 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTG
+1364 + dme-mir-999 61 TGTTAACTGTAAGACTGTGTCT
+1365 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCT
+1357 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAG
+1370 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1379 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+1389 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1388 + dme-mir-991 60 TTAAAGTTGTAGTTTGGAAAGT
+1384 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+1403 + dme-mir-14 40 CCAGTCTTTTTCTCTCTCCT
+1406 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGT
+1420 + dme-mir-956 116 TTTCGAGACCACTCTAATCCATT
+1422 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1414 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCT
+1424 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
+1427 + dme-mir-263a 17 AATGGCACTGGAAGAATTCACGG
+1440 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGT
+1439 + dme-mir-10 8 ACCCTGTAGATCCGAATTTGTT
+1442 + dme-mir-281-2 14 AAGAGAGCTATCCGTCGACAGTC
+1444 + dme-mir-31a 13 TGGCAAGATGTCGGCATAGCTGA
+1452 + dme-mir-184 60 TGGACGGAGAACTGATAAGGGC
diff -r 000000000000 -r b996480cd604 tool-data/bowtie_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/bowtie_indices.loc.sample Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,37 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Bowtie indexed sequences data files. You will
+#need to create these data files and then create a bowtie_indices.loc
+#file similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The bowtie_indices.loc
+#file has this format (longer white space characters are TAB characters):
+#
+#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
+#
+#So, for example, if you had hg18 indexes stored in
+#/depot/data2/galaxy/bowtie/hg18/,
+#then the bowtie_indices.loc entry would look like this:
+#
+#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18
+#
+#and your /depot/data2/galaxy/bowtie/hg18/ directory
+#would contain hg18.*.ebwt files:
+#
+#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt
+#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt
+#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt
+#...etc...
+#
+#Your bowtie_indices.loc file should include an entry per line for each
+#index set you have stored. The "file" in the path does not actually
+#exist, but it is the prefix for the actual index files. For example:
+#
+#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon
+#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full
+#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19
+#...etc...
+#
+#Note that for backwards compatibility with workflows, the unique ID of
+#an entry must be the path that was in the original loc file, because that
+#is the value stored in the workflow for that parameter. That is why the
+#hg19 entry above looks odd. New genomes can be better-looking.
+#
diff -r 000000000000 -r b996480cd604 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+ value, dbkey, name, path
+
+
+
diff -r 000000000000 -r b996480cd604 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed May 27 17:19:15 2015 -0400
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+