# HG changeset patch
# User xuebing
# Date 1333198386 14400
# Node ID 0e221dbd17b2c7f4bfed1e8f42bda964bd48c766
# Parent  d1f0f85ee5bce9f1f05d2339b0776f410a9e691a
Uploaded
diff -r d1f0f85ee5bc -r 0e221dbd17b2 altschulEriksonDinuclShuffle.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/altschulEriksonDinuclShuffle.py	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,150 @@
+#! /usr/bin/env python
+
+# altschulEriksonDinuclShuffle.py
+# P. Clote, Oct 2003
+# NOTE: One cannot use function "count(s,word)" to count the number
+# of occurrences of dinucleotide word in string s, since the built-in
+# function counts only nonoverlapping words, presumably in a left to
+# right fashion.
+
+
+import sys,string,random
+
+
+
+def computeCountAndLists(s):
+  #WARNING: Use of function count(s,'UU') returns 1 on word UUU
+  #since it apparently counts only nonoverlapping words UU
+  #For this reason, we work with the indices.
+
+  #Initialize lists and mono- and dinucleotide dictionaries
+  List = {} #List is a dictionary of lists
+  List['A'] = []; List['C'] = [];
+  List['G'] = []; List['T'] = [];
+  nuclList   = ["A","C","G","T"]
+  s       = s.upper()
+  s       = s.replace("U","T")
+  nuclCnt    = {}  #empty dictionary
+  dinuclCnt  = {}  #empty dictionary
+  for x in nuclList:
+    nuclCnt[x]=0
+    dinuclCnt[x]={}
+    for y in nuclList:
+      dinuclCnt[x][y]=0
+
+  #Compute count and lists
+  nuclCnt[s[0]] = 1
+  nuclTotal     = 1
+  dinuclTotal   = 0
+  for i in range(len(s)-1):
+    x = s[i]; y = s[i+1]
+    List[x].append( y )
+    nuclCnt[y] += 1; nuclTotal  += 1
+    dinuclCnt[x][y] += 1; dinuclTotal += 1
+  assert (nuclTotal==len(s))
+  assert (dinuclTotal==len(s)-1)
+  return nuclCnt,dinuclCnt,List
+ 
+ 
+def chooseEdge(x,dinuclCnt):
+  numInList = 0
+  for y in ['A','C','G','T']:
+    numInList += dinuclCnt[x][y]
+  z = random.random()
+  denom=dinuclCnt[x]['A']+dinuclCnt[x]['C']+dinuclCnt[x]['G']+dinuclCnt[x]['T']
+  numerator = dinuclCnt[x]['A']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['A'] -= 1
+    return 'A'
+  numerator += dinuclCnt[x]['C']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['C'] -= 1
+    return 'C'
+  numerator += dinuclCnt[x]['G']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['G'] -= 1
+    return 'G'
+  dinuclCnt[x]['T'] -= 1
+  return 'T'
+
+def connectedToLast(edgeList,nuclList,lastCh):
+  D = {}
+  for x in nuclList: D[x]=0
+  for edge in edgeList:
+    a = edge[0]; b = edge[1]
+    if b==lastCh: D[a]=1
+  for i in range(2):
+    for edge in edgeList:
+      a = edge[0]; b = edge[1]
+      if D[b]==1: D[a]=1
+  ok = 0
+  for x in nuclList:
+    if x!=lastCh and D[x]==0: return 0
+  return 1
+
+ 
+
+def eulerian(s):
+  nuclCnt,dinuclCnt,List = computeCountAndLists(s)
+  #compute nucleotides appearing in s
+  nuclList = []
+  for x in ["A","C","G","T"]:
+    if x in s: nuclList.append(x)
+  #compute numInList[x] = number of dinucleotides beginning with x
+  numInList = {}
+  for x in nuclList:
+    numInList[x]=0
+    for y in nuclList:
+      numInList[x] += dinuclCnt[x][y]
+  #create dinucleotide shuffle L 
+  firstCh = s[0]  #start with first letter of s
+  lastCh  = s[-1]
+  edgeList = []
+  for x in nuclList:
+    if x!= lastCh: edgeList.append( [x,chooseEdge(x,dinuclCnt)] )
+  ok = connectedToLast(edgeList,nuclList,lastCh)
+  return ok,edgeList,nuclList,lastCh
+
+
+def shuffleEdgeList(L):
+  n = len(L); barrier = n
+  for i in range(n-1):
+    z = int(random.random() * barrier)
+    tmp = L[z]
+    L[z]= L[barrier-1]
+    L[barrier-1] = tmp
+    barrier -= 1
+  return L
+
+def dinuclShuffle(s):
+  ok = 0
+  while not ok:
+    ok,edgeList,nuclList,lastCh = eulerian(s)
+  nuclCnt,dinuclCnt,List = computeCountAndLists(s)
+
+  #remove last edges from each vertex list, shuffle, then add back
+  #the removed edges at end of vertex lists.
+  for [x,y] in edgeList: List[x].remove(y)
+  for x in nuclList: shuffleEdgeList(List[x])
+  for [x,y] in edgeList: List[x].append(y)
+
+  #construct the eulerian path
+  L = [s[0]]; prevCh = s[0]
+  for i in range(len(s)-2):
+    ch = List[prevCh][0] 
+    L.append( ch )
+    del List[prevCh][0]
+    prevCh = ch
+  L.append(s[-1])
+  t = string.join(L,"")
+  return t
+ 
+ 
+
+if __name__ == '__main__':
+  if len(sys.argv)!=3:
+    print "Usage: python altschulEriksonDinuclShuffle.py GCATCGA 5"
+    sys.exit(1)
+  s = sys.argv[1].upper()
+  for i in range(int(sys.argv[2])):
+    print dinuclShuffle(s)
diff -r d1f0f85ee5bc -r 0e221dbd17b2 dreme.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dreme.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,50 @@
+
+  short motif discovery
+  /Users/xuebing/bin/dreme.py -p $input -png     -e $ethresh
+    #if $background_select.bg_select == "fromfile":
+        -n "${bgfile}"
+    #end if
+
+  && mv dreme_out/dreme.html ${html_outfile}
+  
+  && mv dreme_out/dreme.txt ${txt_outfile}
+
+  && mv dreme_out/dreme.xml ${xml_outfile}
+  
+  && rm -rf dreme_out
+  
+  
+  
+            
+     
+    	
+		
+		
+	    
+	    
+		    
+	    
+    
+          
+      
+  
+  
+
+    
+    
+        
+  
+  
+
+**What it does**
+
+http://meme.sdsc.edu/meme/doc/dreme.html
+
+DREME (Discriminative Regular Expression Motif Elicitation) finds relatively short motifs (up to 8 bases) fast, and can perform discriminative motif discovery if given a negative set, consisting of sequences unlikely to contain a motif of interest that is however likely to be found in the main ("positive") sequence set. If you do not provide a negative set the program shuffles the positive set to provide a background (in the role of the negative set).
+
+The input to DREME is one or two sets of DNA sequences. The program uses a Fisher Exact Test to determine significance of each motif found in the postive set as compared with its representation in the negative set, using a significance threshold that may be set on the command line.
+
+DREME achieves its high speed by restricting its search to regular expressions based on the IUPAC alphabet representing bases and ambiguous characters, and by using a heuristic estimate of generalised motifs' statistical significance. 
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fasta-dinucleotide-shuffle.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta-dinucleotide-shuffle.py	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,223 @@
+#!/usr/bin/python
+
+import sys, string, random
+import sequence
+
+# 
+# turn on psyco to speed up by 3X
+#
+if __name__=='__main__': 
+  try:
+    import psyco
+    #psyco.log()
+    psyco.full()
+    psyco_found = True
+  except ImportError:
+#    psyco_found = False
+    pass
+#  print >> sys.stderr, "psyco_found", psyco_found
+
+
+# altschulEriksonDinuclShuffle.py
+# P. Clote, Oct 2003
+
+def computeCountAndLists(s):
+
+  #Initialize lists and mono- and dinucleotide dictionaries
+  List = {} #List is a dictionary of lists
+  List['A'] = []; List['C'] = [];
+  List['G'] = []; List['T'] = [];
+  # FIXME: is this ok?
+  List['N'] = []
+  nuclList   = ["A","C","G","T","N"]
+  s       = s.upper()
+  #s       = s.replace("U","T")
+  nuclCnt    = {}  #empty dictionary
+  dinuclCnt  = {}  #empty dictionary
+  for x in nuclList:
+    nuclCnt[x]=0
+    dinuclCnt[x]={}
+    for y in nuclList:
+      dinuclCnt[x][y]=0
+
+  #Compute count and lists
+  nuclCnt[s[0]] = 1
+  nuclTotal     = 1
+  dinuclTotal   = 0
+  for i in range(len(s)-1):
+    x = s[i]; y = s[i+1]
+    List[x].append( y )
+    nuclCnt[y] += 1; nuclTotal  += 1
+    dinuclCnt[x][y] += 1; dinuclTotal += 1
+  assert (nuclTotal==len(s))
+  assert (dinuclTotal==len(s)-1)
+  return nuclCnt,dinuclCnt,List
+ 
+ 
+def chooseEdge(x,dinuclCnt):
+  z = random.random()
+  denom=dinuclCnt[x]['A']+dinuclCnt[x]['C']+dinuclCnt[x]['G']+dinuclCnt[x]['T']+dinuclCnt[x]['N']
+  numerator = dinuclCnt[x]['A']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['A'] -= 1
+    return 'A'
+  numerator += dinuclCnt[x]['C']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['C'] -= 1
+    return 'C'
+  numerator += dinuclCnt[x]['G']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['G'] -= 1
+    return 'G'
+  numerator += dinuclCnt[x]['T']
+  if z < float(numerator)/float(denom):
+    dinuclCnt[x]['T'] -= 1
+    return 'T'
+  dinuclCnt[x]['N'] -= 1
+  return 'N'
+
+def connectedToLast(edgeList,nuclList,lastCh):
+  D = {}
+  for x in nuclList: D[x]=0
+  for edge in edgeList:
+    a = edge[0]; b = edge[1]
+    if b==lastCh: D[a]=1
+  for i in range(3):
+    for edge in edgeList:
+      a = edge[0]; b = edge[1]
+      if D[b]==1: D[a]=1
+  ok = 0
+  for x in nuclList:
+    if x!=lastCh and D[x]==0: return 0
+  return 1
+
+def eulerian(s):
+  nuclCnt,dinuclCnt,List = computeCountAndLists(s)
+  #compute nucleotides appearing in s
+  nuclList = []
+  for x in ["A","C","G","T","N"]:
+    if x in s: nuclList.append(x)
+  #create dinucleotide shuffle L 
+  firstCh = s[0]  #start with first letter of s
+  lastCh  = s[-1]
+  edgeList = []
+  for x in nuclList:
+    if x!= lastCh: edgeList.append( [x,chooseEdge(x,dinuclCnt)] )
+  ok = connectedToLast(edgeList,nuclList,lastCh)
+  return ok,edgeList,nuclList,lastCh
+
+
+def shuffleEdgeList(L):
+  n = len(L); barrier = n
+  for i in range(n-1):
+    z = int(random.random() * barrier)
+    tmp = L[z]
+    L[z]= L[barrier-1]
+    L[barrier-1] = tmp
+    barrier -= 1
+  return L
+
+def dinuclShuffle(s):
+  ok = 0
+  while not ok:
+    ok,edgeList,nuclList,lastCh = eulerian(s)
+  nuclCnt,dinuclCnt,List = computeCountAndLists(s)
+
+  #remove last edges from each vertex list, shuffle, then add back
+  #the removed edges at end of vertex lists.
+  for [x,y] in edgeList: List[x].remove(y)
+  for x in nuclList: shuffleEdgeList(List[x])
+  for [x,y] in edgeList: List[x].append(y)
+
+  #construct the eulerian path
+  L = [s[0]]; prevCh = s[0]
+  for i in range(len(s)-2):
+    ch = List[prevCh][0] 
+    L.append( ch )
+    del List[prevCh][0]
+    prevCh = ch
+  L.append(s[-1])
+  t = string.join(L,"")
+  return t
+ 
+def main():
+
+	#
+	# defaults
+	#
+	file_name = None
+	seed = 1
+	copies = 1
+
+	#
+	# get command line arguments
+	#
+	usage = """USAGE: 
+	%s [options]
+
+        -f    file name (required)
+        -t         added to shuffled sequence names
+        -s 	random seed; default: %d
+	-c 		make  shuffled copies of each sequence; default: %d
+        -h              print this usage message
+	""" % (sys.argv[0], seed, copies)
+
+        # no arguments: print usage
+	if len(sys.argv) == 1:
+		print >> sys.stderr, usage; sys.exit(1)
+
+        tag = "";
+
+        # parse command line
+        i = 1
+        while i < len(sys.argv):
+                arg = sys.argv[i]
+                if (arg == "-f"):
+                        i += 1
+                        try: file_name = sys.argv[i]
+                        except: print >> sys.stderr, usage; sys.exit(1)
+                elif (arg == "-t"):
+                        i += 1
+                        try: tag = sys.argv[i]
+                        except: print >> sys.stderr, usage; sys.exit(1)
+                elif (arg == "-s"):
+                        i += 1
+                        try: seed = string.atoi(sys.argv[i])
+                        except: print >> sys.stderr, usage; sys.exit(1)
+                elif (arg == "-c"):
+                        i += 1
+                        try: copies = string.atoi(sys.argv[i])
+                        except: print >> sys.stderr, usage; sys.exit(1)
+                elif (arg == "-h"):
+                        print >> sys.stderr, usage; sys.exit(1)
+                else:
+                        print >> sys.stderr, "Unknown command line argument: " + arg
+                        sys.exit(1)
+                i += 1
+
+        # check that required arguments given
+        if (file_name == None):
+        	print >> sys.stderr, usage; sys.exit(1)
+
+	random.seed(seed)
+
+	# read sequences
+	seqs = sequence.readFASTA(file_name,'Extended DNA')
+
+	for s in seqs:
+		str = s.getString()
+		#FIXME altschul can't handle ambigs
+		name = s.getName()
+
+		#print >> sys.stderr, ">%s" % name
+
+		for i in range(copies):
+
+			shuffledSeq = dinuclShuffle(str)
+
+			if (copies == 1):
+				print >> sys.stdout, ">%s\n%s" % (name+tag, shuffledSeq)
+			else:
+				print >> sys.stdout, ">%s_%d\n%s" % (name+tag, i, shuffledSeq)
+	
+if __name__ == '__main__': main()
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fastamarkov.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastamarkov.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,21 @@
+
+  of DNA sequence
+  cat $input | fasta-get-markov -m $m $norc > $output 2> err.txt
+    
+    
+  
+      
+      
+    
+  
+  
+    
+  
+  
+
+**What it does**
+
+This tool generates a markov model from a fasta sequence file. 
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fastashuffle1.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastashuffle1.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,17 @@
+
+  preserving mono-nucleotide frequency
+  cat $input | fasta-shuffle-letters > $output  
+  
+     
+  
+  
+    
+  
+  
+
+**Description**
+
+shuffle the position of nucleotides in each sequence, preserving the frequency of nucleotides in that sequence, but do not preserve di-nucleotide frequency. 
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fastashuffle2.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastashuffle2.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,24 @@
+
+  preserving dinucleotide frequency
+  fasta-dinucleotide-shuffle.py -f $input -t $tag -c $n -s $seed > $output 
+  
+    
+    
+    
+    
+  
+  
+    
+  
+  
+
+**What it does**
+
+This tool shuffles the sequences in the input file but preserves the dinucleotide frequency of each sequence. 
+
+The code implements the Altschul-Erikson dinucleotide shuffle algorithm, described in "Significance of nucleotide sequence alignments: A method for random sequence permutation that preserves dinucleotide and codon usage", S.F. Altschul and B.W. Erikson, Mol. Biol. Evol., 2(6):526--538, 1985. 
+
+Code adapted from http://bioinformatics.bc.edu/clotelab/RNAdinucleotideShuffle/dinucleotideShuffle.html
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fastqdump.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqdump.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,18 @@
+
+  convert SRA to FASTQ
+  /Users/xuebing/tools/sratoolkit.2.1.9-mac32/fastq-dump -A $input -M $minReadLen -Z > $out_file1 
+  
+    
+    
+  
+  
+    
+  
+  
+
+**What it does**
+
+This is a wrapper of the fastq-dump tool from sra-toolkit. See http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software 
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fimo2-old.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo2-old.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,63 @@
+
+  using FIMO
+   fimo 
+    #if $background_select.bg_select == "fromfile":
+    -bgfile $bgfile
+    #end if
+    
+  $norc --output-pthresh $pth --verbosity 1 $motif $database 
+  && mv fimo_out/fimo.html ${html_outfile}
+  
+  && mv fimo_out/fimo.txt ${txt_outfile}
+  
+  && rm -rf fimo_out
+  
+  
+  
+       
+        
+            
+            
+        
+        ##
+        ##    
+        ##        
+        ##        
+        ##        
+        ##    
+        ##
+        
+            
+        
+         
+         
+    
+      
+    
+    	
+		  
+		  
+	    
+	    
+		    
+	    
+    
+      
+      
+    
+  
+  
+    
+    
+  
+  
+
+**What it does**
+
+This tool uses FIMO to find matches of a motif in a fasta file. See more details:
+
+http://meme.sdsc.edu/meme/fimo-intro.html
+ 
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fimo2.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo2.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,47 @@
+
+  using FIMO
+   fimo 
+    #if $background_select.bg_select == "fromfile":
+    -bgfile $bgfile
+    #end if
+    
+  $norc --max-stored-scores 5000000 --output-pthresh $pth --verbosity 1 $motif $database 
+  && mv fimo_out/fimo.html ${html_outfile}
+  
+  && mv fimo_out/fimo.txt ${txt_outfile}
+  
+  && rm -rf fimo_out
+  
+  
+  
+    
+                     
+    
+      
+    
+    	
+		  
+		  
+	    
+	    
+		    
+	    
+    
+      
+      
+    
+  
+  
+    
+    
+  
+  
+
+**What it does**
+
+This tool uses FIMO to find matches of a motif in a fasta file. See more details:
+
+http://meme.sdsc.edu/meme/fimo-intro.html
+ 
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fimo2bed.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo2bed.py	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,46 @@
+'''
+#pattern name	sequence name	start	stop	score	p-value	q-value	matched sequence
+constitutive-donor	mm9_chr1_39533592_39535592_-	1815	1823	12.032	4.26e-06	0.397	CAGGTAAGT
+constitutive-donor	mm9_chr1_59313750_59315750_+	1889	1897	12.032	4.26e-06	0.397	CAGGTAAGT
+
+#pattern name	sequence name	start	stop	score	p-value	q-value	matched sequence
+constitutive-donor	mm9_chr1_172019075_172021075_-	1947	1955	12.032	4.26e-06	0.843	CAGGTAAGT
+constitutive-donor	mm9_chr1_15300532_15302532_+	156	164	12.032	4.26e-06	0.843	CAGGTAAGT
+'''
+
+import sys
+
+def fimo2bed(filename,rc):
+    '''
+    parse fimo output to make a bed file
+    rc: the sequence have been reverse complemented
+    '''
+    f = open(filename)
+    header = f.readline()
+    for line in f:
+        pattern,posi,begin,stop,score,pv,qv,seq = line.strip().split('\t')
+        flds = posi.split('_')
+        start = flds[-3]
+        end = flds[-2]
+        strand = flds[-1]
+        chrom = '_'.join(flds[1:-3]) #'chrX_random'
+        if not rc:
+            if strand == '+':
+                start1 = str(int(start) + int(begin)-1)
+                end1 = str(int(start) + int(stop))
+                print '\t'.join([chrom,start1,end1,seq,score,strand]) 
+            else:
+                start1 = str(int(end) - int(stop))
+                end1 = str(int(end) - int(begin)+1)
+                print '\t'.join([chrom,start1,end1,seq,score,strand])
+        else:
+            if strand == '-':
+                start1 = str(int(start) + int(begin)-1)
+                end1 = str(int(start) + int(stop))
+                print '\t'.join([chrom,start1,end1,seq,score,'+']) 
+            else:
+                start1 = str(int(end) - int(stop))
+                end1 = str(int(end) - int(begin)+1)
+                print '\t'.join([chrom,start1,end1,seq,score,'-'])      
+
+fimo2bed(sys.argv[1],sys.argv[2]=='rc')                                   
diff -r d1f0f85ee5bc -r 0e221dbd17b2 fimo2bed.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo2bed.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,19 @@
+
+  convert FIMO output to BED
+  fimo2bed.py $input $rc > $output
+  
+    
+    
+  
+  
+    
+  
+  
+
+  Only works if your original FIMO input fasta sequences have ids like:: 
+  
+    mm9_chr15_99358448_99360448_-
+  
+  
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 iupac2meme.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/iupac2meme.xml	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,59 @@
+
+  from one sequence
+  iupac2meme 
+    #if $background_select.bg_select == "fromfile":
+    -bg $bgfile
+    #end if
+  -numseqs $numseqs $logodds $motif > $output 
+  
+      
+      
+    	
+		  
+		  
+	    
+	    
+		    
+	    
+    
+    
+      
+    
+  
+  
+    
+  
+  
+
+**Description**
+
+Convert an IUPAC motif into a MEME version 4 formatted file suitable for use with FIMO and other MEME Suite programs.
+
+See additional information: 
+
+http://meme.sdsc.edu/meme/doc/iupac2meme.html
+
+**IUPAC code**::
+
+    Nucleotide Code:  Base:
+    ----------------  -----
+    A.................Adenine
+    C.................Cytosine
+    G.................Guanine
+    T (or U)..........Thymine (or Uracil)
+    R.................A or G
+    Y.................C or T
+    S.................G or C
+    W.................A or T
+    K.................G or T
+    M.................A or C
+    B.................C or G or T
+    D.................A or G or T
+    H.................A or C or T
+    V.................A or C or G
+    N.................any base
+
+
+
+  
+
diff -r d1f0f85ee5bc -r 0e221dbd17b2 match
Binary file match has changed
diff -r d1f0f85ee5bc -r 0e221dbd17b2 match.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/match.cpp	Sat Mar 31 08:53:06 2012 -0400
@@ -0,0 +1,413 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include