annotate tools/mytools/shuffleSequenceUsingAltschulErikson.txt @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #! /usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 #shuffleSequenceUsingAltschulErikson.py
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 #P. Clote, Oct 2003
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 #------------------------------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 #Input RNAs in FASTA file, and compute NUM many shufflings of RNA sequence
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 #using the Altschul-Erickson dinucleotide-preserving random shuffle method.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 #------------------------------------------------------------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 PRINT = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 LINELEN = 70
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 import sys,os,stats,string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 from altschulEriksonDinuclShuffle import dinuclShuffle
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 import computeRNAfoldEnergyForRNAsInFile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def file2string(fileName):
    """Return the sequence text of a FASTA file as one string.

    Lines beginning with '>' are treated as header/comment lines and
    skipped. Every other line has its line terminator removed and the
    results are concatenated in file order.

    fileName -- path of the FASTA file to read.

    Returns the concatenated sequence; an empty file yields ''.
    """
    pieces = []
    # 'with' closes the handle even if reading fails part-way through.
    with open(fileName, "r") as handle:
        # Iterating the handle stops cleanly at EOF, so a header on the
        # very last line cannot lead to indexing an empty string.
        for line in handle:
            if line.startswith(">"):
                continue
            # Strip only the terminator: a final line with no trailing
            # newline must keep its last character.
            pieces.append(line.rstrip("\r\n"))
    return "".join(pieces)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main(fileName,NUM):
    """Write NUM dinucleotide shufflings of a FASTA sequence to stdout.

    fileName -- FASTA file whose sequence is read with file2string().
    NUM      -- how many shuffled sequences to produce.

    Each shuffling is written as a FASTA record whose header is its
    1-based index ('>1', '>2', ...), followed by the shuffled sequence.
    """
    original = file2string(fileName)
    emit = sys.stdout.write
    for recordNumber in range(1, NUM + 1):
        # Shuffle first, so nothing is written for a record that fails.
        shuffled = dinuclShuffle(original)
        emit(">%d\n" % recordNumber)
        emit("%s\n" % shuffled)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: <script> RNAs.faa NUM
    if len(sys.argv) < 3:
        # Too few arguments: print the usage text and exit with status 1.
        # sys.stdout.write is used instead of the print statement so the
        # script parses under both Python 2 and Python 3.
        sys.stdout.write("Usage: %s RNAs.faa NUM\n" % sys.argv[0])
        text = (
            "\n"
            "1) RNA.faa is FASTA file of ONE RNA sequence\n"
            "2) NUM is number of random sequences to generate by\n"
            "shuffling the dinucleotides of RNAs input\n"
            "Script to compute Altschul-Erickson randomly shuffled dinucleotides.\n"
        )
        sys.stdout.write(text + "\n")
        sys.exit(1)
    fileName = sys.argv[1]
    try:
        NUM = int(sys.argv[2])
    except ValueError:
        # Report a non-integer NUM as a one-line error, not a traceback.
        sys.stderr.write("NUM must be an integer, got %r\n" % sys.argv[2])
        sys.exit(1)
    main(fileName,NUM)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61