Mercurial > repos > drosofff > msp_fasta_tabular_converter
comparison fasta_tabular_converter.py @ 0:951cb6b3979b draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Sun, 21 Jun 2015 14:28:49 -0400 |
parents | |
children | 2f7278120be9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:951cb6b3979b |
---|---|
1 #!/usr/bin/python | |
2 # | |
3 import sys | |
4 from collections import defaultdict | |
5 | |
def readfasta_writetabular(fasta, tabular):
    """Collapse a FASTA file into a "sequence<TAB>count" table.

    Every non-header line of ``fasta`` is treated as one complete sequence
    (header lines are those starting with ">"). Identical sequences are
    counted and written to ``tabular``, one per line, most frequent first.

    Args:
        fasta: path of the FASTA file to read.
        tabular: path of the tab-separated file to write (overwritten).
    """
    # A per-call counter keeps repeated calls from accumulating into each
    # other, which happened when counts lived in a shared module global.
    counts = defaultdict(int)
    with open(fasta, "r") as source:
        for line in source:
            if line.startswith(">"):
                continue
            # Strip only the line terminator: slicing off the last character
            # would eat a base when the final line has no trailing newline.
            sequence = line.rstrip("\r\n")
            if sequence:  # blank lines are not sequences
                counts[sequence] += 1
    with open(tabular, "w") as target:
        for sequence in sorted(counts, key=counts.get, reverse=True):
            target.write("%s\t%s\n" % (sequence, counts[sequence]))
17 | |
18 | |
def readtabular_writefasta(tabular, fasta):
    """Expand a "sequence count" table into an unweighted FASTA file.

    Each input line is split on whitespace; the first field is the sequence
    and the second its integer count. The sequence is written ``count``
    times, each copy under a header ">N" where N is a running read number
    starting at 1.

    Args:
        tabular: path of the whitespace-separated table to read.
        fasta: path of the FASTA file to write (overwritten).

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        ValueError: if the count field is not an integer.
    """
    read_number = 0
    with open(tabular, "r") as source, open(fasta, "w") as target:
        for line in source:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line) instead
                # of failing on the missing count field.
                continue
            sequence = fields[0]
            for _ in range(int(fields[1])):
                read_number += 1
                target.write(">%s\n%s\n" % (read_number, sequence))
30 | |
def readtabular_writefastaweighted(tabular, fasta):
    """Convert a "sequence count" table into a weighted FASTA file.

    Each input line is split on whitespace; the first field is the sequence
    and the second its count. One record is written per line, with a header
    of the form ">N_count" where N is a running record number starting at 1.

    Args:
        tabular: path of the whitespace-separated table to read.
        fasta: path of the weighted FASTA file to write (overwritten).

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    record_number = 0
    with open(tabular, "r") as source, open(fasta, "w") as target:
        for line in source:
            # split() already discards the line terminator; chopping the
            # last character first would truncate the count on a final
            # line that has no trailing newline.
            fields = line.split()
            if not fields:
                continue  # blank lines carry no record and consume no number
            record_number += 1
            target.write(">%s_%s\n%s\n" % (record_number, fields[1], fields[0]))
41 | |
def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
    """Re-collapse a weighted FASTA file so each sequence appears once.

    Header lines start with ">" and end with "_<weight>"; the integer after
    the last underscore is the weight of the sequence line(s) that follow.
    Weights of identical sequences are summed and the result is written as
    weighted FASTA (">N_total" headers, N starting at 1), heaviest first.
    The sum of all header weights is reported on standard output.

    Args:
        fastaweigthed_input: path of the weighted FASTA file to read.
        fastaweigthed_reparsed: path of the weighted FASTA file to write
            (overwritten).

    Raises:
        ValueError: if a sequence line occurs before any header line, or a
            header does not end with an integer weight.
    """
    # Per-call totals, so repeated calls do not add onto earlier results.
    totals = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as source:
        for line in source:
            # Strip only the terminator so a final line without a newline
            # keeps its last character.
            record = line.rstrip("\r\n")
            if not record:
                continue
            if record.startswith(">"):
                weight = int(record[1:].split("_")[-1])
                number_reads += weight
            else:
                if weight is None:
                    raise ValueError(
                        "sequence line found before any header in %s"
                        % fastaweigthed_input
                    )
                totals[record] += weight
    with open(fastaweigthed_reparsed, "w") as target:
        ranked = sorted(totals, key=totals.get, reverse=True)
        for rank, sequence in enumerate(ranked, 1):
            target.write(">%s_%s\n%s\n" % (rank, totals[sequence], sequence))
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print("%s reads collapsed" % number_reads)
59 | |
def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted FASTA file into an unweighted FASTA file.

    Header lines start with ">" and end with "_<weight>"; the integer after
    the last underscore says how many times each following sequence line is
    written out. Output records get headers ">N", N being a running read
    number starting at 1.

    Args:
        fastaweigthed: path of the weighted FASTA file to read.
        fasta: path of the FASTA file to write (overwritten).

    Raises:
        ValueError: if a sequence line occurs before any header line, or a
            header does not end with an integer weight.
    """
    read_number = 0
    weight = None
    with open(fastaweigthed, "r") as source, open(fasta, "w") as target:
        for line in source:
            # Strip only the terminator so a final line without a newline
            # keeps its last character.
            record = line.rstrip("\r\n")
            if not record:
                continue
            if record.startswith(">"):
                weight = int(record[1:].split("_")[-1])
                continue
            if weight is None:
                raise ValueError(
                    "sequence line found before any header in %s" % fastaweigthed
                )
            for _ in range(weight):
                read_number += 1
                target.write(">%s\n%s\n" % (read_number, record))
74 | |
75 | |
# Module-level sequence counter, kept here because functions in this file
# may read `seqdic` as a global.
seqdic = defaultdict(int)


def main(argv):
    """Run the conversion selected on the command line.

    Args:
        argv: argument list laid out like ``sys.argv``: program name, input
            path, output path, then the conversion option.

    Exits with an error message (non-zero status) when arguments are missing
    or the option is not recognised, rather than failing with a bare
    IndexError or silently writing nothing.
    """
    converters = {
        "fasta2tabular": readfasta_writetabular,
        "tabular2fasta": readtabular_writefasta,
        "tabular2fastaweight": readtabular_writefastaweighted,
        "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
        "fastaweight2fasta": readfastaeighted_writefasta,
    }
    choices = "|".join(sorted(converters))
    if len(argv) < 4:
        sys.exit("usage: %s <input> <output> <%s>" % (argv[0], choices))
    option = argv[3]
    if option not in converters:
        sys.exit("unknown option %r; expected one of: %s" % (option, choices))
    converters[option](argv[1], argv[2])


if __name__ == "__main__":
    main(sys.argv)