Mercurial > repos > drosofff > msp_fasta_tabular_converter
comparison fasta_tabular_converter.py @ 0:951cb6b3979b draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
| author | drosofff |
|---|---|
| date | Sun, 21 Jun 2015 14:28:49 -0400 |
| parents | |
| children | 2f7278120be9 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:951cb6b3979b |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # | |
| 3 import sys | |
| 4 from collections import defaultdict | |
| 5 | |
def readfasta_writetabular(fasta, tabular):
    """Collapse the sequences of a FASTA file into a two-column table.

    Header lines (first character ">") are ignored. Every other non-empty
    line is treated as one complete sequence and tallied. One row per
    distinct sequence is written as sequence, tab, count, ordered by
    decreasing count.

    Args:
        fasta: path of the FASTA file to read.
        tabular: path of the tab-separated file to write.
    """
    # Local tally, so the result never depends on counts left behind by an
    # earlier call in the same process.
    counts = defaultdict(int)
    with open(fasta, "r") as handle:
        for line in handle:
            # Strip only the line terminator: slicing off the last character
            # would truncate a final sequence that has no trailing newline.
            seq = line.rstrip("\r\n")
            if not seq or seq.startswith(">"):
                continue
            counts[seq] += 1
    with open(tabular, "w") as out:
        for seq in sorted(counts, key=counts.get, reverse=True):
            out.write("%s\t%s\n" % (seq, counts[seq]))
| 17 | |
| 18 | |
def readtabular_writefasta(tabular, fasta):
    """Expand a sequence/count table into a FASTA file with one record per read.

    Each input row is split on whitespace; the first field is the sequence
    and the second its integer count. The sequence is written count times,
    each record getting a header that is a running number starting at 1.

    Args:
        tabular: path of the whitespace-separated table to read.
        fasta: path of the FASTA file to write.

    Raises:
        IndexError: a non-blank row has fewer than two fields.
        ValueError: the second field of a row is not an integer.
    """
    counter = 0
    with open(tabular, "r") as table, open(fasta, "w") as out:
        for line in table:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no record.
                continue
            sequence = fields[0]
            for _ in range(int(fields[1])):
                counter += 1
                out.write(">%s\n%s\n" % (counter, sequence))
| 30 | |
def readtabular_writefastaweighted(tabular, fasta):
    """Convert a sequence/count table into a weighted FASTA file.

    Each non-blank input row is split on whitespace; the first field is the
    sequence and the second its count. One record is written per row, with a
    header made of a running number (starting at 1), an underscore and the
    count, followed by the sequence on the next line.

    Args:
        tabular: path of the whitespace-separated table to read.
        fasta: path of the weighted FASTA file to write.

    Raises:
        IndexError: a non-blank row has fewer than two fields.
    """
    counter = 0
    with open(tabular, "r") as table, open(fasta, "w") as out:
        for line in table:
            # split() already discards the line terminator; chopping the last
            # character first would eat a digit of the count when the final
            # row has no trailing newline.
            fields = line.split()
            if not fields:
                continue
            counter += 1
            out.write(">%s_%s\n%s\n" % (counter, fields[1], fields[0]))
| 41 | |
def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
    """Re-collapse a weighted FASTA file so each sequence appears once.

    In the input, the integer after the last underscore of a header line
    (first character ">") is the weight of the sequence line(s) that follow.
    Weights of identical sequences are summed, and one record per distinct
    sequence is written by decreasing total weight, with a header made of
    its rank (starting at 1), an underscore and the total weight. The sum of
    all header weights is reported on standard output.

    Args:
        fastaweigthed_input: path of the weighted FASTA file to read.
        fastaweigthed_reparsed: path of the weighted FASTA file to write.

    Raises:
        ValueError: a header does not end in an integer weight, or a
            sequence line appears before any header.
    """
    # Local tally, so the result never depends on counts left behind by an
    # earlier call in the same process.
    totals = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as handle:
        for line in handle:
            # Strip only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            text = line.rstrip("\r\n")
            if not text:
                continue
            if text.startswith(">"):
                weight = int(text[1:].split("_")[-1])
                number_reads += weight
            elif weight is None:
                raise ValueError(
                    "sequence line found before any header in %s"
                    % fastaweigthed_input
                )
            else:
                totals[text] += weight
    with open(fastaweigthed_reparsed, "w") as out:
        ranked = sorted(totals, key=totals.get, reverse=True)
        for rank, seq in enumerate(ranked, 1):
            out.write(">%s_%s\n%s\n" % (rank, totals[seq], seq))
    print("%s reads collapsed" % number_reads)
| 59 | |
def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted FASTA file into a FASTA file with one record per read.

    In the input, the integer after the last underscore of a header line
    (first character ">") is the weight of the sequence line(s) that follow.
    Each sequence line is written weight times, every copy getting a header
    that is a running number starting at 1.

    Args:
        fastaweigthed: path of the weighted FASTA file to read.
        fasta: path of the FASTA file to write.

    Raises:
        ValueError: a header does not end in an integer weight, or a
            sequence line appears before any header.
    """
    counter = 0
    weight = None
    with open(fastaweigthed, "r") as handle, open(fasta, "w") as out:
        for line in handle:
            # Strip only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            text = line.rstrip("\r\n")
            if not text:
                continue
            if text.startswith(">"):
                weight = int(text[1:].split("_")[-1])
                continue
            if weight is None:
                raise ValueError(
                    "sequence line found before any header in %s"
                    % fastaweigthed
                )
            for _ in range(weight):
                counter += 1
                out.write(">%s\n%s\n" % (counter, text))
| 74 | |
| 75 | |
# Module-level tally, kept here because conversion functions in this file
# may accumulate sequence counts into it.
seqdic = defaultdict(int)


def main(argv):
    """Run the conversion selected on the command line.

    Args:
        argv: argument list laid out like sys.argv, i.e.
            [program, input_path, output_path, option]. Arguments after the
            option are ignored.

    Exits with an error message when arguments are missing or when the
    option is not one of the supported conversions.
    """
    if len(argv) < 4:
        sys.exit(
            "usage: %s <input> <output> <fasta2tabular|tabular2fasta|"
            "tabular2fastaweight|fastaweight2fastaweight|fastaweight2fasta>"
            % (argv[0] if argv else "fasta_tabular_converter.py")
        )
    source, target, option = argv[1], argv[2], argv[3]
    # Option keyword -> conversion function taking (input path, output path).
    converters = {
        "fasta2tabular": readfasta_writetabular,
        "tabular2fasta": readtabular_writefasta,
        "tabular2fastaweight": readtabular_writefastaweighted,
        "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
        "fastaweight2fasta": readfastaeighted_writefasta,
    }
    if option not in converters:
        # An unrecognized option used to do nothing and exit successfully,
        # leaving the caller without an output file and without an error.
        sys.exit("unknown conversion option: %s" % option)
    converters[option](source, target)


if __name__ == "__main__":
    main(sys.argv)
