Mercurial > repos > drosofff > msp_fasta_tabular_converter
diff fasta_tabular_converter.py @ 0:951cb6b3979b draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Sun, 21 Jun 2015 14:28:49 -0400 |
parents | |
children | 2f7278120be9 |
line wrap: on
line diff
#!/usr/bin/python
"""Convert read files between FASTA, weighted FASTA and tabular layouts.

Usage: fasta_tabular_converter.py <input> <output> <mode>

<mode> is one of: fasta2tabular, tabular2fasta, tabular2fastaweight,
fastaweight2fastaweight, fastaweight2fasta.

Formats, as handled by this script:
  * FASTA: a ">" header line followed by ONE sequence line (every
    non-header line is treated as a complete sequence).
  * weighted FASTA: same layout, but the header ends in "_<count>".
  * tabular: "<sequence><whitespace><count>" on each line.

Blank lines are ignored in every input format.
"""
import sys
from collections import defaultdict


def _chomp(line):
    """Return *line* without its end-of-line characters, if it has any.

    Unlike ``line[:-1]`` this does not eat the last real character of a
    final line that lacks a trailing newline.
    """
    return line.rstrip("\r\n")


def _by_decreasing_count(counts):
    """Return the keys of *counts* ordered from highest to lowest count."""
    return sorted(counts, key=counts.get, reverse=True)


def _header_weight(header):
    """Return the integer found after the last "_" of a ">..." header line."""
    return int(_chomp(header)[1:].split("_")[-1])


def readfasta_writetabular(fasta, tabular):
    """Collapse a FASTA file into a "sequence<TAB>count" table.

    fasta   -- path of the FASTA file to read.
    tabular -- path of the table to write, most frequent sequence first.
    """
    # Local counter: a module-level one would leak counts between calls.
    counts = defaultdict(int)
    with open(fasta, "r") as source:
        for line in source:
            if line.startswith(">"):
                continue
            seq = _chomp(line)
            if seq:
                counts[seq] += 1
    with open(tabular, "w") as out:
        for seq in _by_decreasing_count(counts):
            out.write("%s\t%s\n" % (seq, counts[seq]))


def readtabular_writefasta(tabular, fasta):
    """Expand a "sequence count" table into one FASTA record per read.

    tabular -- path of the table to read.
    fasta   -- path of the FASTA file to write; headers are a running
               number starting at 1.
    """
    counter = 0
    with open(tabular, "r") as source, open(fasta, "w") as out:
        for line in source:
            fields = line.split()
            if not fields:
                continue
            for _ in range(int(fields[1])):
                counter += 1
                out.write(">%s\n%s\n" % (counter, fields[0]))


def readtabular_writefastaweighted(tabular, fasta):
    """Turn a "sequence count" table into a weighted FASTA file.

    tabular -- path of the table to read.
    fasta   -- path of the FASTA file to write; each header is
               "<running number>_<count>".
    """
    counter = 0
    with open(tabular, "r") as source, open(fasta, "w") as out:
        for line in source:
            fields = line.split()
            if not fields:
                continue
            counter += 1
            out.write(">%s_%s\n%s\n" % (counter, fields[1], fields[0]))


def readfastaeighted_writefastaweighted(fastaweigthed_input,
                                        fastaweigthed_reparsed):
    """Re-collapse a weighted FASTA file, merging duplicated sequences.

    fastaweigthed_input    -- path of the weighted FASTA file to read.
    fastaweigthed_reparsed -- path of the weighted FASTA file to write,
                              highest total weight first.

    Prints "<total weight> reads collapsed" on standard output.
    Raises ValueError if a sequence line precedes the first header.
    """
    counts = defaultdict(int)
    number_reads = 0
    weight = None
    with open(fastaweigthed_input, "r") as source:
        for line in source:
            if line.startswith(">"):
                weight = _header_weight(line)
                number_reads += weight
                continue
            seq = _chomp(line)
            if not seq:
                continue
            if weight is None:
                raise ValueError("sequence line found before any '>' header "
                                 "in %s" % fastaweigthed_input)
            counts[seq] += weight
    with open(fastaweigthed_reparsed, "w") as out:
        for rank, seq in enumerate(_by_decreasing_count(counts), 1):
            out.write(">%s_%s\n%s\n" % (rank, counts[seq], seq))
    sys.stdout.write("%s reads collapsed\n" % number_reads)


def readfastaeighted_writefasta(fastaweigthed, fasta):
    """Expand a weighted FASTA file into one FASTA record per read.

    fastaweigthed -- path of the weighted FASTA file to read.
    fasta         -- path of the FASTA file to write; headers are a
                     running number starting at 1.

    Raises ValueError if a sequence line precedes the first header.
    """
    counter = 0
    weight = None
    with open(fastaweigthed, "r") as source, open(fasta, "w") as out:
        for line in source:
            if line.startswith(">"):
                weight = _header_weight(line)
                continue
            seq = _chomp(line)
            if not seq:
                continue
            if weight is None:
                raise ValueError("sequence line found before any '>' header "
                                 "in %s" % fastaweigthed)
            for _ in range(weight):
                counter += 1
                out.write(">%s\n%s\n" % (counter, seq))


# Conversion mode (third command-line argument) -> converter function.
_CONVERTERS = {
    "fasta2tabular": readfasta_writetabular,
    "tabular2fasta": readtabular_writefasta,
    "tabular2fastaweight": readtabular_writefastaweighted,
    "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
    "fastaweight2fasta": readfastaeighted_writefasta,
}


def _main(argv):
    """Run the converter selected by argv[3] on argv[1] (in) and argv[2] (out)."""
    if len(argv) < 4:
        sys.exit("usage: fasta_tabular_converter.py <input> <output> <mode>")
    option = argv[3]
    if option not in _CONVERTERS:
        # Fail loudly instead of silently producing no output file.
        sys.exit("unknown conversion mode %r; expected one of: %s"
                 % (option, ", ".join(sorted(_CONVERTERS))))
    _CONVERTERS[option](argv[1], argv[2])


if __name__ == "__main__":
    _main(sys.argv)