Mercurial > repos > nml > stringmlst
comparison split_by_allele.py @ 1:4e03573653fe draft default tip
planemo upload commit 008f4667b70be22e9ddf496738b3f74bb942ed28
author | nml |
---|---|
date | Tue, 19 Sep 2017 16:34:57 -0400 |
parents | fc0f15ca12e0 |
children |
Comparison legend: equal | deleted | inserted | replaced
0:fc0f15ca12e0 | 1:4e03573653fe |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 import getopt | 2 import getopt |
3 import os | |
3 import sys | 4 import sys |
4 import os | 5 |
5 from Bio import SeqIO | 6 from Bio import SeqIO |
6 | 7 |
7 def split_allele_file(alleles,profiles): | 8 ERROR_MSG = "Error could not parse out allele name and number from '%s'" |
8 | 9 |
10 | |
11 def split_allele_file(alleles, profiles): | |
12 | |
9 writers = {} | 13 writers = {} |
10 | 14 |
11 handle = open(alleles, "rU") | 15 handle = open(alleles, "rU") |
12 for record in SeqIO.parse(handle, "fasta"): | 16 for record in SeqIO.parse(handle, "fasta"): |
13 | 17 |
14 seqid=record.id | 18 seqid = record.id |
15 | 19 |
16 #split out the alelle name from the version number | 20 # split out the alelle name from the version number |
17 #attempting to split based on '-' first, if that fails, then '_' | 21 # attempting to split based on '-' first, if that fails, then '_' |
18 result = seqid.split('_') | 22 result = seqid.split('_') |
19 | 23 |
20 if len(result) !=2: | 24 if len(result) != 2: |
21 result = seqid.split('-') | 25 result = seqid.split('-') |
22 if len(result) ==2: | 26 if len(result) == 2: |
23 newid = '_'.join(result) | 27 newid = '_'.join(result) |
24 record.id = newid | 28 record.id = newid |
25 else: | 29 else: |
26 print "Error could not parse out allele name and number from '%s'" % seqid | 30 print(ERROR_MSG % seqid) |
27 exit(0) | 31 exit(0) |
28 | |
29 | |
30 name,num = result | |
31 | 32 |
33 name, num = result | |
32 | 34 |
33 #if writer exist, then write to that current fasta file | 35 # if writer exist, then write to that current fasta file |
34 if name in writers: | 36 if name in writers: |
35 SeqIO.write(record, writers[name], "fasta") | 37 SeqIO.write(record, writers[name], "fasta") |
36 else: | 38 else: |
37 #new allele found, create new writer and add the first record | 39 # new allele found, create new writer and add the first record |
38 file_name = name + '.fasta' | 40 file_name = name + '.fasta' |
39 output_fh = open(file_name, "w") | 41 output_fh = open(file_name, "w") |
40 SeqIO.write(record, output_fh, "fasta") | 42 SeqIO.write(record, output_fh, "fasta") |
41 writers[name] = output_fh | 43 writers[name] = output_fh |
42 | 44 |
43 handle.close() | 45 handle.close() |
44 | 46 |
45 #creat config file based on the alleles found | 47 # create config file based on the alleles found |
46 with open('config.txt','w') as cfile: | 48 with open('config.txt', 'w') as cfile: |
47 cfile.write("[loci]\n") | 49 cfile.write("[loci]\n") |
48 for name, writer in writers.iteritems() : | 50 for name, writer in writers.items(): |
49 path = os.path.realpath(writer.name) | 51 path = os.path.realpath(writer.name) |
50 cfile.write("%s\t%s\n" % (name,path)) | 52 cfile.write("%s\t%s\n" % (name, path)) |
51 cfile.write("[profile]\n") | 53 cfile.write("[profile]\n") |
52 cfile.write("profile\t%s\n" % profiles) | 54 cfile.write("profile\t%s\n" % profiles) |
53 | 55 |
54 | |
55 return | 56 return |
56 | 57 |
57 | 58 |
58 alleles=None | 59 alleles = None |
59 profiles=None | 60 profiles = None |
60 | 61 |
61 """Input arguments""" | 62 """Input arguments""" |
62 options, remainder = getopt.getopt(sys.argv[1:], '', [ | 63 options, remainder = getopt.getopt(sys.argv[1:], '', [ |
63 'alleles=', | 64 'alleles=', |
64 'profiles=' | 65 'profiles=' |
69 alleles = arg | 70 alleles = arg |
70 elif opt in ('--profiles'): | 71 elif opt in ('--profiles'): |
71 profiles = arg | 72 profiles = arg |
72 | 73 |
73 if alleles and profiles: | 74 if alleles and profiles: |
74 split_allele_file(alleles,profiles) | 75 split_allele_file(alleles, profiles) |
75 |