Mercurial > repos > nml > stringmlst
comparison split_by_allele.py @ 0:fc0f15ca12e0 draft
planemo upload commit 0366addb646f1ddea484915abdeda939d7d49bd5
author | nml |
---|---|
date | Mon, 24 Oct 2016 13:15:20 -0400 |
parents | |
children | 4e03573653fe |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fc0f15ca12e0 |
---|---|
1 #!/usr/bin/env python | |
2 import getopt | |
3 import sys | |
4 import os | |
5 from Bio import SeqIO | |
6 | |
def _parse_allele_id(seqid):
    """Split a FASTA record id into ``(locus_name, allele_number)``.

    The id is split on '_' first; if that does not yield exactly two
    fields, '-' is tried instead.

    Raises:
        ValueError: if neither separator yields exactly two fields.
    """
    for separator in ('_', '-'):
        fields = seqid.split(separator)
        if len(fields) == 2:
            return fields[0], fields[1]
    raise ValueError(
        "Error could not parse out allele name and number from '%s'" % seqid)


def split_allele_file(alleles, profiles):
    """Split a multi-locus allele FASTA into one FASTA file per locus.

    Every record id must look like ``<locus>_<number>`` or
    ``<locus>-<number>``; ids using '-' are rewritten to use '_'. Records
    are appended to ``<locus>.fasta`` in the current working directory.
    Afterwards ``config.txt`` is written in the current working directory
    with a ``[loci]`` section (locus name and absolute FASTA path, tab
    separated) and a ``[profile]`` section pointing at ``profiles``.

    Args:
        alleles: path of the input FASTA file holding all alleles.
        profiles: path of the profile file, written verbatim into
            ``config.txt``.

    Raises:
        SystemExit: with a non-zero status and the message on stderr when
            a record id cannot be split into locus name and number.
    """
    # locus name -> open output handle for that locus' FASTA file
    writers = {}

    try:
        # Plain text mode: the "rU" flag was removed in Python 3.11.
        with open(alleles) as handle:
            for record in SeqIO.parse(handle, "fasta"):
                try:
                    name, num = _parse_allele_id(record.id)
                except ValueError as err:
                    # sys.exit(str) prints to stderr and exits with status 1,
                    # so the caller can see that the split failed.
                    sys.exit(str(err))

                # Normalise the id to '<locus>_<number>'; this is a no-op
                # for ids that already used '_'.
                record.id = name + '_' + num

                # First record of a new locus: create its output file.
                if name not in writers:
                    writers[name] = open(name + '.fasta', "w")
                SeqIO.write(record, writers[name], "fasta")
    finally:
        # Close (and flush) every per-locus file, also on the error path.
        for output_fh in writers.values():
            output_fh.close()

    # Create the config file based on the loci found.
    with open('config.txt', 'w') as cfile:
        cfile.write("[loci]\n")
        for name, writer in writers.items():
            # .name is still available on a closed file object
            cfile.write("%s\t%s\n" % (name, os.path.realpath(writer.name)))
        cfile.write("[profile]\n")
        cfile.write("profile\t%s\n" % profiles)

    return
56 | |
57 | |
def main(argv=None):
    """Parse ``--alleles`` and ``--profiles`` and split the allele file.

    Args:
        argv: argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        0 on success, 2 when the command line is invalid or incomplete.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Input arguments
    try:
        options, _remainder = getopt.getopt(argv, '', [
            'alleles=',
            'profiles=',
        ])
    except getopt.GetoptError as err:
        sys.stderr.write("%s\n" % err)
        return 2

    alleles = None
    profiles = None
    for opt, arg in options:
        # Compare for equality: "opt in ('--alleles')" would be a substring
        # test against a plain string, not a tuple membership test.
        if opt == '--alleles':
            alleles = arg
        elif opt == '--profiles':
            profiles = arg

    if not (alleles and profiles):
        # Report the problem instead of silently doing nothing.
        sys.stderr.write(
            "usage: %s --alleles <alleles.fasta> --profiles <profiles>\n"
            % sys.argv[0])
        return 2

    split_allele_file(alleles, profiles)
    return 0


if __name__ == "__main__":
    sys.exit(main())
75 |