# HG changeset patch # User nml # Date 1505853297 14400 # Node ID 4e03573653fe72e642af3f3416e2e1da8706cb9a # Parent fc0f15ca12e051b659bbd776269de26b8b13b638 planemo upload commit 008f4667b70be22e9ddf496738b3f74bb942ed28 diff -r fc0f15ca12e0 -r 4e03573653fe split_by_allele.py --- a/split_by_allele.py Mon Oct 24 13:15:20 2016 -0400 +++ b/split_by_allele.py Tue Sep 19 16:34:57 2017 -0400 @@ -1,62 +1,63 @@ #!/usr/bin/env python import getopt +import os import sys -import os + from Bio import SeqIO -def split_allele_file(alleles,profiles): - +ERROR_MSG = "Error could not parse out allele name and number from '%s'" + + +def split_allele_file(alleles, profiles): + writers = {} handle = open(alleles, "rU") for record in SeqIO.parse(handle, "fasta"): - - seqid=record.id - - #split out the alelle name from the version number - #attempting to split based on '-' first, if that fails, then '_' + + seqid = record.id + + # split out the alelle name from the version number + # attempting to split based on '-' first, if that fails, then '_' result = seqid.split('_') - - if len(result) !=2: + + if len(result) != 2: result = seqid.split('-') - if len(result) ==2: + if len(result) == 2: newid = '_'.join(result) record.id = newid else: - print "Error could not parse out allele name and number from '%s'" % seqid + print(ERROR_MSG % seqid) exit(0) - - - name,num = result + name, num = result - #if writer exist, then write to that current fasta file + # if writer exist, then write to that current fasta file if name in writers: SeqIO.write(record, writers[name], "fasta") else: - #new allele found, create new writer and add the first record + # new allele found, create new writer and add the first record file_name = name + '.fasta' output_fh = open(file_name, "w") SeqIO.write(record, output_fh, "fasta") writers[name] = output_fh - + handle.close() - #creat config file based on the alleles found - with open('config.txt','w') as cfile: + # create config file based on the alleles found + with open('config.txt', 'w') as cfile: cfile.write("[loci]\n") - for name, writer in writers.iteritems() : + for name, writer in writers.items(): path = os.path.realpath(writer.name) - cfile.write("%s\t%s\n" % (name,path)) + cfile.write("%s\t%s\n" % (name, path)) cfile.write("[profile]\n") cfile.write("profile\t%s\n" % profiles) - return -alleles=None -profiles=None +alleles = None +profiles = None """Input arguments""" options, remainder = getopt.getopt(sys.argv[1:], '', [ @@ -71,5 +72,4 @@ profiles = arg if alleles and profiles: - split_allele_file(alleles,profiles) - + split_allele_file(alleles, profiles) diff -r fc0f15ca12e0 -r 4e03573653fe stringmlst.xml --- a/stringmlst.xml Mon Oct 24 13:15:20 2016 -0400 +++ b/stringmlst.xml Tue Sep 19 16:34:57 2017 -0400 @@ -1,7 +1,8 @@ - + k-mer tool for multilocus sequence typing - stringmlst + stringMLST + biopython diff -r fc0f15ca12e0 -r 4e03573653fe test-data/results.tsv --- a/test-data/results.tsv Mon Oct 24 13:15:20 2016 -0400 +++ b/test-data/results.tsv Tue Sep 19 16:34:57 2017 -0400 @@ -1,2 +1,2 @@ Sample abcZ bglA cat dapE dat ldh lhkA ST -answer 2 1 11 3 3 1 7 5 +input 2 1 11 3 3 1 7 5 diff -r fc0f15ca12e0 -r 4e03573653fe tool_dependencies.xml --- a/tool_dependencies.xml Mon Oct 24 13:15:20 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - -