# HG changeset patch # User artbio # Date 1508110210 14400 # Node ID fffdb903f2d1dd498754c267c7464f569e3e9379 # Parent dfcdac284538387114f682a9548680a1a5a944e2 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blast_unmatched commit 1db05fc1c4849528c99b149d482eb34d3a80f22e diff -r dfcdac284538 -r fffdb903f2d1 blast_unmatched.py --- a/blast_unmatched.py Thu Oct 05 05:11:01 2017 -0400 +++ b/blast_unmatched.py Sun Oct 15 19:30:10 2017 -0400 @@ -3,21 +3,24 @@ import optparse import re + def parse_options(): """ Parse the options guiven to the script """ parser = optparse.OptionParser(description='Get unmatched blast queries') - parser.add_option('-f','--fasta', dest='fasta_file', help='Query fasta file\ -used during blast') - parser.add_option('-b','--blast', dest='blast_file', help='Blast tabular\ -output (queries in 1rst column)') - parser.add_option('-o','--output', dest='output_file', help='Output file name') + parser.add_option('-f', '--fasta', dest='fasta_file', + help='Query fasta file used during blast') + parser.add_option('-b', '--blast', dest='blast_file', + help='Blast tabular output (queries in 1rst column)') + parser.add_option('-o', '--output', dest='output_file', + help='Output file name') (options, args) = parser.parse_args() if len(args) > 0: parser.error('Wrong number of arguments') return options + def get_matched(blast_file): """ Get a dictionary of all the queries that got a match @@ -30,6 +33,7 @@ matched[query_id] = 1 return matched + def get_unmatched(output_file, fasta_file, matched): """ Compares matched queries to query fasta file and print unmatched to ouput @@ -40,8 +44,8 @@ with open(fasta_file, 'r') as infile: for line in infile: if line.startswith('>'): - subline = line[1:].rstrip() #qid are 100chars long in blast - while end.match(subline) != None: + subline = line[1:].rstrip() # qid are 100chars long in blast + if end.match(subline) is not None: subline = subline[:-1] if subline not in matched: output_file_handle.write(line) @@ -52,10 +56,12 @@ output_file_handle.write(line) output_file_handle.close() + def __main__(): opts = parse_options() matched = get_matched(opts.blast_file) get_unmatched(opts.output_file, opts.fasta_file, matched) + if __main__(): __main__() diff -r dfcdac284538 -r fffdb903f2d1 blast_unmatched.xml --- a/blast_unmatched.xml Thu Oct 05 05:11:01 2017 -0400 +++ b/blast_unmatched.xml Sun Oct 15 19:30:10 2017 -0400 @@ -1,4 +1,4 @@ - + get query sequences that didn't get a match during a blast