annotate find_in_reference.py @ 3:2429b413d90a draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
author jjohnson
date Thu, 12 May 2022 19:30:54 +0000
parents c4fd2ea4f988
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
1 #!/usr/bin/env python3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
2
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
4 import os.path
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
5 import sys
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
6 import optparse
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
7
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
8
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
9 """
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
10 #
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
11 #------------------------------------------------------------------------------
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
12 # University of Minnesota
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
13 # Copyright 2013, Regents of the University of Minnesota
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
14 #------------------------------------------------------------------------------
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
15 # Author:
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
16 #
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
17 # James E Johnson
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
18 #
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
19 #------------------------------------------------------------------------------
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
20 """
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
21
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
22 """
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
23 Takes 2 tabular files as input:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
24 1. The file to be filtered
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
25 2. The reference file
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
26
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
27 The string value of the selected column of the input file is searched for
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
28 in the string values of the selected column of the reference file.
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
29
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
30 The intended purpose is to filter a peptide fasta file in tabular format
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
31 by whether those peptide sequences are found in a reference fasta file.
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
32
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
33 """
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
34
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
35
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
36 def __main__():
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
37 # Parse Command Line
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
38 parser = optparse.OptionParser()
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
39 parser.add_option('-i', '--input', dest='input', help='The input file to filter. (Otherwise read from stdin)')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
40 parser.add_option('-r', '--reference', dest='reference', help='The reference file to filter against')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
41 parser.add_option('-o', '--output', dest='output', help='The output file for input lines filtered by reference')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
42 parser.add_option('-f', '--filtered', dest='filtered', help='The output file for input lines not in the output')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
43 parser.add_option('-c', '--input_column', dest='input_column', type="int", default=None, help='The column for the value in the input file. (first column = 1, default to last column)')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
44 parser.add_option('-C', '--reference_column', dest='reference_column', type="int", default=None, help='The column for the value in the reference file. (first column = 1, default to last column)')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
45 parser.add_option('-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
46 parser.add_option('-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
47 parser.add_option('-B', '--test_reverse', dest='test_reverse', action="store_true", default=False, help='Also search for reversed input string in reference')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
48 parser.add_option('-D', '--test_dna_reverse_complement', dest='test_reverse_comp', action="store_true", default=False, help='Also search for the DNA reverse complement of input string')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
49 parser.add_option('-k', '--keep', dest='keep', action="store_true", default=False, help='')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
50 parser.add_option('-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
51 parser.add_option('-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
52 parser.add_option('-S', '--annotation_col_sep', dest='annotation_col_sep', default=', ', help='separator character between annotation column from the same line')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
53 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
54 (options, args) = parser.parse_args()
2
c4fd2ea4f988 Add the option to test the reversed sequence and the DNA reverse complement of the sequence (ignored if the sequence cannot be interpreted as DNA)
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
55
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
56 # revcompl = lambda x: ''.join([{'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 't', 'c': 'g', 'g': 'c', 't': 'a', 'N': 'N', 'n': 'n'}[B] for B in x][: : -1])
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
57
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
# Complement of each supported nucleotide symbol.  Case is preserved and
# N/n maps to itself; any other character is deliberately absent.
COMP = dict(zip('ACGTacgtNn', 'TGCAtgcaNn'))


def revcompl(seq):
    """Return the DNA reverse complement of seq.

    Every character is looked up in COMP, then the complemented string is
    reversed.  A character with no COMP entry raises KeyError.
    """
    complemented = ''.join(map(COMP.__getitem__, seq))
    return complemented[::-1]
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
62
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
def test_rcomplement(seq, target):
    """Report whether the DNA reverse complement of seq occurs in target.

    The check only runs when the --test_dna_reverse_complement option
    (options.test_reverse_comp) is set; otherwise the result is False.
    A seq containing a character that revcompl cannot complement is treated
    as "not DNA" and also yields False.
    """
    if not options.test_reverse_comp:
        return False
    try:
        comp = revcompl(seq)
    except KeyError:
        # seq holds a symbol with no complement, so it is not DNA; skip it.
        return False
    return comp in target
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
71
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
def test_reverse(seq, target):
    """Report whether seq, read backwards, occurs in target.

    Only active when the --test_reverse option (options.test_reverse) is
    set.  An empty seq never matches.  Always returns a bool rather than
    leaking the raw operand of an ``and`` chain.
    """
    if not options.test_reverse or not seq:
        return False
    return seq[::-1] in target
2
c4fd2ea4f988 Add the option to test the reversed sequence and the DNA reverse complement of the sequence (ignored if the sequence cannot be interpreted as DNA)
Jim Johnson <jj@umn.edu>
parents: 1
diff changeset
74
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
75 # Input files
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
76 if options.input is not None:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
77 try:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
78 inputPath = os.path.abspath(options.input)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
79 inputFile = open(inputPath, 'r')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
80 except Exception as e:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
81 print("failed: %s" % e, file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
82 exit(2)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
83 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
84 inputFile = sys.stdin
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
85 # Reference
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
86 if options.reference is None:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
87 print("failed: reference file is required", file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
88 exit(2)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
89 # Output files
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
90 outFile = None
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
91 filteredFile = None
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
92 if options.filtered is None and options.output is None:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
93 # write to stdout
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
94 outFile = sys.stdout
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
95 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
96 if options.output is not None:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
97 try:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
98 outPath = os.path.abspath(options.output)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
99 outFile = open(outPath, 'w')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
100 except Exception as e:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
101 print("failed: %s" % e, file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
102 exit(3)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
103 if options.filtered is not None:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
104 try:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
105 filteredPath = os.path.abspath(options.filtered)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
106 filteredFile = open(filteredPath, 'w')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
107 except Exception as e:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
108 print("failed: %s" % e, file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
109 exit(3)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
110 incol = -1
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
111 if options.input_column and options.input_column > 0:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
112 incol = int(options.input_column)-1
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
113 refcol = -1
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
114 if options.reference_column and options.reference_column > 0:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
115 refcol = int(options.reference_column)-1
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
116 if options.annotation_columns:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
117 annotate = True
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
118 annotation_columns = [int(x) - 1 for x in options.annotation_columns.split(', ')]
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
119 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
120 annotate = False
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
121 refFile = None
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
122 num_found = 0
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
123 num_novel = 0
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
124 for ln, line in enumerate(inputFile):
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
125 annotations = []
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
126 try:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
127 found = False
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
128 search_string = line.split('\t')[incol].rstrip('\r\n')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
129 if options.ignore_case:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
130 search_string = search_string.upper()
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
131 if options.debug:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
132 print("search: %s" % (search_string), file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
133 refFile = open(options.reference, 'r')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
134 for tn, fline in enumerate(refFile):
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
135 fields = fline.split('\t')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
136 target_string = fields[refcol].rstrip('\r\n')
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
137 if options.ignore_case:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
138 target_string = target_string.upper()
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
139 search = search_string if not options.reverse_find else target_string
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
140 target = target_string if not options.reverse_find else search_string
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
141 if options.debug:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
142 print("in: %s %s %s" % (search, search in target, target), file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
143 if search in target or test_reverse(search, target) or test_rcomplement(search, target):
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
144 found = True
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
145 if annotate:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
146 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns])
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
147 annotations.append(annotation)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
148 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
149 break
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
150 if found:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
151 num_found += 1
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
152 if annotate:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
153 line = '%s\t%s\n' % (line.rstrip('\r\n'), options.annotation_separator.join(annotations))
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
154 if options.keep is True:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
155 if outFile:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
156 outFile.write(line)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
157 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
158 if filteredFile:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
159 filteredFile.write(line)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
160 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
161 num_novel += 1
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
162 if options.keep is True:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
163 if filteredFile:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
164 filteredFile.write(line)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
165 else:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
166 if outFile:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
167 outFile.write(line)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
168 except Exception as e:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
169 print("failed: Error reading %s - %s" % (options.reference, e), file=sys.stderr)
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
170 finally:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
171 if refFile:
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
172 refFile.close()
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
173 print("found: %d novel: %d" % (num_found, num_novel), file=sys.stdout)
0
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
174
e7e56b51d156 Uploaded
jjohnson
parents:
diff changeset
175
3
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
# Entry guard: call __main__() only when this file is run as a script, not when it is imported as a module.
176 if __name__ == "__main__":
2429b413d90a "planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents: 2
diff changeset
177 __main__()