Mercurial > repos > jjohnson > find_in_reference
annotate find_in_reference.py @ 3:2429b413d90a draft default tip
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
author | jjohnson |
---|---|
date | Thu, 12 May 2022 19:30:54 +0000 |
parents | c4fd2ea4f988 |
children |
rev | line source |
---|---|
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
1 #!/usr/bin/env python3 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
2 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
3 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
4 import os.path |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
5 import sys |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
6 import optparse |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
7 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
8 |
0 | 9 """ |
10 # | |
11 #------------------------------------------------------------------------------ | |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
12 # University of Minnesota |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
13 # Copyright 2013, Regents of the University of Minnesota |
0 | 14 #------------------------------------------------------------------------------ |
15 # Author: | |
16 # | |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
17 # James E Johnson |
0 | 18 # |
19 #------------------------------------------------------------------------------ | |
20 """ | |
21 | |
22 """ | |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
23 Takes 2 tabular files as input: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
24 1. The file to be filtered |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
25 2. The reference file |
0 | 26 |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
27 The string value of the selected column of the input file is searched for |
0 | 28 in the string values of the selected column of the reference file. |
29 | |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
30 The intended purpose is to filter a peptide fasta file in tabular format |
0 | 31 by whether those peptide sequences are found in a reference fasta file. |
32 | |
33 """ | |
34 | |
35 | |
36 def __main__(): | |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
37 # Parse Command Line |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
38 parser = optparse.OptionParser() |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
39 parser.add_option('-i', '--input', dest='input', help='The input file to filter. (Otherwise read from stdin)') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
40 parser.add_option('-r', '--reference', dest='reference', help='The reference file to filter against') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
41 parser.add_option('-o', '--output', dest='output', help='The output file for input lines filtered by reference') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
42 parser.add_option('-f', '--filtered', dest='filtered', help='The output file for input lines not in the output') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
43 parser.add_option('-c', '--input_column', dest='input_column', type="int", default=None, help='The column for the value in the input file. (first column = 1, default to last column)') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
44 parser.add_option('-C', '--reference_column', dest='reference_column', type="int", default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
45 parser.add_option('-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
46 parser.add_option('-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
47 parser.add_option('-B', '--test_reverse', dest='test_reverse', action="store_true", default=False, help='Also search for reversed input string in reference') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
48 parser.add_option('-D', '--test_dna_reverse_complement', dest='test_reverse_comp', action="store_true", default=False, help='Also search for the DNA reverse complement of input string') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
49 parser.add_option('-k', '--keep', dest='keep', action="store_true", default=False, help='') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
50 parser.add_option('-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
51 parser.add_option('-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
52 parser.add_option('-S', '--annotation_col_sep', dest='annotation_col_sep', default=', ', help='separator character between annotation column from the same line') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
53 parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
54 (options, args) = parser.parse_args() |
2
c4fd2ea4f988
Add the option to test the reversed sequence and the DNA reverse complement of the sequence (ignored if the sequence cannot be interpreted as DNA)
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
55 |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
56 # revcompl = lambda x: ''.join([{'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 't', 'c': 'g', 'g': 'c', 't': 'a', 'N': 'N', 'n': 'n'}[B] for B in x][: : -1]) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
57 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
# Complement of each DNA base. Case is preserved and N/n map to themselves.
# Any other character is deliberately absent, so looking it up raises KeyError.
COMP = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'a': 't', 'c': 'g', 'g': 'c', 't': 'a', 'N': 'N', 'n': 'n'}


def revcompl(seq):
    """Return the reverse complement of the DNA string *seq*.

    Each character is replaced by its entry in COMP and the result is
    reversed.

    Raises:
        KeyError: if *seq* contains a character that is not a key of COMP.
    """
    # Complement front to back, then reverse the complemented bases.
    complemented = [COMP[base] for base in seq]
    return ''.join(reversed(complemented))
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
62 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
def test_rcomplement(seq, target):
    """Report whether the DNA reverse complement of *seq* occurs in *target*.

    Returns False when ``options.test_reverse_comp`` is not set, and also
    when *seq* contains a character that ``revcompl`` cannot complement.
    """
    if not options.test_reverse_comp:
        return False
    try:
        comp = revcompl(seq)
    except KeyError:
        # seq holds a character with no entry in COMP, so it is not DNA;
        # treat that as "no match" rather than an error.
        return False
    return comp in target
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
71 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
def test_reverse(seq, target):
    """Report whether *seq* written backwards occurs in *target*.

    Returns False when ``options.test_reverse`` is not set or when *seq*
    is empty. The result is always a bool.
    """
    if not options.test_reverse or not seq:
        return False
    return seq[::-1] in target
2
c4fd2ea4f988
Add the option to test the reversed sequence and the DNA reverse complement of the sequence (ignored if the sequence cannot be interpreted as DNA)
Jim Johnson <jj@umn.edu>
parents:
1
diff
changeset
|
74 |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
75 # Input files |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
76 if options.input is not None: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
77 try: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
78 inputPath = os.path.abspath(options.input) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
79 inputFile = open(inputPath, 'r') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
80 except Exception as e: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
81 print("failed: %s" % e, file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
82 exit(2) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
83 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
84 inputFile = sys.stdin |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
85 # Reference |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
86 if options.reference is None: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
87 print("failed: reference file is required", file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
88 exit(2) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
89 # Output files |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
90 outFile = None |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
91 filteredFile = None |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
92 if options.filtered is None and options.output is None: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
93 # write to stdout |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
94 outFile = sys.stdout |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
95 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
96 if options.output is not None: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
97 try: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
98 outPath = os.path.abspath(options.output) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
99 outFile = open(outPath, 'w') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
100 except Exception as e: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
101 print("failed: %s" % e, file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
102 exit(3) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
103 if options.filtered is not None: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
104 try: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
105 filteredPath = os.path.abspath(options.filtered) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
106 filteredFile = open(filteredPath, 'w') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
107 except Exception as e: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
108 print("failed: %s" % e, file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
109 exit(3) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
110 incol = -1 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
111 if options.input_column and options.input_column > 0: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
112 incol = int(options.input_column)-1 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
113 refcol = -1 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
114 if options.reference_column and options.reference_column > 0: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
115 refcol = int(options.reference_column)-1 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
116 if options.annotation_columns: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
117 annotate = True |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
118 annotation_columns = [int(x) - 1 for x in options.annotation_columns.split(', ')] |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
119 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
120 annotate = False |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
121 refFile = None |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
122 num_found = 0 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
123 num_novel = 0 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
124 for ln, line in enumerate(inputFile): |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
125 annotations = [] |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
126 try: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
127 found = False |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
128 search_string = line.split('\t')[incol].rstrip('\r\n') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
129 if options.ignore_case: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
130 search_string = search_string.upper() |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
131 if options.debug: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
132 print("search: %s" % (search_string), file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
133 refFile = open(options.reference, 'r') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
134 for tn, fline in enumerate(refFile): |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
135 fields = fline.split('\t') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
136 target_string = fields[refcol].rstrip('\r\n') |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
137 if options.ignore_case: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
138 target_string = target_string.upper() |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
139 search = search_string if not options.reverse_find else target_string |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
140 target = target_string if not options.reverse_find else search_string |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
141 if options.debug: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
142 print("in: %s %s %s" % (search, search in target, target), file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
143 if search in target or test_reverse(search, target) or test_rcomplement(search, target): |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
144 found = True |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
145 if annotate: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
146 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
147 annotations.append(annotation) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
148 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
149 break |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
150 if found: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
151 num_found += 1 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
152 if annotate: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
153 line = '%s\t%s\n' % (line.rstrip('\r\n'), options.annotation_separator.join(annotations)) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
154 if options.keep is True: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
155 if outFile: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
156 outFile.write(line) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
157 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
158 if filteredFile: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
159 filteredFile.write(line) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
160 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
161 num_novel += 1 |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
162 if options.keep is True: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
163 if filteredFile: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
164 filteredFile.write(line) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
165 else: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
166 if outFile: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
167 outFile.write(line) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
168 except Exception as e: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
169 print("failed: Error reading %s - %s" % (options.reference, e), file=sys.stderr) |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
170 finally: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
171 if refFile: |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
172 refFile.close() |
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
173 print("found: %d novel: %d" % (num_found, num_novel), file=sys.stdout) |
0 | 174 |
175 | |
3
2429b413d90a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/find_in_reference commit 074e95e1b598ec41f0e18a2798b00cf65e9b399e-dirty"
jjohnson
parents:
2
diff
changeset
|
# Script entry point: invoke the tool's driver function only when this file
# is executed directly, so that merely importing the module does not start a run.
if __name__ == "__main__":
    __main__()