Mercurial > repos > jjohnson > find_in_reference
comparison find_in_reference.py @ 2:c4fd2ea4f988
Add options to also test the reversed sequence and the DNA reverse complement of the sequence (the reverse-complement test is skipped if the sequence cannot be interpreted as DNA)
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 13 Nov 2014 14:09:50 -0600 |
parents | e83e0ce8fb68 |
children | 2429b413d90a |
comparison
equal
deleted
inserted
replaced
1:e83e0ce8fb68 | 2:c4fd2ea4f988 |
---|---|
40 parser.add_option( '-f', '--filtered', dest='filtered', help='The output file for input lines not in the output') | 40 parser.add_option( '-f', '--filtered', dest='filtered', help='The output file for input lines not in the output') |
41 parser.add_option('-c','--input_column', dest='input_column', default=None, help='The column for the value in the input file. (first column = 1, default to last column)') | 41 parser.add_option('-c','--input_column', dest='input_column', default=None, help='The column for the value in the input file. (first column = 1, default to last column)') |
42 parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') | 42 parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') |
43 parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) | 43 parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) |
44 parser.add_option( '-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string' ) | 44 parser.add_option( '-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string' ) |
45 parser.add_option( '-B', '--test_reverse', dest='test_reverse', action="store_true", default=False, help='Also search for reversed input string in reference' ) | |
46 parser.add_option( '-D', '--test_dna_reverse_complement', dest='test_reverse_comp', action="store_true", default=False, help='Also search for the DNA reverse complement of input string' ) | |
45 parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) | 47 parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) |
46 parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) | 48 parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) |
47 parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) | 49 parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) |
48 parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) | 50 parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) |
49 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) | 51 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) |
50 (options, args) = parser.parse_args() | 52 (options, args) = parser.parse_args() |
53 | |
54 revcompl = lambda x: ''.join([{'A':'T','C':'G','G':'C','T':'A','a':'t','c':'g','g':'c','t':'a','N':'N','n':'n'}[B] for B in x][::-1]) | |
55 def test_rcomplement(seq, target): | |
56 if options.test_reverse_comp: | |
57 try: | |
58 comp = revcompl(seq) | |
59 return comp in target | |
60 except: | |
61 pass | |
62 return False | |
63 | |
64 def test_reverse(seq,target): | |
65 return options.test_reverse and seq and seq[::-1] in target | |
66 | |
51 # Input files | 67 # Input files |
52 if options.input != None: | 68 if options.input != None: |
53 try: | 69 try: |
54 inputPath = os.path.abspath(options.input) | 70 inputPath = os.path.abspath(options.input) |
55 inputFile = open(inputPath, 'r') | 71 inputFile = open(inputPath, 'r') |
114 target_string = target_string.upper() | 130 target_string = target_string.upper() |
115 search = search_string if not options.reverse_find else target_string | 131 search = search_string if not options.reverse_find else target_string |
116 target = target_string if not options.reverse_find else search_string | 132 target = target_string if not options.reverse_find else search_string |
117 if options.debug: | 133 if options.debug: |
118 print >> sys.stderr, "in: %s %s %s" % (search,search in target,target) | 134 print >> sys.stderr, "in: %s %s %s" % (search,search in target,target) |
119 if search in target: | 135 if search in target or test_reverse(search,target) or test_rcomplement(search,target): |
120 found = True | 136 found = True |
121 if annotate: | 137 if annotate: |
122 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) | 138 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) |
123 annotations.append(annotation) | 139 annotations.append(annotation) |
124 else: | 140 else: |