comparison find_in_reference.py @ 1:e83e0ce8fb68

Add an option to reverse the search: find the reference field within the input field
author Jim Johnson <jj@umn.edu>
date Wed, 13 Aug 2014 15:01:33 -0500
parents e7e56b51d156
children c4fd2ea4f988
comparison
equal deleted inserted replaced
0:e7e56b51d156 1:e83e0ce8fb68
39 parser.add_option( '-o', '--output', dest='output', help='The output file for input lines filtered by reference') 39 parser.add_option( '-o', '--output', dest='output', help='The output file for input lines filtered by reference')
40 parser.add_option( '-f', '--filtered', dest='filtered', help='The output file for input lines not in the output') 40 parser.add_option( '-f', '--filtered', dest='filtered', help='The output file for input lines not in the output')
41 parser.add_option('-c','--input_column', dest='input_column', default=None, help='The column for the value in the input file. (first column = 1, default to last column)') 41 parser.add_option('-c','--input_column', dest='input_column', default=None, help='The column for the value in the input file. (first column = 1, default to last column)')
42 parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') 42 parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)')
43 parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) 43 parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' )
44 parser.add_option( '-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string' )
44 parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) 45 parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' )
45 parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) 46 parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' )
46 parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) 47 parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' )
47 parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) 48 parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' )
48 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) 49 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' )
106 if options.debug: 107 if options.debug:
107 print >> sys.stderr, "search: %s" % (search_string) 108 print >> sys.stderr, "search: %s" % (search_string)
108 refFile = open(options.reference,'r') 109 refFile = open(options.reference,'r')
109 for tn,fline in enumerate(refFile): 110 for tn,fline in enumerate(refFile):
110 fields = fline.split('\t') 111 fields = fline.split('\t')
111 target_string =fields[refcol] 112 target_string = fields[refcol].rstrip('\r\n')
112 if options.ignore_case: 113 if options.ignore_case:
113 target_string = target_string.upper() 114 target_string = target_string.upper()
115 search = search_string if not options.reverse_find else target_string
116 target = target_string if not options.reverse_find else search_string
114 if options.debug: 117 if options.debug:
115 print >> sys.stderr, "in: %s %s %s" % (search_string,search_string in target_string,target_string) 118 print >> sys.stderr, "in: %s %s %s" % (search,search in target,target)
116 if search_string in target_string: 119 if search in target:
117 found = True 120 found = True
118 if annotate: 121 if annotate:
119 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) 122 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns])
120 annotations.append(annotation) 123 annotations.append(annotation)
121 else: 124 else: