Mercurial > repos > jjohnson > find_in_reference
comparison find_in_reference.py @ 1:e83e0ce8fb68
Add option to reverse the search, find reference field in input field
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Wed, 13 Aug 2014 15:01:33 -0500 |
parents | e7e56b51d156 |
children | c4fd2ea4f988 |
comparison
equal
deleted
inserted
replaced
0:e7e56b51d156 | 1:e83e0ce8fb68 |
---|---|
39 parser.add_option( '-o', '--output', dest='output', help='The output file for input lines filtered by reference') | 39 parser.add_option( '-o', '--output', dest='output', help='The output file for input lines filtered by reference') |
40 parser.add_option( '-f', '--filtered', dest='filtered', help='The output file for input lines not in the output') | 40 parser.add_option( '-f', '--filtered', dest='filtered', help='The output file for input lines not in the output') |
41 parser.add_option('-c','--input_column', dest='input_column', default=None, help='The column for the value in the input file. (first column = 1, default to last column)') | 41 parser.add_option('-c','--input_column', dest='input_column', default=None, help='The column for the value in the input file. (first column = 1, default to last column)') |
42 parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') | 42 parser.add_option('-C','--reference_column', dest='reference_column', default=None, help='The column for the value in the reference file. (first column = 1, default to last column)') |
43 parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) | 43 parser.add_option( '-I', '--case_insensitive', dest='ignore_case', action="store_true", default=False, help='case insensitive' ) |
| | 44 parser.add_option( '-R', '--reverse_find', dest='reverse_find', action="store_true", default=False, help='find the reference string in the input string' ) |
44 parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) | 45 parser.add_option( '-k', '--keep', dest='keep', action="store_true", default=False, help='' ) |
45 parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) | 46 parser.add_option( '-a', '--annotation_columns', dest='annotation_columns', default=None, help='If string is found, add these columns from reference' ) |
46 parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) | 47 parser.add_option( '-s', '--annotation_separator', dest='annotation_separator', default=';', help='separator character between annotations from different lines' ) |
47 parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) | 48 parser.add_option( '-S', '--annotation_col_sep', dest='annotation_col_sep', default=',', help='separator character between annotation column from the same line' ) |
48 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) | 49 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stdout' ) |
106 if options.debug: | 107 if options.debug: |
107 print >> sys.stderr, "search: %s" % (search_string) | 108 print >> sys.stderr, "search: %s" % (search_string) |
108 refFile = open(options.reference,'r') | 109 refFile = open(options.reference,'r') |
109 for tn,fline in enumerate(refFile): | 110 for tn,fline in enumerate(refFile): |
110 fields = fline.split('\t') | 111 fields = fline.split('\t') |
111 target_string =fields[refcol] | 112 target_string = fields[refcol].rstrip('\r\n') |
112 if options.ignore_case: | 113 if options.ignore_case: |
113 target_string = target_string.upper() | 114 target_string = target_string.upper() |
| | 115 search = search_string if not options.reverse_find else target_string |
| | 116 target = target_string if not options.reverse_find else search_string |
114 if options.debug: | 117 if options.debug: |
115 print >> sys.stderr, "in: %s %s %s" % (search_string,search_string in target_string,target_string) | 118 print >> sys.stderr, "in: %s %s %s" % (search,search in target,target) |
116 if search_string in target_string: | 119 if search in target: |
117 found = True | 120 found = True |
118 if annotate: | 121 if annotate: |
119 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) | 122 annotation = options.annotation_col_sep.join([fields[i] for i in annotation_columns]) |
120 annotations.append(annotation) | 123 annotations.append(annotation) |
121 else: | 124 else: |