comparison fromgtfTobed12.py @ 1:6fd4b3b90220 draft default tip

planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 15b8c2cc83708044413a152322bcbfca8a74d29a
author lldelisle
date Fri, 03 Nov 2023 14:13:51 +0000
parents 418e4d0fe0bd
children
comparison
equal deleted inserted replaced
0:418e4d0fe0bd 1:6fd4b3b90220
21 "feature in your GTF file. You may want to use the " 21 "feature in your GTF file. You may want to use the "
22 "`disable_infer_transcripts=True` option to speed up " 22 "`disable_infer_transcripts=True` option to speed up "
23 "database creation") 23 "database creation")
24 24
25 25
26 def convert_gtf_to_bed(fn, fo, useGene, mergeTranscripts, 26 def convert_gtf_to_bed(fn, fo, preferedName, mergeTranscripts,
27 mergeTranscriptsAndOverlappingExons, ucsc): 27 mergeTranscriptsAndOverlappingExons, ucsc):
28 db = gffutils.create_db(fn, ':memory:') 28 db = gffutils.create_db(fn, ':memory:')
29 # For each transcript: 29 # For each transcript:
30 prefered_name = "transcript_name" 30 if preferedName is not None:
31 if useGene or mergeTranscripts or mergeTranscriptsAndOverlappingExons: 31 prefered_name = preferedName
32 elif mergeTranscripts or mergeTranscriptsAndOverlappingExons:
32 prefered_name = "gene_name" 33 prefered_name = "gene_name"
34 else:
35 prefered_name = "transcript_name"
33 if mergeTranscripts or mergeTranscriptsAndOverlappingExons: 36 if mergeTranscripts or mergeTranscriptsAndOverlappingExons:
34 all_items = db.features_of_type("gene", order_by='start') 37 all_items = db.features_of_type("gene", order_by='start')
35 else: 38 else:
36 all_items = db.features_of_type("transcript", order_by='start') 39 all_items = db.features_of_type("transcript", order_by='start')
37 for tr in all_items: 40 for tr in all_items:
125 argp.add_argument('input', default=None, 128 argp.add_argument('input', default=None,
126 help="Input gtf file (can be gzip).") 129 help="Input gtf file (can be gzip).")
127 argp.add_argument('--output', default=sys.stdout, 130 argp.add_argument('--output', default=sys.stdout,
128 type=argparse.FileType('w'), 131 type=argparse.FileType('w'),
129 help="Output bed12 file.") 132 help="Output bed12 file.")
130 argp.add_argument('--useGene', action="store_true",
131 help="Use the gene name instead of the "
132 "transcript name.")
133 argp.add_argument('--ucscformat', action="store_true", 133 argp.add_argument('--ucscformat', action="store_true",
134 help="If you want that all chromosome names " 134 help="If you want that all chromosome names "
135 "begin with 'chr'.") 135 "begin with 'chr'.")
136 argp.add_argument('--preferedName', default=None,
137 help="Name to use for bed output.")
136 group = argp.add_mutually_exclusive_group() 138 group = argp.add_mutually_exclusive_group()
137 group.add_argument('--mergeTranscripts', action="store_true", 139 group.add_argument('--mergeTranscripts', action="store_true",
138 help="Merge all transcripts into a single " 140 help="Merge all transcripts into a single "
139 "entry to have one line per gene.") 141 "entry to have one line per gene.")
140 group.add_argument('--mergeTranscriptsAndOverlappingExons', 142 group.add_argument('--mergeTranscriptsAndOverlappingExons',
142 help="Merge all transcripts into a single " 144 help="Merge all transcripts into a single "
143 "entry to have one line per gene and merge" 145 "entry to have one line per gene and merge"
144 " overlapping exons.") 146 " overlapping exons.")
145 147
146 args = argp.parse_args() 148 args = argp.parse_args()
147 convert_gtf_to_bed(args.input, args.output, args.useGene, 149 convert_gtf_to_bed(args.input, args.output, args.preferedName,
148 args.mergeTranscripts, 150 args.mergeTranscripts,
149 args.mergeTranscriptsAndOverlappingExons, 151 args.mergeTranscriptsAndOverlappingExons,
150 args.ucscformat) 152 args.ucscformat)