comparison trips_bam_to_sqlite/bam_to_sqlite.py @ 2:c8d8675697c6 draft

Uploaded
author jackcurragh
date Wed, 20 Apr 2022 15:18:00 +0000
parents 3ac12b611d7f
children
comparison
equal deleted inserted replaced
1:3ac12b611d7f 2:c8d8675697c6
226 three_ok = True 226 three_ok = True
227 227
228 return (pos_modifier, readlen_modifier, mismatches) 228 return (pos_modifier, readlen_modifier, mismatches)
229 229
230 230
231 def process_bam(bam_filepath, transcriptome_info_dict_path, outputfile): 231 def process_bam(bam_filepath, transcriptome_info_dict_path, outputfile, desc):
232 desc = "NULL" 232 desc = desc
233 start_time = time.time() 233 start_time = time.time()
234 study_dict = {} 234 study_dict = {}
235 nuc_count_dict = {"mapped": {}, "unmapped": {}} 235 nuc_count_dict = {"mapped": {}, "unmapped": {}}
236 dinuc_count_dict = {} 236 dinuc_count_dict = {}
237 threeprime_nuc_count_dict = {"mapped": {}, "unmapped": {}} 237 threeprime_nuc_count_dict = {"mapped": {}, "unmapped": {}}
287 master_trip_dict = {"fiveprime": {}, "threeprime": {}} 287 master_trip_dict = {"fiveprime": {}, "threeprime": {}}
288 master_offset_dict = {"fiveprime": {}, "threeprime": {}} 288 master_offset_dict = {"fiveprime": {}, "threeprime": {}}
289 master_metagene_stop_dict = {"fiveprime": {}, "threeprime": {}} 289 master_metagene_stop_dict = {"fiveprime": {}, "threeprime": {}}
290 290
291 os.system(f'samtools sort -n {bam_filepath} -o {bam_filepath}_n_sorted.bam') 291 os.system(f'samtools sort -n {bam_filepath} -o {bam_filepath}_n_sorted.bam')
292 pysam.set_verbosity(0)
292 infile = pysam.Samfile(f"{bam_filepath}_n_sorted.bam", "rb") 293 infile = pysam.Samfile(f"{bam_filepath}_n_sorted.bam", "rb")
293 header = infile.header["HD"] 294 header = infile.header["HD"]
294 unsorted = False 295 unsorted = False
295 if "SO" in header: 296 if "SO" in header:
296 if header["SO"] != "queryname": 297 if header["SO"] != "queryname":
719 "Usage: python bam_to_sqlite.py <path_to_bam_file> <path_to_organism.sqlite> <file_description (optional)>" 720 "Usage: python bam_to_sqlite.py <path_to_bam_file> <path_to_organism.sqlite> <file_description (optional)>"
720 ) 721 )
721 sys.exit() 722 sys.exit()
722 bam_filepath = sys.argv[1] 723 bam_filepath = sys.argv[1]
723 annotation_sqlite_filepath = sys.argv[2] 724 annotation_sqlite_filepath = sys.argv[2]
724 # try: 725 try:
725 # desc = sys.argv[3] 726 desc = sys.argv[3]
726 # except: 727 except:
727 # desc = bam_filepath.split("/")[-1] 728 desc = bam_filepath.split("/")[-1]
728 outputfile = bam_filepath + "v2.sqlite" 729
729 process_bam(bam_filepath, annotation_sqlite_filepath, outputfile) 730 outputfile = sys.argv[4]
731 process_bam(bam_filepath, annotation_sqlite_filepath, outputfile, desc)