Mercurial > repos > jackcurragh > trips_viz_create_annotation
comparison trips_create_annotation/create_annotation_sqlite.py @ 3:f1c72ed4b32c draft
Uploaded
author | jackcurragh |
---|---|
date | Wed, 20 Apr 2022 15:18:02 +0000 |
parents | d70696d3341e |
children | cdecd5f9a4d3 |
comparison
equal
deleted
inserted
replaced
2:d70696d3341e | 3:f1c72ed4b32c |
---|---|
4 # All start codon positions (including cds_start) should be at the first nucleotide of the codon | 4 # All start codon positions (including cds_start) should be at the first nucleotide of the codon |
5 # All stop codon positions (including cds_stop) should be at the last nucleotide of the codon | 5 # All stop codon positions (including cds_stop) should be at the last nucleotide of the codon |
6 import sys | 6 import sys |
7 import re | 7 import re |
8 import sqlite3 | 8 import sqlite3 |
9 import subprocess | |
10 | |
9 from intervaltree import Interval, IntervalTree | 11 from intervaltree import Interval, IntervalTree |
10 import itertools | 12 import itertools |
11 from sqlitedict import SqliteDict | 13 from sqlitedict import SqliteDict |
12 import os | 14 import os |
13 | 15 |
22 user_transcript_id = sys.argv[5] | 24 user_transcript_id = sys.argv[5] |
23 # An example of a gene name from the annotation file | 25 # An example of a gene name from the annotation file |
24 user_gene_name = sys.argv[6] | 26 user_gene_name = sys.argv[6] |
25 # Set to true if transcript version is included in transcript_id, e.g: ENST000000123456.1 | 27 # Set to true if transcript version is included in transcript_id, e.g: ENST000000123456.1 |
26 TRAN_VERSION = True | 28 TRAN_VERSION = True |
29 output = sys.argv[7] | |
27 | 30 |
28 | 31 |
29 if os.path.isfile("{}.sqlite".format(organism)): | 32 if os.path.isfile("{}.sqlite".format(organism)): |
30 print("{}.sqlite already exists".format(organism)) | 33 print("{}.sqlite already exists".format(organism)) |
31 sys.exit() | 34 sys.exit() |
608 tree_dict[chrom] = IntervalTree.from_tuples(genomic_cds_dict[chrom]) | 611 tree_dict[chrom] = IntervalTree.from_tuples(genomic_cds_dict[chrom]) |
609 | 612 |
610 # print (list(tree_dict)) | 613 # print (list(tree_dict)) |
611 | 614 |
612 | 615 |
613 connection = sqlite3.connect("{}.sqlite".format(organism)) | 616 connection = sqlite3.connect(output) |
614 cursor = connection.cursor() | 617 cursor = connection.cursor() |
615 cursor.execute( | 618 cursor.execute( |
616 "CREATE TABLE IF NOT EXISTS transcripts (transcript VARCHAR(50), gene VARCHAR(50), length INT(6), cds_start INT(6), cds_stop INT(6), sequence VARCHAR(50000), strand CHAR(1), stop_list VARCHAR(10000), start_list VARCHAR(10000), exon_junctions VARCHAR(1000), tran_type INT(1), gene_type INT(1), principal INT(1), version INT(2),gc INT(3),five_gc INT(3), cds_gc INT(3), three_gc INT(3), chrom VARCHAR(20));" | 619 "CREATE TABLE IF NOT EXISTS transcripts (transcript VARCHAR(50), gene VARCHAR(50), length INT(6), cds_start INT(6), cds_stop INT(6), sequence VARCHAR(50000), strand CHAR(1), stop_list VARCHAR(10000), start_list VARCHAR(10000), exon_junctions VARCHAR(1000), tran_type INT(1), gene_type INT(1), principal INT(1), version INT(2),gc INT(3),five_gc INT(3), cds_gc INT(3), three_gc INT(3), chrom VARCHAR(20));" |
617 ) | 620 ) |
618 cursor.execute( | 621 cursor.execute( |
967 cursor.execute( | 970 cursor.execute( |
968 "INSERT INTO coding_regions VALUES('{}',{},{});".format( | 971 "INSERT INTO coding_regions VALUES('{}',{},{});".format( |
969 transcript, tup[0], tup[1] | 972 transcript, tup[0], tup[1] |
970 ) | 973 ) |
971 ) | 974 ) |
972 | 975 # print(cursor.execute( |
973 connection.commit() | 976 # ".tables" |
974 connection.close() | 977 # )) |
975 | 978 |
976 print("delim", delimiters) | 979 print("delim", delimiters) |
977 if (nuc_dict["starts"]["other"] / nuc_dict["starts"]["starts"]) > 0.05: | 980 if (nuc_dict["starts"]["other"] / nuc_dict["starts"]["starts"]) > 0.05: |
978 print( | 981 print( |
979 "Warning: {} transcripts do not have a an AUG at the CDS start position".format( | 982 "Warning: {} transcripts do not have a an AUG at the CDS start position".format( |
990 print( | 993 print( |
991 "Warning: {} transcripts were in the fasta file, but not the annotation file, these will be discarded".format( | 994 "Warning: {} transcripts were in the fasta file, but not the annotation file, these will be discarded".format( |
992 len(notinannotation) | 995 len(notinannotation) |
993 ) | 996 ) |
994 ) | 997 ) |
998 | |
999 | |
1000 | |
1001 connection.commit() | |
1002 connection.close() | |
1003 | |
1004 |