Mercurial > repos > galaxyp > pep_pointer
annotate pep_pointer.py @ 3:a6282baa8c6f draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
author | galaxyp |
---|---|
date | Mon, 20 Jun 2022 13:59:52 +0000 |
parents | 073a2965e3b2 |
children |
rev | line source |
---|---|
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
1 |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
2 # |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
3 # Author: Praveen Kumar |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
4 # Updated: April 6th, 2018 (updated to python3: May 2022) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
5 # |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
6 # |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
7 # |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
8 |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
9 import re |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
10 |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
11 |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
12 def main(): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
13 import sys |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
14 if len(sys.argv) == 4: |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
15 inputFile = sys.argv |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
16 infh = open(inputFile[1], "r") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
17 # infh = open("Mus_musculus.GRCm38.90.chr.gtf", "r") |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
18 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
19 gtf = {} |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
20 gtf_transcript = {} |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
21 gtf_gene = {} |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
22 for each in infh.readlines(): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
23 a = each.split("\t") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
24 if re.search("^[^#]", each): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
25 if re.search("gene_biotype \"protein_coding\"", a[8]) and int(a[4].strip()) != int(a[3].strip()): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
26 type = a[2].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
27 if type == "gene" or type == "exon" or type == "CDS" or type == "five_prime_utr" or type == "three_prime_utr": |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
28 chr = "chr" + a[0].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
29 strand = a[6].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
30 if strand == "+": |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
31 start = a[3].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
32 end = a[4].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
33 elif strand == "-": |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
34 if int(a[4].strip()) > int(a[3].strip()): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
35 start = a[3].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
36 end = a[4].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
37 elif int(a[4].strip()) < int(a[3].strip()): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
38 start = a[4].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
39 end = a[3].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
40 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
41 print("Please check the start end coordinates in the GTF file") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
42 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
43 print("Please check the strand information in the GTF file. It should be '+' or '-'.") |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
44 if strand not in gtf: |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
45 gtf[strand] = {} |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
46 if type not in gtf[strand]: |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
47 gtf[strand][type] = [] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
48 b = re.search("gene_id \"(.+?)\";", a[8].strip()) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
49 gene = b.group(1) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
50 if type == "gene": |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
51 transcript = "" |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
52 else: |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
53 b = re.search("transcript_id \"(.+?)\";", a[8].strip()) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
54 transcript = b.group(1) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
55 data = (chr, start, end, gene, transcript, strand, type) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
56 gtf[strand][type].append(data) |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
57 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
58 if type == "exon": |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
59 if chr + "#" + strand in gtf_transcript: |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
60 if transcript + "#" + gene in gtf_transcript[chr + "#" + strand]: |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
61 gtf_transcript[chr + "#" + strand][transcript + "#" + gene][0].append(int(start)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
62 gtf_transcript[chr + "#" + strand][transcript + "#" + gene][1].append(int(end)) |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
63 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
64 gtf_transcript[chr + "#" + strand][transcript + "#" + gene] = [[], []] |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
65 gtf_transcript[chr + "#" + strand][transcript + "#" + gene][0].append(int(start)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
66 gtf_transcript[chr + "#" + strand][transcript + "#" + gene][1].append(int(end)) |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
67 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
68 gtf_transcript[chr + "#" + strand] = {} |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
69 gtf_transcript[chr + "#" + strand][transcript + "#" + gene] = [[], []] |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
70 gtf_transcript[chr + "#" + strand][transcript + "#" + gene][0].append(int(start)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
71 gtf_transcript[chr + "#" + strand][transcript + "#" + gene][1].append(int(end)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
72 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
73 if type == "gene": |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
74 if chr + "#" + strand in gtf_gene: |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
75 gtf_gene[chr + "#" + strand][0].append(int(start)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
76 gtf_gene[chr + "#" + strand][1].append(int(end)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
77 gtf_gene[chr + "#" + strand][2].append(gene) |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
78 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
79 gtf_gene[chr + "#" + strand] = [[0], [0], ["no_gene"]] |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
80 gtf_gene[chr + "#" + strand][0].append(int(start)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
81 gtf_gene[chr + "#" + strand][1].append(int(end)) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
82 gtf_gene[chr + "#" + strand][2].append(gene) |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
83 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
84 # Build intron records: for each transcript, the gaps between consecutive exons (sorted by start). |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
85 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
86 gtf["+"]["intron"] = [] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
87 gtf["-"]["intron"] = [] |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
88 for chr_strand in gtf_transcript.keys(): |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
89 chr = chr_strand.split("#")[0] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
90 strand = chr_strand.split("#")[1] |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
91 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
92 for transcript_gene in gtf_transcript[chr_strand].keys(): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
93 start_list = gtf_transcript[chr_strand][transcript_gene][0] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
94 end_list = gtf_transcript[chr_strand][transcript_gene][1] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
95 sorted_start_index = [i[0] for i in sorted(enumerate(start_list), key=lambda x:x[1])] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
96 sorted_end_index = [i[0] for i in sorted(enumerate(end_list), key=lambda x:x[1])] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
97 if sorted_start_index == sorted_end_index: |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
98 sorted_start = sorted(start_list) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
99 sorted_end = [end_list[i] for i in sorted_start_index] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
100 for x in range(len(sorted_start))[1:]: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
101 intron_start = sorted_end[x - 1] + 1 |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
102 intron_end = sorted_start[x] - 1 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
103 transcript = transcript_gene.split("#")[0] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
104 gene = transcript_gene.split("#")[1] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
105 data = (chr, str(intron_start), str(intron_end), gene, transcript, strand, "intron") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
106 gtf[strand]["intron"].append(data) |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
107 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
108 # Build intergenic records: for each chromosome/strand, the gaps between consecutive genes (sorted by start). |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
109 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
110 gtf["+"]["intergenic"] = [] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
111 gtf["-"]["intergenic"] = [] |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
112 for chr_strand in gtf_gene.keys(): |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
113 chr = chr_strand.split("#")[0] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
114 strand = chr_strand.split("#")[1] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
115 start_list = gtf_gene[chr_strand][0] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
116 end_list = gtf_gene[chr_strand][1] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
117 gene_list = gtf_gene[chr_strand][2] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
118 sorted_start_index = [i[0] for i in sorted(enumerate(start_list), key=lambda x:x[1])] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
119 sorted_end_index = [i[0] for i in sorted(enumerate(end_list), key=lambda x:x[1])] |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
120 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
121 sorted_start = sorted(start_list) |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
122 sorted_end = [end_list[i] for i in sorted_start_index] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
123 sorted_gene = [gene_list[i] for i in sorted_start_index] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
124 for x in range(len(sorted_start))[1:]: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
125 intergene_start = sorted_end[x - 1] + 1 |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
126 intergene_end = sorted_start[x] - 1 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
127 if intergene_start < intergene_end: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
128 intergene_1 = sorted_gene[x - 1] |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
129 intergene_2 = sorted_gene[x] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
130 gene = intergene_1 + "-#-" + intergene_2 |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
131 data = (chr, str(intergene_start), str(intergene_end), gene, "", strand, "intergenic") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
132 gtf[strand]["intergenic"].append(data) |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
133 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
134 import sqlite3 |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
135 # conn = sqlite3.connect('gtf_database.db') |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
136 conn = sqlite3.connect(":memory:") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
137 c = conn.cursor() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
138 # c.execute("DROP TABLE IF EXISTS gtf_data;") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
139 # c.execute("CREATE TABLE IF NOT EXISTS gtf_data(chr text, start int, end int, gene text, transcript text, strand text, type text)") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
140 c.execute("CREATE TABLE gtf_data(chr text, start int, end int, gene text, transcript text, strand text, type text)") |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
141 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
142 for strand in gtf.keys(): |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
143 if strand not in ["+", "-"]: |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
144 print("Please check the strand information in the GTF file. It should be '+' or '-'.") |
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
145 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
146 for type in gtf[strand].keys(): |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
147 data = gtf[strand][type] |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
148 c.executemany('INSERT INTO gtf_data VALUES (?,?,?,?,?,?,?)', data) |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
149 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
150 conn.commit() |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
151 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
152 infh = open(inputFile[2], "r") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
153 # infh = open("Mouse_Data_All_peptides_withNewDBs.txt", "r") |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
154 data = infh.readlines() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
155 # output file |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
156 outfh = open(inputFile[3], 'w') |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
157 # outfh = open("classified_1_Mouse_Data_All_peptides_withNewDBs.txt", "w") |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
158 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
159 for each in data: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
160 a = each.strip().split("\t") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
161 chr = a[0].strip() |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
162 pep_start = str(int(a[1].strip()) + 1) |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
163 pep_end = a[2].strip() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
164 strand = a[5].strip() |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
165 each = "\t".join(a[:6]) |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
166 if (len(a) == 12 and int(a[9]) == 1) or (len(a) == 6): |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
167 c.execute("select * from gtf_data where type = 'CDS' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
168 rows = c.fetchall() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
169 if len(rows) > 0: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
170 outfh.write(each.strip() + "\tCDS\n") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
171 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
172 c.execute("select * from gtf_data where type = 'five_prime_utr' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
173 rows = c.fetchall() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
174 if len(rows) > 0: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
175 outfh.write(each.strip() + "\tfive_prime_utr\n") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
176 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
177 c.execute("select * from gtf_data where type = 'three_prime_utr' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
178 rows = c.fetchall() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
179 if len(rows) > 0: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
180 outfh.write(each.strip() + "\tthree_prime_utr\n") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
181 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
182 c.execute("select * from gtf_data where type = 'exon' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
183 rows = c.fetchall() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
184 if len(rows) > 0: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
185 outfh.write(each.strip() + "\texon\n") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
186 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
187 c.execute("select * from gtf_data where type = 'intron' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
188 rows = c.fetchall() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
189 if len(rows) > 0: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
190 outfh.write(each.strip() + "\tintron\n") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
191 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
192 c.execute("select * from gtf_data where type = 'gene' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
193 rows = c.fetchall() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
194 if len(rows) > 0: |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
195 outfh.write(each.strip() + "\tgene\n") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
196 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
197 c.execute("select * from gtf_data where type = 'intergenic' and chr = '" + chr + "' and start <= " + pep_start + " and end >= " + pep_end + " and strand = '" + strand + "' ") |
2
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
198 rows = c.fetchall() |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
199 if len(rows) > 0: |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
200 outfh.write(each.strip() + "\tintergene\n") |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
201 else: |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
202 outfh.write(each.strip() + "\tOVERLAPPING_ON_TWO_REGIONS: PLEASE_LOOK_MANUALLY (Will be updated in next version)\n") |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
203 elif (len(a) == 12 and int(a[9]) == 2): |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
204 outfh.write(each.strip() + "\tSpliceJunction\n") |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
205 else: |
073a2965e3b2
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents:
0
diff
changeset
|
206 outfh.write(each.strip() + "\tPlease check\n") |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
207 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
208 conn.close() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
209 outfh.close() |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
210 else: |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
211 print("USAGE: python pep_pointer.py <input GTF file> <input tblastn file> <name of output file>") |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
212 return None |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
213 |
3
a6282baa8c6f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit 494bc6dd87b9a6e2af40cb32aa5d2ee6e9bfebfc
galaxyp
parents:
2
diff
changeset
|
214 |
0
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
215 if __name__ == "__main__": |
149ed6a9680f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff
changeset
|
216 main() |