Mercurial > repos > galaxyp > cravatool
annotate cravatp_submit.py @ 3:a018c44dc18b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatp_score_and_annotate commit d80e60ce74aabe64e131d560085af099d52b81cf-dirty
author | galaxyp |
---|---|
date | Fri, 07 Sep 2018 16:53:05 -0400 |
parents | 2c7bcc1219fc |
children |
rev | line source |
---|---|
1
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
2 # |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
3 # Author: Ray W. Sajulga |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
4 # |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
5 # |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
6 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
7 import requests # pipenv requests |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
8 import json |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
9 import time |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
10 import urllib |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
11 import sys |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
12 import csv |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
13 import re |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
14 import math |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
15 import argparse |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
16 from xml.etree import ElementTree as ET |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
17 from zipfile import ZipFile |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
18 try: #Python 3 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
19 from urllib.request import urlopen |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
20 except ImportError: #Python 2 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
21 from urllib2 import urlopen |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
22 from io import BytesIO |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
23 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
24 # initializes blank parameters |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
# Command-line interface.
#
# Example invocation for testing:
# python cravatp_submit.py test-data/Freebayes_two-variants.vcf GRCh38
#     test-data/variant.tsv test-data/gene.tsv test-data/noncoding.tsv
#     test-data/error.tsv CHASM --classifier Breast
#     --proBED test-data/MCF7_proBed.bed
parser = argparse.ArgumentParser()

# Positional arguments, registered in the order they must be given.
for _name, _help in (
        ('cravatInput', 'The filename of the input CRAVAT-formatted '
                        'tabular file (e.g., VCF)'),
        ('GRCh', 'The name of the human reference genome used for '
                 'annotation: GRCh38/hg38 or GRCh37/hg19'),
        ('variant', 'The filename of the output variant file'),
        ('gene', 'The filename of the output gene variant report'),
        ('noncoding', 'The filename of the output non-coding variant '
                      'report'),
        ('error', 'The filename of the output error file'),
        ('analysis', 'The machine-learning algorithm used for CRAVAT '
                     'annotation (VEST and/or CHASM)'),
):
    parser.add_argument(_name, help=_help)

# Optional arguments.
for _flag, _help in (
        ('--classifier', 'The cancer classifier for the CHASM algorithm'),
        ('--proBED', 'The filename of the proBED file containing '
                     'peptides with genomic coordinates'),
        ('--allIntersect', 'Specifies whether to analyze all variants'),
        ('--vcfOutput', 'The output filename of the intersected VCF '
                        'file'),
):
    parser.add_argument(_flag, help=_help)

# Unpack the parsed arguments into the module-level settings used by the
# rest of the script.
args = parser.parse_args()
input_filename = args.cravatInput
GRCh_build = args.GRCh
output_filename = args.variant
file_3 = args.gene
file_4 = args.noncoding
file_5 = args.error

# The literal string 'None' means "no analysis requested".
analysis_type = None if args.analysis == 'None' else args.analysis

# Missing or empty optional values fall back to the blank defaults.
chasm_classifier = args.classifier or ''
probed_filename = args.proBED or None
all_intersect = args.allIntersect or False
vcf_output = args.vcfOutput or None

# Any analysis value containing '+' is rewritten to 'CHASM;VEST'.
if analysis_type and '+' in analysis_type:
    analysis_type = 'CHASM;VEST'
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
82 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
83 # obtains the transcript's protein sequence using Ensembl API |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
def getSequence(transcript_id):
    """Fetch the protein sequence for a transcript from the Ensembl REST API.

    Requests ``/sequence/id/<transcript_id>`` from rest.ensembl.org, asking
    for the protein sequence in SeqXML form.

    Returns the text of the first ``AAseq`` element in the response, or
    None when the request is not OK or no such element is present.
    """
    url = ('http://rest.ensembl.org'
           + '/sequence/id/' + transcript_id
           + '?content-type=text/x-seqxml%2Bxml;'
             'multiple_sequences=1;type=protein')
    response = requests.get(url, headers={"Content-Type": "text/plain"})
    if response.ok:
        tree = ET.fromstring(response.content)
        # Only the first matching element is of interest.
        first_match = next(tree.iter('AAseq'), None)
        if first_match is not None:
            return first_match.text
    return None
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
98 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
99 # parses the proBED file as a list. |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
def loadProBED(filename=None):
    """Read a tab-delimited proBED file into a list of rows.

    filename -- path of the file to read; when omitted, the module-level
                ``probed_filename`` is used (the original behaviour).

    Returns a list with one entry per line of the file, each entry being
    the list of string fields on that line (an empty list for a blank
    line).
    """
    if filename is None:
        filename = probed_filename
    with open(filename) as tsvin:
        return list(csv.reader(tsvin, delimiter='\t'))
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
107 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
108 write_header = True |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
109 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
110 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
111 # Creates a VCF file that only contains variants that overlap with the |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
112 # proteogenomic input (proBED) file if the user specifies that they want |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
113 # to only include intersected variants or if they want to receive the |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
114 # intersected VCF as well. |
3
a018c44dc18b
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatp_score_and_annotate commit d80e60ce74aabe64e131d560085af099d52b81cf-dirty
galaxyp
parents:
1
diff
changeset
|
# Write the subset of the input VCF whose variants fall inside a proBED
# peptide region.  This runs when a proBED file was given and either an
# intersected-VCF output was requested or all_intersect is 'false'.
if probed_filename and (vcf_output or all_intersect == 'false'):
    proBED = loadProBED()
    if not vcf_output:
        vcf_output = 'intersected_input.vcf'

    # Convert the peptide coordinates once rather than once per variant.
    # Blank proBED lines carry no coordinates and are skipped.
    peptide_regions = [(peptide[0], int(peptide[1]), int(peptide[2]))
                       for peptide in proBED if peptide]

    # csv.writer needs a binary-mode file on Python 2 but a text-mode file
    # opened with newline='' on Python 3 ('wb' raises TypeError there).
    if sys.version_info[0] >= 3:
        out_mode, out_kwargs = 'w', {'newline': ''}
    else:
        out_mode, out_kwargs = 'wb', {}

    with open(input_filename) as tsvin, \
            open(vcf_output, out_mode, **out_kwargs) as vcf_file:
        tsvreader = csv.reader(tsvin, delimiter='\t')
        tsvout = csv.writer(vcf_file, delimiter='\t', escapechar=' ',
                            quoting=csv.QUOTE_NONE)

        for row in tsvreader:
            # Blank lines and '#' header lines are copied through as-is.
            # startswith() also copes with an empty first field, where
            # indexing row[0][0] would raise IndexError.
            if not row or row[0].startswith('#'):
                tsvout.writerow(row)
                continue

            genchrom = row[0]
            genpos = int(row[1])
            # Keep the variant (once) if any peptide region contains it.
            # NOTE(review): both bounds are inclusive; if proBED start
            # coordinates are 0-based like standard BED, the lower bound
            # may be off by one against 1-based VCF positions -- confirm.
            if any(genchrom == pepchrom and pepposA <= genpos <= pepposB
                   for pepchrom, pepposA, pepposB in peptide_regions):
                tsvout.writerow(row)

    # When only intersected variants are wanted, submit the filtered VCF.
    if all_intersect == 'false':
        input_filename = vcf_output
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
142 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
143 # sets up the parameters for submission to the CRAVAT API |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
parameters = {'email': 'rsajulga@umn.edu',
              'hg19': 'on' if GRCh_build == 'GRCh37' else 'off',
              'functionalannotation': 'on',
              'tsvreport': 'on',
              'mupitinput': 'on'}
if analysis_type:
    parameters['analyses'] = analysis_type
if chasm_classifier:
    parameters['chasmclassifier'] = chasm_classifier

# Submits the job; the input file is closed once the upload has returned.
with open(input_filename) as cravat_input:
    submit = requests.post('http://www.cravat.us/CRAVAT/rest/service/submit',
                           files={'inputfile': cravat_input},
                           data=parameters)

# The submit response is JSON; only the job ID is needed from it.
jobid = json.loads(submit.text)['jobid']

# Polls the status endpoint every 2 seconds until the job reports Success.
# NOTE(review): there is no exit for any other terminal status, so a job
# that fails on the server would make this loop run forever -- confirm the
# status values the service can return and add a failure branch.
while True:
    check = requests.get(
        'http://www.cravat.us/CRAVAT/rest/service/status',
        params={'jobid': jobid})
    job_state = json.loads(check.text)  # parse the response once
    status = job_state['status']
    resultfileurl = job_state['resultfileurl']
    if status == 'Success':
        break
    time.sleep(2)

# Downloads the result zip and loads the variant and VAD tables from it.
r = requests.get(resultfileurl, stream=True)
# NOTE(review): this second connection looks redundant (the archive is read
# from `r` below); kept in case `url` is used further down the file.
url = urlopen(resultfileurl)
zipfile = ZipFile(BytesIO(r.content))
with zipfile.open(jobid + '/Variant.Result.tsv') as _member:
    variants = _member.readlines()
with zipfile.open(jobid
                  + '/Variant_Additional_Details.Result.tsv') as _member:
    vad = _member.readlines()

# Copies the gene, non-coding and error reports out of the archive, closing
# each output file as soon as it has been written.
for _out_path, _member_name in (
        (file_3, '/Gene_Level_Analysis.Result.tsv'),
        (file_4, '/Variant_Non-coding.Result.tsv'),
        (file_5, '/Input_Errors.Result.tsv'),
):
    with open(_out_path, 'wb') as _report:
        _report.write(zipfile.read(jobid + _member_name))
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
188 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
189 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
190 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
191 if probed_filename and not vcf_output: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
192 proBED = loadProBED() |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
193 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
194 if probed_filename: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
195 with open(output_filename, 'w') as tsvout: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
196 tsvout = csv.writer(tsvout, |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
197 delimiter='\t', |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
198 escapechar=' ', |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
199 quoting=csv.QUOTE_NONE) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
200 n = 11 #Index for proteogenomic column start |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
201 reg_seq_change = re.compile('([A-Z]+)(\d+)([A-Z]+)') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
202 SOtranscripts = re.compile('([A-Z]+[\d\.]+):([A-Z]+\d+[A-Z]+)') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
203 pep_muts = {} |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
204 pep_map = {} |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
205 rows = [] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
206 for row in vad: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
207 row = row.decode().split('\t') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
208 row[-1] = row[-1].replace('\n','') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
209 if row and row[0] and not row[0].startswith('#'): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
210 # checks if the row begins with input line |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
211 if row[0].startswith('Input line'): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
212 vad_headers = row |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
213 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
214 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
215 # Initially screens through the output Variant |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
216 # Additional Details to catch mutations on |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
217 # same peptide region |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
218 genchrom = row[vad_headers.index('Chromosome')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
219 genpos = int(row[vad_headers.index('Position')]) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
220 aa_change = row[vad_headers.index('Protein sequence change')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
221 input_line = row[vad_headers.index('Input line')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
222 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
223 for peptide in proBED: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
224 pepseq = peptide[3] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
225 pepchrom = peptide[0] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
226 pepposA = int(peptide[1]) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
227 pepposB = int(peptide[2]) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
228 if genchrom == pepchrom and pepposA <= genpos and genpos <= pepposB: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
229 strand = row[vad_headers.index('Strand')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
230 transcript_strand = row[vad_headers.index('S.O. transcript strand')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
231 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
232 # Calculates the position of the variant |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
233 # amino acid(s) on peptide |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
234 if transcript_strand == strand: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
235 aa_peppos = int(math.ceil((genpos - pepposA)/3.0) - 1) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
236 if (strand == '-' or transcript_strand == '-' |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
237 or aa_peppos >= len(pepseq)): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
238 aa_peppos = int(math.floor((pepposB - genpos)/3.0)) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
239 if pepseq in pep_muts: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
240 if aa_change not in pep_muts[pepseq]: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
241 pep_muts[pepseq][aa_change] = [aa_peppos] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
242 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
243 if aa_peppos not in pep_muts[pepseq][aa_change]: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
244 pep_muts[pepseq][aa_change].append(aa_peppos) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
245 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
246 pep_muts[pepseq] = {aa_change : [aa_peppos]} |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
247 # Stores the intersect information by mapping |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
248 # Input Line (CRAVAT output) to peptide sequence. |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
249 if input_line in pep_map: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
250 if pepseq not in pep_map[input_line]: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
251 pep_map[input_line].append(pepseq) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
252 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
253 pep_map[input_line] = [pepseq] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
254 # TODO: Need to obtain strand information as |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
255 # well i.e., positive (+) or negative (-) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
256 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
257 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
258 with open(output_filename, 'w') as tsvout: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
259 tsvout = csv.writer(tsvout, delimiter='\t', escapechar='', |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
260 quoting=csv.QUOTE_NONE) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
261 headers = [] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
262 duplicate_indices = [] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
263 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
264 # loops through each row in the Variant Result (VR) file |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
265 for x, row in enumerate(variants): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
266 row = row.decode().split('\t') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
267 row[-1] = row[-1].replace('\n','') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
268 # sets row_2 equal to the same row in the Variant Additional Details (VAD) file |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
269 row_2 = vad[x].decode().split('\t') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
270 row_2[-1] = row_2[-1].replace('\n','') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
271 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
272 # checks if row is empty or if the first term starts with '#' |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
273 if not row or not row[0] or row[0].startswith('#'): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
274 if row[0]: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
275 tsvout.writerow(row) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
276 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
277 if row[0].startswith('Input line'): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
278 # starts the headers list from the header row of the VR file |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
279 headers = row |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
280 # loops through the header names in the same row of the VAD file |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
281 for i,value in enumerate(row_2): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
282 #Checks if the value is already in headers |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
283 if value in headers: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
284 duplicate_indices.append(i) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
285 continue |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
286 #else adds the header to headers |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
287 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
288 headers.append(value) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
289 # adds appropriate headers when proteomic input is supplied |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
290 if probed_filename: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
291 headers.insert(n, 'Variant peptide') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
292 headers.insert(n, 'Reference peptide') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
293 tsvout.writerow(headers) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
294 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
295 cells = [] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
296 # goes through each value in the next list |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
297 for value in row: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
298 #adds it to cells |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
299 cells.append(value) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
300 # goes through each value from the same row of the VAD file, keeping |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
301 # only the columns not recorded in duplicate_indices |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
302 for i,value in enumerate(row_2): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
303 # adds in the rest of the values to cells |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
304 if i not in duplicate_indices: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
305 # Skips the initial 11 columns and the |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
306 # VEST p-value (already in VR file) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
307 cells.append(value) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
308 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
309 # Verifies the peptides intersected previously through |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
310 # sequences obtained from Ensembl's API |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
311 if probed_filename: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
312 cells.insert(n,'') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
313 cells.insert(n,'') |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
314 input_line = cells[headers.index('Input line')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
315 if input_line in pep_map: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
316 pepseq = pep_map[input_line][0] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
317 aa_changes = pep_muts[pepseq] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
318 transcript_id = cells[headers.index('S.O. transcript')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
319 ref_fullseq = getSequence(transcript_id) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
320 # Checks the other S.O. transcripts if the primary |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
321 # S.O. transcript has no sequence available |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
322 if not ref_fullseq: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
323 transcripts = cells[headers.index('S.O. all transcripts')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
324 for transcript in transcripts.split(','): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
325 if transcript: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
326 mat = SOtranscripts.search(transcript) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
327 ref_fullseq = getSequence(mat.group(1)) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
328 if ref_fullseq: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
329 aa_changes = {mat.group(2): [aa_changes.values()[0][0]]} |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
330 break |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
331 # Resubmits the previous transcripts without |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
332 # extensions if all S.O. transcripts fail to |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
333 # provide a sequence |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
334 if not ref_fullseq: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
335 transcripts = cells[headers.index('S.O. all transcripts')] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
336 for transcript in transcripts.split(','): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
337 if transcript: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
338 mat = SOtranscripts.search(transcript) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
339 ref_fullseq = getSequence(mat.group(1).split('.')[0]) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
340 if ref_fullseq: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
341 aa_changes = {mat.group(2): [aa_changes.values()[0][0]]} |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
342 break |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
343 if ref_fullseq: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
344 # Sorts the amino acid changes |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
345 positions = {} |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
346 for aa_change in aa_changes: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
347 m = reg_seq_change.search(aa_change) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
348 aa_protpos = int(m.group(2)) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
349 aa_peppos = aa_changes[aa_change][0] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
350 aa_startpos = aa_protpos - aa_peppos - 1 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
351 if aa_startpos in positions: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
352 positions[aa_startpos].append(aa_change) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
353 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
354 positions[aa_startpos] = [aa_change] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
355 # Goes through the sorted categories to mutate the Ensembl peptide |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
356 # (uses proBED peptide as a reference) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
357 for pep_protpos in positions: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
358 ref_seq = ref_fullseq[pep_protpos:pep_protpos+len(pepseq)] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
359 muts = positions[pep_protpos] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
360 options = [] |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
361 mut_seq = ref_seq |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
362 for mut in muts: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
363 m = reg_seq_change.search(mut) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
364 ref_aa = m.group(1) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
365 mut_pos = int(m.group(2)) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
366 alt_aa = m.group(3) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
367 pep_mutpos = mut_pos - pep_protpos - 1 |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
368 if (ref_seq[pep_mutpos] == ref_aa |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
369 and (pepseq[pep_mutpos] == alt_aa |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
370 or pepseq[pep_mutpos] == ref_aa)): |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
371 if pepseq[pep_mutpos] == ref_aa: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
372 mut_seq = (mut_seq[:pep_mutpos] + ref_aa |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
373 + mut_seq[pep_mutpos+1:]) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
374 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
375 mut_seq = (mut_seq[:pep_mutpos] + alt_aa |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
376 + mut_seq[pep_mutpos+1:]) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
377 else: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
378 break |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
379 # Adds the mutated peptide and reference peptide if mutated correctly |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
380 if pepseq == mut_seq: |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
381 cells[n+1] = pepseq |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
382 cells[n] = ref_seq |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
383 tsvout.writerow(cells) |
2c7bcc1219fc
Updated cravatool to version 1.0 with updated formatting and new CRAVAT target URL.
galaxyp
parents:
diff
changeset
|
384 |