# HG changeset patch # User galaxyp # Date 1526664329 14400 # Node ID 83181dabeb905166e792003e78c3a7ac37f19b19 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cravatool commit 4f73619e5f750916a9971e433ddd6b8dee0d7dd3 diff -r 000000000000 -r 83181dabeb90 cravat_submit.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cravat_submit.py Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,287 @@ +import requests +import json +import time +import urllib +import sys +import csv +import re +import math +from difflib import SequenceMatcher +from xml.etree import ElementTree as ET +import sqlite3 + +try: + input_filename = sys.argv[1] + input_select_bar = sys.argv[2] + GRCh_build = sys.argv[3] + probed_filename = sys.argv[4] + output_filename = sys.argv[5] + file_3 = sys.argv[6] + file_4 = sys.argv[7] + file_5 = sys.argv[8] +except: + # Filenames for testing. + input_filename = 'test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf' + probed_filename = 'test-data/[PepPointer].bed' + input_select_bar = 'VEST' + GRCh_build = 'GRCh38' + output_filename = 'combined_variants.tsv' + file_3 = 'test-results/Gene_Level_Analysis.tsv' + file_4 = 'test-results/Variant_Non-coding.Result.tsv' + file_5 = 'test-results/Input_Errors.Result.tsv' + matches_filename = 'matches.tsv' + +def getSequence(transcript_id): + server = 'http://rest.ensembl.org' + ext = '/sequence/id/' + transcript_id + '?content-type=text/x-seqxml%2Bxml;multiple_sequences=1;type=protein' + req = requests.get(server+ext, headers={ "Content-Type" : "text/plain"}) + + if not req.ok: + return None + + root = ET.fromstring(req.content) + for child in root.iter('AAseq'): + return child.text + + +write_header = True + +GRCh37hg19 = 'off' +if GRCh_build == 'GRCh37': + GRCh37hg19 = 'on' + +#plugs in params to given URL +submit = requests.post('http://staging.cravat.us/CRAVAT/rest/service/submit', files={'inputfile':open(input_filename)}, data={'email':'znylund@insilico.us.com', 'analyses': input_select_bar, 'hg19': GRCh37hg19}) + +#Makes the data a json dictionary, takes out only the job ID +jobid = json.loads(submit.text)['jobid'] + +#out_file.write(jobid) +submitted = json.loads(submit.text)['status'] +#out_file.write('\t' + submitted) + +input_file = open(input_filename) + +# Loads the proBED file as a list. +if (probed_filename != 'None'): + proBED = [] + with open(probed_filename) as tsvin: + tsvreader = csv.reader(tsvin, delimiter='\t') + for i, row in enumerate(tsvreader): + proBED.append(row) + +#loops until we find a status equal to Success, then breaks +while True: + check = requests.get('http://staging.cravat.us/CRAVAT/rest/service/status', params={'jobid': jobid}) + status = json.loads(check.text)['status'] + resultfileurl = json.loads(check.text)['resultfileurl'] + #out_file.write(str(status) + ', ') + if status == 'Success': + #out_file.write('\t' + resultfileurl) + break + else: + time.sleep(2) + +#out_file.write('\n') + +#creates three files +file_1 = 'Variant_Result.tsv' +file_2 = 'Additional_Details.tsv' +#file_3 = time.strftime("%H:%M") + 'Combined_Variant_Results.tsv' + +#Downloads the tabular results +urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Variant.Result.tsv", file_1) +urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Variant_Additional_Details.Result.tsv", file_2) +urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Gene_Level_Analysis.Result.tsv", file_3) +urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Variant_Non-coding.Result.tsv", file_4) +urllib.urlretrieve("http://staging.cravat.us/CRAVAT/results/" + jobid + "/" + "Input_Errors.Result.tsv", file_5) + +#opens the Variant Result file and the Variant Additional Details file as csv readers, then opens the output file (galaxy) as a writer +with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, 'wb') as tsvout: + tsvreader_2 = csv.reader(tsvin_2, delimiter='\t') + tsvout = csv.writer(tsvout, delimiter='\t') + + headers = [] + duplicate_indices = [] + n = 12 #Index for proteogenomic column start + reg_seq_change = re.compile('([A-Z]+)(\d+)([A-Z]+)') + SOtranscripts = re.compile('([A-Z]+[\d\.]+):([A-Z]+\d+[A-Z]+)') + pep_muts = {} + pep_map = {} + rows = [] + + for row in tsvreader_2: + if row != [] and row[0][0] != '#': + #checks if the row begins with input line + if row[0] == 'Input line': + vad_headers = row + else: + # Initially screens through the output Variant Additional Details to catch mutations on same peptide region + genchrom = row[vad_headers.index('Chromosome')] + genpos = int(row[vad_headers.index('Position')]) + aa_change = row[vad_headers.index('Protein sequence change')] + input_line = row[vad_headers.index('Input line')] + + for peptide in proBED: + pepseq = peptide[3] + pepchrom = peptide[0] + pepposA = int(peptide[1]) + pepposB = int(peptide[2]) + if genchrom == pepchrom and pepposA <= genpos and genpos <= pepposB: + strand = row[vad_headers.index('Strand')] + transcript_strand = row[vad_headers.index('S.O. transcript strand')] + + # Calculates the position of the variant amino acid(s) on peptide + if transcript_strand == strand: + aa_peppos = int(math.ceil((genpos - pepposA)/3.0) - 1) + if strand == '-' or transcript_strand == '-' or aa_peppos >= len(pepseq): + aa_peppos = int(math.floor((pepposB - genpos)/3.0)) + if pepseq in pep_muts: + if aa_change not in pep_muts[pepseq]: + pep_muts[pepseq][aa_change] = [aa_peppos] + else: + if aa_peppos not in pep_muts[pepseq][aa_change]: + pep_muts[pepseq][aa_change].append(aa_peppos) + else: + pep_muts[pepseq] = {aa_change : [aa_peppos]} + # Stores the intersect information by mapping Input Line (CRAVAT output) to peptide sequence. + if input_line in pep_map: + if pepseq not in pep_map[input_line]: + pep_map[input_line].append(pepseq) + else: + pep_map[input_line] = [pepseq] + +with open(file_1) as tsvin_1, open(file_2) as tsvin_2, open(output_filename, 'wb') as tsvout: + tsvreader_1 = csv.reader(tsvin_1, delimiter='\t') + tsvreader_2 = csv.reader(tsvin_2, delimiter='\t') + tsvout = csv.writer(tsvout, delimiter='\t') + + headers = [] + + #loops through each row in the Variant Additional Details (VAD) file + for row in tsvreader_2: + + #sets row_2 equal to the same row in Variant Result (VR) file + row_2 = tsvreader_1.next() + #checks if row is empty or if the first term contains '#' + if row == [] or row[0][0] == '#': + tsvout.writerow(row) + else: + if row[0] == 'Input line': + #Goes through each value in the headers list in VAD + for value in row: + #Adds each value into headers + headers.append(value) + #Loops through the Keys in VR + for i,value in enumerate(row_2): + #Checks if the value is already in headers + if value in headers: + duplicate_indices.append(i) + continue + #else adds the header to headers + else: + headers.append(value) + #Adds appropriate headers when proteomic input is supplied + if (probed_filename != 'None'): + headers.insert(n, 'Variant peptide') + headers.insert(n, 'Reference peptide') + tsvout.writerow(headers) + else: + cells = [] + #Goes through each value in the next list + for value in row: + #adds it to cells + cells.append(value) + #Goes through each value from the VR file after position 11 (After it is done repeating from VAD file) + for i,value in enumerate(row_2): + #adds in the rest of the values to cells + if i not in duplicate_indices: + # Skips the initial 11 columns and the VEST p-value (already in VR file) + cells.append(value) + + # Verifies the peptides intersected previously through sequences obtained from Ensembl's API + if (probed_filename != 'None'): + cells.insert(n,'') + cells.insert(n,'') + input_line = cells[headers.index('Input line')] + if input_line in pep_map: + pepseq = pep_map[input_line][0] + aa_changes = pep_muts[pepseq] + transcript_id = cells[headers.index('S.O. transcript')] + ref_fullseq = getSequence(transcript_id) + # Checks the other S.O. transcripts if the primary S.O. transcript has no sequence available + if not ref_fullseq: + transcripts = cells[headers.index('S.O. all transcripts')] + for transcript in transcripts.split(','): + if transcript: + mat = SOtranscripts.search(transcript) + ref_fullseq = getSequence(mat.group(1)) + if ref_fullseq: + aa_changes = {mat.group(2): [aa_changes.values()[0][0]]} + break + # Resubmits the previous transcripts without extensions if all S.O. transcripts fail to provide a sequence + if not ref_fullseq: + transcripts = cells[headers.index('S.O. all transcripts')] + for transcript in transcripts.split(','): + if transcript: + mat = SOtranscripts.search(transcript) + ref_fullseq = getSequence(mat.group(1).split('.')[0]) + if ref_fullseq: + aa_changes = {mat.group(2): [aa_changes.values()[0][0]]} + break + if ref_fullseq: + # Sorts the amino acid changes + positions = {} + for aa_change in aa_changes: + m = reg_seq_change.search(aa_change) + aa_protpos = int(m.group(2)) + aa_peppos = aa_changes[aa_change][0] + aa_startpos = aa_protpos - aa_peppos - 1 + if aa_startpos in positions: + positions[aa_startpos].append(aa_change) + else: + positions[aa_startpos] = [aa_change] + # Goes through the sorted categories to mutate the Ensembl peptide (uses proBED peptide as a reference) + for pep_protpos in positions: + ref_seq = ref_fullseq[pep_protpos:pep_protpos+len(pepseq)] + muts = positions[pep_protpos] + options = [] + mut_seq = ref_seq + for mut in muts: + m = reg_seq_change.search(mut) + ref_aa = m.group(1) + mut_pos = int(m.group(2)) + alt_aa = m.group(3) + pep_mutpos = mut_pos - pep_protpos - 1 + if ref_seq[pep_mutpos] == ref_aa and (pepseq[pep_mutpos] == alt_aa or pepseq[pep_mutpos] == ref_aa): + if pepseq[pep_mutpos] == ref_aa: + mut_seq = mut_seq[:pep_mutpos] + ref_aa + mut_seq[pep_mutpos+1:] + else: + mut_seq = mut_seq[:pep_mutpos] + alt_aa + mut_seq[pep_mutpos+1:] + else: + break + # Adds the mutated peptide and reference peptide if mutated correctly + if pepseq == mut_seq: + cells[n+1] = pepseq + cells[n] = ref_seq + #print cells + tsvout.writerow(cells) + + + + + + +#a = 'col1\tcol2\tcol3' +#header_list = a.split('\t') + +#loop through the two results, when you first hit header you print out the headers in tabular form +#Print out each header only once +#Combine both headers into one output file +#loop through the rest of the data and assign each value to its assigned header +#combine this all into one output file + + + + + diff -r 000000000000 -r 83181dabeb90 cravat_submit.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cravat_submit.xml Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,44 @@ + + Submits, checks for, and retrieves data for cancer annotation + cravat_submit.py $input $dropdown $GRCh $psm $Variant $Gene $Noncoding $Error + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This tool submits, checks for, and retrieves data for cancer annotation from the CRAVAT platform at cravat.us. + + + diff -r 000000000000 -r 83181dabeb90 test-data/[PepPointer].bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/[PepPointer].bed Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,20 @@ +chr14 94079127 94079178 ADVSAWKDLFVPGPVLR 255 - CDS +chr14 94079127 94079178 ADVSAWKDLFVPGPVLR 255 - CDS +chr14 102011973 102012027 ALESLEGVEGVAHIIDPK 255 + CDS +chr19 18856027 18856078 EAIDSPVSFLVLHNQIR 255 + CDS +chr12 110339607 110339637 EWGSGSDILR 255 + CDS +chr12 110339607 110339637 EWGSGSDILR 255 + CDS +chr14 102083930 102083972 GVVDSENLPLNISR 255 - CDS +chr14 102083930 102083972 GVVDSENLPLNISR 255 - CDS +chr19 17205300 17206022 IQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK 255 + SpliceJunction +chr1 156705410 156705446 MPNFSGNWEIIR 255 - CDS +chr1 156705410 156705446 MPNFSGNWEIIR 255 - CDS +chr2 231457346 231457474 NSTWSDDSR 255 - SpliceJunction +chr17 82082586 82082643 QGVQVQVSTSNINSLEGAR 255 - CDS +chr17 82082586 82082643 QGVQVQVSTSNINSLEGAR 255 - CDS +chr12 6561014 6561056 STGVILANDANAER 255 - CDS +chr12 6561014 6561056 STGVILANDANAER 255 - CDS +chr12 6561014 6561056 STGVILANDANAER 255 - CDS +chr12 6561014 6561056 STGVILANDANAER 255 - CDS +chr2 231457113 231457124 TLQHVLGESK 255 - SpliceJunction +chr17 2711607 2711658 VIKTDELPAAAPADSAR 255 - CDS diff -r 000000000000 -r 83181dabeb90 test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/[VCF-BEDintersect__on_data_65_and_data_6].vcf Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,526 @@ +##fileformat=VCFv4.2 +##fileDate=20180504 +##source=freeBayes v1.1.0-46-g8d2b3a0-dirty +##reference=/panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##phasing=none +##commandline="freebayes --region chrY_KI270740v1_random:0..37240 --bam b_0.bam --fasta-reference /panfs/roc/rissdb/galaxy/genomes/hg38/seq/hg38.fa --vcf ./vcf_output/part_chrY_KI270740v1_random:0..37240.vcf" +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT unknown +chr1 156705422 . T C 125181 . AB=0.318515;ABP=5157.71;AC=1;AF=0.5;AN=2;AO=5739;CIGAR=1X;DP=18018;DPB=18018;DPRA=0;EPP=283.505;EPPR=1125.27;GTI=0;LEN=1;MEANALT=3;MQM=59.7327;MQMR=59.6874;NS=1;NUMALT=1;ODDS=28824;PAIRED=0.984666;PAIREDR=0.981823;PAO=0;PQA=0;PQR=0;PRO=0;QA=208036;QR=445024;RO=12268;RPL=3614;RPP=841.905;RPPR=1072.42;RPR=2125;RUN=1;SAF=3390;SAP=413.044;SAR=2349;SRF=6908;SRP=427.163;SRR=5360;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:18018:12268,5739:12268:445024:5739:208036:-13208.4,0,-34397.1 +chr12 6561055 . T C 14340.8 . AB=0.235264;ABP=2357.67;AC=1;AF=0.5;AN=2;AO=910;CIGAR=1X;DP=3868;DPB=3868;DPRA=0;EPP=7.21962;EPPR=59.3944;GTI=0;LEN=1;MEANALT=2;MQM=59.9352;MQMR=59.6406;NS=1;NUMALT=1;ODDS=3302.08;PAIRED=0.983516;PAIREDR=0.990186;PAO=0;PQA=0;PQR=0;PRO=0;QA=33224;QR=106496;RO=2955;RPL=107;RPP=1158.94;RPPR=4030.18;RPR=803;RUN=1;SAF=481;SAP=9.46268;SAR=429;SRF=1663;SRP=104.155;SRR=1292;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:3868:2955,910:2955:106496:910:33224:-1821.72,0,-8360.65 +chr12 110339630 . C T 48828.1 . AB=0.268901;ABP=4224.36;AC=1;AF=0.5;AN=2;AO=2447;CIGAR=1X;DP=9100;DPB=9100;DPRA=0;EPP=710.668;EPPR=1883.3;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=11243.1;PAIRED=0.985288;PAIREDR=0.980156;PAO=0;PQA=0;PQR=0;PRO=0;QA=90595;QR=244569;RO=6652;RPL=1094;RPP=62.5381;RPPR=155.737;RPR=1353;RUN=1;SAF=1227;SAP=3.05378;SAR=1220;SRF=3354;SRP=4.03401;SRR=3298;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:9100:6652,2447:6652:244569:2447:90595:-5409.03,0,-19257.6 +chr14 94079142 . T C 0 . AB=0;ABP=0;AC=0;AF=0;AN=2;AO=672;CIGAR=1X;DP=3227;DPB=3227;DPRA=0;EPP=117.219;EPPR=488.229;GTI=0;LEN=1;MEANALT=3;MQM=13.9062;MQMR=8.44728;NS=1;NUMALT=1;ODDS=1413.33;PAIRED=0.995536;PAIREDR=0.994512;PAO=0;PQA=0;PQR=0;PRO=0;QA=24358;QR=92741;RO=2551;RPL=349;RPP=5.1947;RPPR=253.993;RPR=323;RUN=1;SAF=301;SAP=18.844;SAR=371;SRF=1157;SRP=50.8227;SRR=1394;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/0:3227:2551,672:2551:92741:672:24358:0,-455.681,-680.616 +chr14 102011985 . A T 12962.3 . AB=0.301267;ABP=734.044;AC=1;AF=0.5;AN=2;AO=642;CIGAR=1X;DP=2131;DPB=2131;DPRA=0;EPP=6.47383;EPPR=23.6897;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=2984.69;PAIRED=0.995327;PAIREDR=0.98117;PAO=0;PQA=0;PQR=0;PRO=0;QA=22490;QR=51005;RO=1487;RPL=362;RPP=25.7533;RPPR=15.6405;RPR=280;RUN=1;SAF=307;SAP=5.66207;SAR=335;SRF=729;SRP=4.23842;SRR=758;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:2131:1487,642:1487:51005:642:22490:-1382.22,0,-3947.19 +chr14 102083954 . C T 240809 . AB=0.203741;ABP=65012.9;AC=1;AF=0.5;AN=2;AO=17374;CIGAR=1X;DP=85275;DPB=85275;DPRA=0;EPP=2106.04;EPPR=12892.8;GTI=0;LEN=1;MEANALT=3;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=55448.2;PAIRED=0.982387;PAIREDR=0.980402;PAO=0;PQA=0;PQR=0;PRO=0;QA=637599;QR=2489236;RO=67865;RPL=5766;RPP=4268.59;RPPR=13677.6;RPR=11608;RUN=1;SAF=9064;SAP=74.0657;SAR=8310;SRF=35817;SRP=457.538;SRR=32048;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:85275:67865,17374:67865:2489236:17374:637599:-31686.8,0,-198225 +chr17 82082606 . C T 10374.8 . AB=0.202823;ABP=2937.89;AC=1;AF=0.5;AN=2;AO=776;CIGAR=1X;DP=3826;DPB=3826;DPRA=0;EPP=22.755;EPPR=126.853;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9806;NS=1;NUMALT=1;ODDS=2388.89;PAIRED=0.981959;PAIREDR=0.985569;PAO=0;PQA=0;PQR=0;PRO=0;QA=27982;QR=110520;RO=3049;RPL=338;RPP=30.9932;RPPR=253.452;RPR=438;RUN=1;SAF=422;SAP=15.9496;SAR=354;SRF=1759;SRP=159.665;SRR=1290;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:3826:3049,776:3049:110520:776:27982:-1365.58,0,-8788 +chr19 17205335 . A T 0.00158993 . AB=0.285714;ABP=5.80219;AC=1;AF=0.5;AN=2;AO=2;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=3.0103;EPPR=6.91895;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=48.2;NS=1;NUMALT=1;ODDS=7.91247;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=28;QR=169;RO=5;RPL=0;RPP=7.35324;RPPR=13.8677;RPR=2;RUN=1;SAF=1;SAP=3.0103;SAR=1;SRF=1;SRP=6.91895;SRR=4;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:7:5,2:5:169:2:28:-0.55277,0,-10.4001 +chr19 17205444 . T C 206.198 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=7;CIGAR=1X;DP=7;DPB=7;DPRA=0;EPP=5.80219;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=0;NS=1;NUMALT=1;ODDS=14.3092;PAIRED=0.857143;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=249;QR=0;RO=0;RPL=2;RPP=5.80219;RPPR=0;RPR=5;RUN=1;SAF=4;SAP=3.32051;SAR=3;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:7:0,7:0:0:7:249:-22.753,-2.10721,0 +chr19 17205973 . T C 12243.8 . AB=0;ABP=0;AC=2;AF=1;AN=2;AO=406;CIGAR=1X;DP=406;DPB=406;DPRA=0;EPP=14.3276;EPPR=0;GTI=0;LEN=1;MEANALT=1;MQM=58.4015;MQMR=0;NS=1;NUMALT=1;ODDS=567.441;PAIRED=0.985222;PAIREDR=0;PAO=0;PQA=0;PQR=0;PRO=0;QA=14833;QR=0;RO=0;RPL=368;RPP=585.457;RPPR=0;RPR=38;RUN=1;SAF=182;SAP=12.445;SAR=224;SRF=0;SRP=0;SRR=0;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 1/1:406:0,406:0:0:406:14833:-1297.85,-122.218,0 +chr19 18856059 . C T 10269.5 . AB=0.248844;ABP=1306.46;AC=1;AF=0.5;AN=2;AO=592;CIGAR=1X;DP=2379;DPB=2379;DPRA=0;EPP=30.139;EPPR=174.082;GTI=0;LEN=1;MEANALT=2;MQM=60;MQMR=59.9339;NS=1;NUMALT=1;ODDS=2364.65;PAIRED=0.991554;PAIREDR=0.983754;PAO=0;PQA=0;PQR=0;PRO=0;QA=21546;QR=64865;RO=1785;RPL=120;RPP=457.494;RPPR=1048.39;RPR=472;RUN=1;SAF=303;SAP=3.72923;SAR=289;SRF=873;SRP=4.86061;SRR=912;TYPE=snp GT:DP:AD:RO:QR:AO:QA:GL 0/1:2379:1785,592:1785:64865:592:21546:-1222.54,0,-5112.92 diff -r 000000000000 -r 83181dabeb90 test-results/Additional_Details.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-results/Additional_Details.tsv Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,21 @@ +#Variant Additional Details Report +#2018-05-18 15:15:25.120629 +#CRAVAT version: hybrid +#Analysis done at http://www.cravat.us. +#Job Id: znylund_20180518_111521 +#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf +#This report shows analysis results at variant level. +#hg38 genomic. +#N/A +#For more information on CRAVAT, visit http://www.cravat.us. + +Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology S.O. transcript S.O. transcript strand Protein sequence change S.O. all transcripts CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef COSMIC transcript COSMIC protein change COSMIC variant count ESP6500 AF (European American) ESP6500 AF (African American) HGVS Genomic HGVS Protein HGVS Protein All NCI pathway hits NCI pathway IDs NCI pathway names VEST score transcript VEST p-value VEST score (missense) VEST score (frameshift indels) VEST score (inframe indels) VEST score (stop-gain) VEST score (stop-loss) VEST score (splice site) All transcripts VEST results +1 VAR516_unknown chr1 156705422 + T C unknown CRABP2 MS ENST00000368221.1 - K9E *ENST00000368221.1:K9E(MS),ENST00000621784.4:K9E(MS),ENST00000368222.7:K9E(MS) ENST00000368221 p.K9E (large_intestine 1) 1 0 0 NC_000001.10:g.156705422T>C ENST00000368221.1:p.Lys9Glu *ENST00000368221.1:p.Lys9Glu,ENST00000621784.4:p.Lys9Glu,ENST00000368222.7:p.Lys9Glu 0 ENST00000368221.1:K9E 0.53061 0.2 *ENST00000368221.1:K9E(0.2:0.53061),ENST00000368222.7:K9E(0.187:0.5543),ENST00000621784.4:K9E(0.186:0.55652) +2 VAR517_unknown chr12 6561055 + T C unknown NOP2 MS ENST00000616948.4 - N408S ENST00000617555.4:N404S(MS),ENST00000545200.5:N404S(MS),ENST00000541778.5:N404S(MS),ENST00000399466.6:N404S(MS),ENST00000322166.9:N408S(MS),ENST00000537442.5:N408S(MS),ENST00000382421.7:N441S(MS),ENST00000620535.4:N441S(MS),*ENST00000616948.4:N408S(MS) 0 0 NC_000012.10:g.6561055T>C ENST00000616948.4:p.Asn408Ser ENST00000617555.4:p.Asn404Ser,ENST00000545200.5:p.Asn404Ser,ENST00000541778.5:p.Asn404Ser,ENST00000399466.6:p.Asn404Ser,ENST00000322166.9:p.Asn408Ser,ENST00000537442.5:p.Asn408Ser,ENST00000382421.7:p.Asn441Ser,ENST00000620535.4:p.Asn441Ser,*ENST00000616948.4:p.Asn408Ser 0 ENST00000616948.4:N408S 0.00324 0.958 ENST00000617555.4:N404S(0.954:0.00354),*ENST00000616948.4:N408S(0.958:0.00324),ENST00000541778.5:N404S(0.869:0.01488),ENST00000537442.5:N408S(0.956:0.00344),ENST00000399466.6:N404S(0.951:0.00374),ENST00000545200.5:N404S(0.953:0.00354),ENST00000620535.4:N441S(0.938:0.00536),ENST00000382421.7:N441S(0.938:0.00536),ENST00000322166.9:N408S(0.954:0.00354) +3 VAR518_unknown chr12 110339630 + C T unknown ATP2A2 MS ENST00000539276.6 + T557I ENST00000308664.10:T557I(MS),*ENST00000539276.6:T557I(MS) 0 0 NC_000012.10:g.110339630C>T ENST00000539276.6:p.Thr557Ile ENST00000308664.10:p.Thr557Ile,*ENST00000539276.6:p.Thr557Ile 0 ENST00000539276.6:T557I 0.00374 0.951 ENST00000308664.10:T557I(0.822:0.02459),*ENST00000539276.6:T557I(0.951:0.00374) +4 VAR520_unknown chr14 102011985 + A T unknown DYNC1H1 MS ENST00000360184.8 + R2243S *ENST00000360184.8:R2243S(MS) ENST00000360184 p.R2243S (large_intestine 1) 1 0 0 NC_000014.10:g.102011985A>T ENST00000360184.8:p.Arg2243Ser *ENST00000360184.8:p.Arg2243Ser 1 94da5dd8-5521-11e7-8f50-0ac135e8bacf Lissencephaly gene (LIS1) in neuronal migration and development ENST00000360184.8:R2243S 0.02307 0.829 *ENST00000360184.8:R2243S(0.829:0.02307) +5 VAR521_unknown chr14 102083954 + C T unknown HSP90AA1 MS ENST00000334701.11 - D515N ENST00000216281.12:D393N(MS),*ENST00000334701.11:D515N(MS) somatic NHL ENST00000334701 p.D515N (large_intestine 1) 1 0 0 NC_000014.10:g.102083954C>T ENST00000334701.11:p.Asp515Asn ENST00000216281.12:p.Asp393Asn,*ENST00000334701.11:p.Asp515Asn 15 3814fa62-5521-11e7-8f50-0ac135e8bacf,32ff1916-5521-11e7-8f50-0ac135e8bacf,a8411a5c-5521-11e7-8f50-0ac135e8bacf,541d7e20-5521-11e7-8f50-0ac135e8bacf,98ad85f0-5521-11e7-8f50-0ac135e8bacf,9697501e-5521-11e7-8f50-0ac135e8bacf,e6f69242-5521-11e7-8f50-0ac135e8bacf,603902ca-5521-11e7-8f50-0ac135e8bacf,bb3d7c4a-5521-11e7-8f50-0ac135e8bacf,b1ac7318-5521-11e7-8f50-0ac135e8bacf,cb348d72-5521-11e7-8f50-0ac135e8bacf,945aa686-5521-11e7-8f50-0ac135e8bacf,4c90f780-5521-11e7-8f50-0ac135e8bacf,e4e93610-5521-11e7-8f50-0ac135e8bacf,6f7a316e-5521-11e7-8f50-0ac135e8bacf Validated targets of C-MYC transcriptional activation@VEGFR1 specific signals@IL2 signaling events mediated by PI3K@Signaling events mediated by HDAC Class II@Integrin-linked kinase signaling@Integrins in angiogenesis@Class I PI3K signaling events mediated by Akt@Regulation of Telomerase@Glucocorticoid receptor regulatory network@Hypoxic and oxygen homeostasis regulation of HIF-1-alpha@ErbB receptor signaling network@LKB1 signaling events@Signaling events mediated by VEGFR1 and VEGFR2@Class I PI3K signaling events@Regulation of Androgen receptor activity ENST00000216281.12:D393N 0.02014 0.84 ENST00000334701.11:D515N(0.749:0.04989),*ENST00000216281.12:D393N(0.84:0.02014) +6 VAR522_unknown chr17 82082606 + C T unknown FASN MS ENST00000306749.3 - S1947N ENST00000634990.1:S1945N(MS),*ENST00000306749.3:S1947N(MS) ENST00000306749 p.S1947N (large_intestine 1) 1 0 0 NC_000017.10:g.82082606C>T ENST00000306749.3:p.Ser1947Asn ENST00000634990.1:p.Ser1945Asn,*ENST00000306749.3:p.Ser1947Asn 2 34a994cc-5521-11e7-8f50-0ac135e8bacf,812903c2-5521-11e7-8f50-0ac135e8bacf Validated transcriptional targets of deltaNp63 isoforms@p73 transcription factor network ENST00000634990.1:S1945N 0.18611 0.501 ENST00000306749.3:S1947N(0.493:0.19016),*ENST00000634990.1:S1945N(0.501:0.18611) +7 VAR523_unknown chr19 17205335 + A T unknown MYO9B MS ENST00000594824.5 + K1688M ENST00000397274.6:K1688M(MS),ENST00000595618.5:K1688M(MS),*ENST00000594824.5:K1688M(MS) 0 0 NC_000019.10:g.17205335A>T ENST00000594824.5:p.Lys1688Met ENST00000397274.6:p.Lys1688Met,ENST00000595618.5:p.Lys1688Met,*ENST00000594824.5:p.Lys1688Met 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity ENST00000397274.6:K1688M 0.20028 0.473 ENST00000594824.5:K1688M(0.464:0.20464),*ENST00000397274.6:K1688M(0.473:0.20028),ENST00000595618.5:K1688M(0.469:0.20231) +9 VAR525_unknown chr19 17205973 + T C unknown MYO9B MS ENST00000594824.5 + V1693A ENST00000397274.6:V1693A(MS),ENST00000595618.5:V1693A(MS),*ENST00000594824.5:V1693A(MS) ENST00000319396 p.V1693A (thyroid 2) 2 0.399857 0.645631 NC_000019.10:g.17205973T>C ENST00000594824.5:p.Val1693Ala ENST00000397274.6:p.Val1693Ala,ENST00000595618.5:p.Val1693Ala,*ENST00000594824.5:p.Val1693Ala 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity ENST00000397274.6:V1693A 0.95254 0.045 ENST00000594824.5:V1693A(0.025:0.98158),*ENST00000397274.6:V1693A(0.045:0.95254),ENST00000595618.5:V1693A(0.042:0.95749) +10 VAR526_unknown chr19 18856059 + C T unknown UPF1 MS ENST00000599848.5 + A571V ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS) ENST00000262803 p.A560V (large_intestine 1) 1 0 0 NC_000019.10:g.18856059C>T ENST00000599848.5:p.Ala571Val ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val 0 ENST00000262803.9:A560V 0.09372 0.662 ENST00000599848.5:A571V(0.643:0.10292),*ENST00000262803.9:A560V(0.662:0.09372) diff -r 000000000000 -r 83181dabeb90 test-results/Gene_Level_Analysis.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-results/Gene_Level_Analysis.tsv Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,21 @@ +#Gene Level Annotation Report +#2018-05-18 15:18:04.023450 +#CRAVAT version: hybrid +#Analysis done at http://www.cravat.us. +#Job Id: znylund_20180518_111800 +#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf +#This report shows analysis results at gene level. +#The composite p-value (Stouffer's combined p-value) and composite FDR of a gene show how probable it is to get the same p-value distribution for the gene as that obtained from the input variants by chance. +#hg38 genomic. +#N/A +#For more information on CRAVAT, visit http://www.cravat.us. + +HUGO symbol Number of variants Sequence ontology CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef Occurrences in COSMIC COSMIC gene count (tissue) Number of samples with gene mutated CHASM gene score CHASM gene p-value CHASM gene FDR VEST gene score (non-silent) VEST gene p-value VEST gene FDR Protein 3D gene Has a mutation in a TCGA Mutation Cluster NCI pathway hits NCI pathway IDs NCI pathway names TARGET CGL driver class +CRABP2 1 MS 37 upper_aerodigestive_tract(3);large_intestine(9);stomach(4);soft_tissue(3);endometrium(4);lung(3);liver(2);skin(4);NS(1);prostate(1);bone(1);kidney(1);breast(1) 1 0.2 0.53061 0.65 ../MuPIT_Interactive?gm=chr1:156705422 0 +NOP2 1 MS 133 large_intestine(22);endometrium(8);lung(8);skin(20);kidney(5);thyroid(1);cervix(2);central_nervous_system(3);oesophagus(4);NS(2);upper_aerodigestive_tract(5);stomach(8);soft_tissue(1);urinary_tract(4);breast(12);prostate(5);pituitary(1);pancreas(5);adrenal_gland(1);haematopoietic_and_lymphoid_tissue(2);ovary(5);liver(9) 1 0.958 0.00324 0.05 ../MuPIT_Interactive?gm=chr12:6561055 0 +ATP2A2 1 MS C0022595 OMIM:124200 200 large_intestine(31);endometrium(12);lung(7);skin(43);autonomic_ganglia(1);kidney(9);thyroid(3);cervix(2);testis(1);oesophagus(4);NS(2);upper_aerodigestive_tract(6);biliary_tract(6);stomach(14);soft_tissue(2);urinary_tract(8);breast(7);prostate(8);pancreas(6);small_intestine(2);haematopoietic_and_lymphoid_tissue(5);ovary(3);bone(2);liver(16) 1 0.951 0.00374 0.05 ../MuPIT_Interactive?gm=chr12:110339630 0 +DYNC1H1 1 MS C1834690 OMIM:158600 955 large_intestine(151);pleura(1);endometrium(79);lung(62);skin(151);autonomic_ganglia(1);kidney(27);thyroid(8);cervix(13);testis(1);central_nervous_system(12);oesophagus(28);NS(19);upper_aerodigestive_tract(31);biliary_tract(6);stomach(60);soft_tissue(13);urinary_tract(25);breast(50);prostate(38);pancreas(18);adrenal_gland(3);meninges(1);small_intestine(3);haematopoietic_and_lymphoid_tissue(37);ovary(20);bone(4);liver(93) 1 0.829 0.02307 0.1 ../MuPIT_Interactive?gm=chr14:102011985 1 94da5dd8-5521-11e7-8f50-0ac135e8bacf Lissencephaly gene (LIS1) in neuronal migration and development +HSP90AA1 1 MS somatic NHL 174 large_intestine(19);endometrium(9);lung(9);skin(25);kidney(12);thyroid(2);cervix(4);central_nervous_system(2);oesophagus(9);NS(5);biliary_tract(2);stomach(16);soft_tissue(5);urinary_tract(15);liver(11);prostate(3);pancreas(1);salivary_gland(1);haematopoietic_and_lymphoid_tissue(5);ovary(8);bone(1);breast(10) 1 0.84 0.02014 0.1 ../MuPIT_Interactive?gm=chr14:102083954 15 3814fa62-5521-11e7-8f50-0ac135e8bacf,32ff1916-5521-11e7-8f50-0ac135e8bacf,a8411a5c-5521-11e7-8f50-0ac135e8bacf,541d7e20-5521-11e7-8f50-0ac135e8bacf,98ad85f0-5521-11e7-8f50-0ac135e8bacf,9697501e-5521-11e7-8f50-0ac135e8bacf,e6f69242-5521-11e7-8f50-0ac135e8bacf,603902ca-5521-11e7-8f50-0ac135e8bacf,bb3d7c4a-5521-11e7-8f50-0ac135e8bacf,b1ac7318-5521-11e7-8f50-0ac135e8bacf,cb348d72-5521-11e7-8f50-0ac135e8bacf,945aa686-5521-11e7-8f50-0ac135e8bacf,4c90f780-5521-11e7-8f50-0ac135e8bacf,e4e93610-5521-11e7-8f50-0ac135e8bacf,6f7a316e-5521-11e7-8f50-0ac135e8bacf Validated targets of C-MYC transcriptional activation@VEGFR1 specific signals@IL2 signaling events mediated by PI3K@Signaling events mediated by HDAC Class II@Integrin-linked kinase signaling@Integrins in angiogenesis@Class I PI3K signaling events mediated by Akt@Regulation of Telomerase@Glucocorticoid receptor regulatory network@Hypoxic and oxygen homeostasis regulation of HIF-1-alpha@ErbB receptor signaling network@LKB1 signaling events@Signaling events mediated by VEGFR1 and VEGFR2@Class I PI3K signaling events@Regulation of Androgen receptor activity +FASN 1 MS 621 large_intestine(163);endometrium(22);lung(19);skin(82);autonomic_ganglia(1);kidney(9);thyroid(7);cervix(9);central_nervous_system(7);genital_tract(1);oesophagus(24);NS(13);upper_aerodigestive_tract(21);biliary_tract(12);stomach(39);soft_tissue(9);urinary_tract(2);liver(79);prostate(20);pancreas(14);adrenal_gland(2);salivary_gland(3);small_intestine(4);haematopoietic_and_lymphoid_tissue(14);ovary(3);bone(5);breast(37) 1 0.501 0.18611 0.25 ../MuPIT_Interactive?gm=chr17:82082606 2 34a994cc-5521-11e7-8f50-0ac135e8bacf,812903c2-5521-11e7-8f50-0ac135e8bacf Validated transcriptional targets of deltaNp63 isoforms@p73 transcription factor network +MYO9B 2 MS C1857847 OMIM:609753 424 large_intestine(96);endometrium(33);lung(17);skin(57);kidney(12);thyroid(9);cervix(4);central_nervous_system(9);oesophagus(22);NS(8);upper_aerodigestive_tract(22);biliary_tract(9);stomach(24);soft_tissue(10);urinary_tract(8);breast(17);prostate(14);pancreas(15);adrenal_gland(4);haematopoietic_and_lymphoid_tissue(8);ovary(1);bone(4);liver(21) 1 0.473 0.721215732958585 1 ../MuPIT_Interactive?gm=chr19:17205335,chr19:17205973 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity +UPF1 1 MS 267 large_intestine(57);endometrium(18);lung(13);skin(45);meninges(1);kidney(9);thyroid(3);cervix(4);central_nervous_system(7);oesophagus(5);NS(4);upper_aerodigestive_tract(10);biliary_tract(2);stomach(15);soft_tissue(6);urinary_tract(12);breast(11);prostate(7);pancreas(7);haematopoietic_and_lymphoid_tissue(10);ovary(4);bone(2);liver(15) 1 0.662 0.09372 0.15 ../MuPIT_Interactive?gm=chr19:18856059 diff -r 000000000000 -r 83181dabeb90 test-results/Input_Errors.Result.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-results/Input_Errors.Result.tsv Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,12 @@ +#Input Errors Report +#2018-05-18 15:18:04.022947 +#CRAVAT version: hybrid +#Analysis done at http://www.cravat.us. +#Job Id: znylund_20180518_111800 +#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf +#This report shows errors that occurred in the input. +#Input coordinate: hg38 genomic. +#CHASM classifier: N/A +#For more information on CRAVAT, visit http://www.cravat.us. + +Input line number$%$Input line UID$%$Gene$%$Error$%$Input Line diff -r 000000000000 -r 83181dabeb90 test-results/Variant_Non-coding.Result.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-results/Variant_Non-coding.Result.tsv Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,13 @@ +#Non-coding Variant Report +#2018-05-18 15:18:04.020642 +#CRAVAT version: hybrid +#Analysis done at http://www.cravat.us. +#Job Id: znylund_20180518_111800 +#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf +#This report shows analysis results at variant level. +#hg38 genomic. +#N/A +#For more information on CRAVAT, visit http://www.cravat.us. + +Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency VEST p-value VEST FDR CGL driver class +8 VAR524_unknown chr19 17205444 + T C unknown Non-Coding 1 rs2305763 0.0 0.510502431118 0.684095610205 0.693317422434 0.337748344371 0.795930949445 0.435243553009 0.393043827905 0.449691991786 0.629792 intron MYO9B ENST00000595618.5(intron),ENST00000594824.5(intron),ENST00000397274.6(intron) 206.198 . hom 14 7 2.0 diff -r 000000000000 -r 83181dabeb90 test-results/Variant_Result.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-results/Variant_Result.tsv Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,21 @@ +#Variant Report +#2018-05-18 15:15:25.119179 +#CRAVAT version: hybrid +#Analysis done at http://www.cravat.us. +#Job Id: znylund_20180518_111521 +#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf +#This report shows analysis results at variant level. +#hg38 genomic. +#N/A +#For more information on CRAVAT, visit http://www.cravat.us. + +Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology Protein sequence change ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency VEST p-value VEST FDR CGL driver class +1 VAR516_unknown chr1 156705422 + T C unknown CRABP2 MS K9E COSM1984142 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr1:156705422 0 125181 . het 5739 18018 0.318514818515 0.53061 +2 VAR517_unknown chr12 6561055 + T C unknown NOP2 MS N408S 1 0.0 ../MuPIT_Interactive?gm=chr12:6561055 0 14340.8 . het 910 3868 0.235263702172 0.00324 +3 VAR518_unknown chr12 110339630 + C T unknown ATP2A2 MS T557I 1 0.0 ../MuPIT_Interactive?gm=chr12:110339630 0 48828.1 . het 2447 9100 0.268901098901 0.00374 +4 VAR520_unknown chr14 102011985 + A T unknown DYNC1H1 MS R2243S COSM2262213 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr14:102011985 0 12962.3 . het 642 2131 0.301267010793 0.02307 +5 VAR521_unknown chr14 102083954 + C T unknown HSP90AA1 MS D515N COSM2262393 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr14:102083954 0 240809 . het 17374 85275 0.203740838464 0.02014 +6 VAR522_unknown chr17 82082606 + C T unknown FASN MS S1947N COSM4648107 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr17:82082606 0 10374.8 . het 776 3826 0.202822791427 0.18611 +7 VAR523_unknown chr19 17205335 + A T unknown MYO9B MS K1688M 1 0.0 ../MuPIT_Interactive?gm=chr19:17205335 0 0.00158993 . het 2 7 0.285714285714 0.20028 +9 VAR525_unknown chr19 17205973 + T C unknown MYO9B MS V1693A COSM438878 thyroid(2) 1 rs7248508 0.522744 0.526958747465 0.685404424473 0.728029336735 0.352247807018 0.776672496721 0.453231381586 0.406255640183 0.486462728551 0.596900776808 190609063506732,203395363506733,206865653506734,208819603506735,224846277709335,224792027709336 19060906,20339536,20686565,20881960,22484627,22479202 LDL cholesterol(0.0152),HDL cholesterol(0.0279),Triglycerides(0.0141),Height(0.0104),Obesity with early age of onset (age >2)(0.0471),Adiponectin levels(0.0294) ../MuPIT_Interactive?gm=chr19:17205973 0.631589 12243.8 . hom 812 406 2.0 0.95254 +10 VAR526_unknown chr19 18856059 + C T unknown UPF1 MS A571V COSM3100527 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr19:18856059 0 10269.5 . het 592 2379 0.248844052123 0.09372 diff -r 000000000000 -r 83181dabeb90 test-results/combined_variants.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-results/combined_variants.tsv Fri May 18 13:25:29 2018 -0400 @@ -0,0 +1,21 @@ +#Variant Additional Details Report +#2018-05-18 15:15:25.120629 +#CRAVAT version: hybrid +#Analysis done at http://www.cravat.us. +#Job Id: znylund_20180518_111521 +#Input file: _VCF_BEDintersect_on_data_65_and_data_6_.vcf +#This report shows analysis results at variant level. +#hg38 genomic. +#N/A +#For more information on CRAVAT, visit http://www.cravat.us. + +Input line ID Chromosome Position Strand Reference base(s) Alternate base(s) Sample ID HUGO symbol Sequence ontology S.O. transcript S.O. transcript strand Reference peptide Variant peptide Protein sequence change S.O. all transcripts CGC driver class CGC inheritance CGC tumor types somatic CGC tumor types germline ClinVar disease identifier ClinVar XRef COSMIC transcript COSMIC protein change COSMIC variant count ESP6500 AF (European American) ESP6500 AF (African American) HGVS Genomic HGVS Protein HGVS Protein All NCI pathway hits NCI pathway IDs NCI pathway names VEST score transcript VEST p-value VEST score (missense) VEST score (frameshift indels) VEST score (inframe indels) VEST score (stop-gain) VEST score (stop-loss) VEST score (splice site) All transcripts VEST results ClinVar COSMIC ID COSMIC variant count (tissue) Number of samples with variant dbSNP ESP6500 AF (average) gnomAD AF Total gnomAD AF African gnomAD AF American gnomAD AF Ashkenazi Jewish gnomAD AF East Asian gnomAD AF Finnish gnomAD AF Non-Finnish European gnomAD AF Other gnomAD AF South Asian GWAS NHLBI Key (GRASP) GWAS PMID (GRASP) GWAS Phenotype (GRASP) Protein 3D variant In TCGA Mutation Cluster ncRNA Class ncRNA Name Pseudogene Pseudogene Transcript Repeat Class Repeat Family Repeat Name TARGET 1000 Genomes AF UTR/Intron UTR/Intron Gene UTR/Intron All Transcript Phred VCF filters Zygosity Alternate reads Total reads Variant allele frequency VEST FDR CGL driver class +1 VAR516_unknown chr1 156705422 + T C unknown CRABP2 MS ENST00000368221.1 - MPNFSGNWKIIR MPNFSGNWEIIR K9E *ENST00000368221.1:K9E(MS),ENST00000621784.4:K9E(MS),ENST00000368222.7:K9E(MS) ENST00000368221 p.K9E (large_intestine 1) 1 0 0 NC_000001.10:g.156705422T>C ENST00000368221.1:p.Lys9Glu *ENST00000368221.1:p.Lys9Glu,ENST00000621784.4:p.Lys9Glu,ENST00000368222.7:p.Lys9Glu 0 ENST00000368221.1:K9E 0.53061 0.2 *ENST00000368221.1:K9E(0.2:0.53061),ENST00000368222.7:K9E(0.187:0.5543),ENST00000621784.4:K9E(0.186:0.55652) COSM1984142 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr1:156705422 0 125181 . het 5739 18018 0.318514818515 +2 VAR517_unknown chr12 6561055 + T C unknown NOP2 MS ENST00000616948.4 - NTGVILANDANAER STGVILANDANAER N408S ENST00000617555.4:N404S(MS),ENST00000545200.5:N404S(MS),ENST00000541778.5:N404S(MS),ENST00000399466.6:N404S(MS),ENST00000322166.9:N408S(MS),ENST00000537442.5:N408S(MS),ENST00000382421.7:N441S(MS),ENST00000620535.4:N441S(MS),*ENST00000616948.4:N408S(MS) 0 0 NC_000012.10:g.6561055T>C ENST00000616948.4:p.Asn408Ser ENST00000617555.4:p.Asn404Ser,ENST00000545200.5:p.Asn404Ser,ENST00000541778.5:p.Asn404Ser,ENST00000399466.6:p.Asn404Ser,ENST00000322166.9:p.Asn408Ser,ENST00000537442.5:p.Asn408Ser,ENST00000382421.7:p.Asn441Ser,ENST00000620535.4:p.Asn441Ser,*ENST00000616948.4:p.Asn408Ser 0 ENST00000616948.4:N408S 0.00324 0.958 ENST00000617555.4:N404S(0.954:0.00354),*ENST00000616948.4:N408S(0.958:0.00324),ENST00000541778.5:N404S(0.869:0.01488),ENST00000537442.5:N408S(0.956:0.00344),ENST00000399466.6:N404S(0.951:0.00374),ENST00000545200.5:N404S(0.953:0.00354),ENST00000620535.4:N441S(0.938:0.00536),ENST00000382421.7:N441S(0.938:0.00536),ENST00000322166.9:N408S(0.954:0.00354) 1 0.0 ../MuPIT_Interactive?gm=chr12:6561055 0 14340.8 . het 910 3868 0.235263702172 +3 VAR518_unknown chr12 110339630 + C T unknown ATP2A2 MS ENST00000539276.6 + EWGSGSDTLR EWGSGSDILR T557I ENST00000308664.10:T557I(MS),*ENST00000539276.6:T557I(MS) 0 0 NC_000012.10:g.110339630C>T ENST00000539276.6:p.Thr557Ile ENST00000308664.10:p.Thr557Ile,*ENST00000539276.6:p.Thr557Ile 0 ENST00000539276.6:T557I 0.00374 0.951 ENST00000308664.10:T557I(0.822:0.02459),*ENST00000539276.6:T557I(0.951:0.00374) 1 0.0 ../MuPIT_Interactive?gm=chr12:110339630 0 48828.1 . het 2447 9100 0.268901098901 +4 VAR520_unknown chr14 102011985 + A T unknown DYNC1H1 MS ENST00000360184.8 + ALERLEGVEGVAHIIDPK ALESLEGVEGVAHIIDPK R2243S *ENST00000360184.8:R2243S(MS) ENST00000360184 p.R2243S (large_intestine 1) 1 0 0 NC_000014.10:g.102011985A>T ENST00000360184.8:p.Arg2243Ser *ENST00000360184.8:p.Arg2243Ser 1 94da5dd8-5521-11e7-8f50-0ac135e8bacf Lissencephaly gene (LIS1) in neuronal migration and development ENST00000360184.8:R2243S 0.02307 0.829 *ENST00000360184.8:R2243S(0.829:0.02307) COSM2262213 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr14:102011985 0 12962.3 . het 642 2131 0.301267010793 +5 VAR521_unknown chr14 102083954 + C T unknown HSP90AA1 MS ENST00000334701.11 - GVVDSEDLPLNISR GVVDSENLPLNISR D515N ENST00000216281.12:D393N(MS),*ENST00000334701.11:D515N(MS) somatic NHL ENST00000334701 p.D515N (large_intestine 1) 1 0 0 NC_000014.10:g.102083954C>T ENST00000334701.11:p.Asp515Asn ENST00000216281.12:p.Asp393Asn,*ENST00000334701.11:p.Asp515Asn 15 3814fa62-5521-11e7-8f50-0ac135e8bacf,32ff1916-5521-11e7-8f50-0ac135e8bacf,a8411a5c-5521-11e7-8f50-0ac135e8bacf,541d7e20-5521-11e7-8f50-0ac135e8bacf,98ad85f0-5521-11e7-8f50-0ac135e8bacf,9697501e-5521-11e7-8f50-0ac135e8bacf,e6f69242-5521-11e7-8f50-0ac135e8bacf,603902ca-5521-11e7-8f50-0ac135e8bacf,bb3d7c4a-5521-11e7-8f50-0ac135e8bacf,b1ac7318-5521-11e7-8f50-0ac135e8bacf,cb348d72-5521-11e7-8f50-0ac135e8bacf,945aa686-5521-11e7-8f50-0ac135e8bacf,4c90f780-5521-11e7-8f50-0ac135e8bacf,e4e93610-5521-11e7-8f50-0ac135e8bacf,6f7a316e-5521-11e7-8f50-0ac135e8bacf Validated targets of C-MYC transcriptional activation@VEGFR1 specific signals@IL2 signaling events mediated by PI3K@Signaling events mediated by HDAC Class II@Integrin-linked kinase signaling@Integrins in angiogenesis@Class I PI3K signaling events mediated by Akt@Regulation of Telomerase@Glucocorticoid receptor regulatory network@Hypoxic and oxygen homeostasis regulation of HIF-1-alpha@ErbB receptor signaling network@LKB1 signaling events@Signaling events mediated by VEGFR1 and VEGFR2@Class I PI3K signaling events@Regulation of Androgen receptor activity ENST00000216281.12:D393N 0.02014 0.84 ENST00000334701.11:D515N(0.749:0.04989),*ENST00000216281.12:D393N(0.84:0.02014) COSM2262393 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr14:102083954 0 240809 . het 17374 85275 0.203740838464 +6 VAR522_unknown chr17 82082606 + C T unknown FASN MS ENST00000306749.3 - QGVQVQVSTSNISSLEGAR QGVQVQVSTSNINSLEGAR S1947N ENST00000634990.1:S1945N(MS),*ENST00000306749.3:S1947N(MS) ENST00000306749 p.S1947N (large_intestine 1) 1 0 0 NC_000017.10:g.82082606C>T ENST00000306749.3:p.Ser1947Asn ENST00000634990.1:p.Ser1945Asn,*ENST00000306749.3:p.Ser1947Asn 2 34a994cc-5521-11e7-8f50-0ac135e8bacf,812903c2-5521-11e7-8f50-0ac135e8bacf Validated transcriptional targets of deltaNp63 isoforms@p73 transcription factor network ENST00000634990.1:S1945N 0.18611 0.501 ENST00000306749.3:S1947N(0.493:0.19016),*ENST00000634990.1:S1945N(0.501:0.18611) COSM4648107 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr17:82082606 0 10374.8 . het 776 3826 0.202822791427 +7 VAR523_unknown chr19 17205335 + A T unknown MYO9B MS ENST00000594824.5 + IQSHCSYTYGRKGEPGVEPGHFGVCVDSLTSDK IQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK K1688M ENST00000397274.6:K1688M(MS),ENST00000595618.5:K1688M(MS),*ENST00000594824.5:K1688M(MS) 0 0 NC_000019.10:g.17205335A>T ENST00000594824.5:p.Lys1688Met ENST00000397274.6:p.Lys1688Met,ENST00000595618.5:p.Lys1688Met,*ENST00000594824.5:p.Lys1688Met 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity ENST00000397274.6:K1688M 0.20028 0.473 ENST00000594824.5:K1688M(0.464:0.20464),*ENST00000397274.6:K1688M(0.473:0.20028),ENST00000595618.5:K1688M(0.469:0.20231) 1 0.0 ../MuPIT_Interactive?gm=chr19:17205335 0 0.00158993 . het 2 7 0.285714285714 +9 VAR525_unknown chr19 17205973 + T C unknown MYO9B MS ENST00000594824.5 + IQSHCSYTYGRKGEPGVEPGHFGVCVDSLTSDK IQSHCSYTYGRMGEPGAEPGHFGVCVDSLTSDK V1693A ENST00000397274.6:V1693A(MS),ENST00000595618.5:V1693A(MS),*ENST00000594824.5:V1693A(MS) ENST00000319396 p.V1693A (thyroid 2) 2 0.399857 0.645631 NC_000019.10:g.17205973T>C ENST00000594824.5:p.Val1693Ala ENST00000397274.6:p.Val1693Ala,ENST00000595618.5:p.Val1693Ala,*ENST00000594824.5:p.Val1693Ala 1 60f3521c-5521-11e7-8f50-0ac135e8bacf Regulation of RhoA activity ENST00000397274.6:V1693A 0.95254 0.045 ENST00000594824.5:V1693A(0.025:0.98158),*ENST00000397274.6:V1693A(0.045:0.95254),ENST00000595618.5:V1693A(0.042:0.95749) COSM438878 thyroid(2) 1 rs7248508 0.522744 0.526958747465 0.685404424473 0.728029336735 0.352247807018 0.776672496721 0.453231381586 0.406255640183 0.486462728551 0.596900776808 190609063506732,203395363506733,206865653506734,208819603506735,224846277709335,224792027709336 19060906,20339536,20686565,20881960,22484627,22479202 LDL cholesterol(0.0152),HDL cholesterol(0.0279),Triglycerides(0.0141),Height(0.0104),Obesity with early age of onset (age >2)(0.0471),Adiponectin levels(0.0294) ../MuPIT_Interactive?gm=chr19:17205973 0.631589 12243.8 . hom 812 406 2.0 +10 VAR526_unknown chr19 18856059 + C T unknown UPF1 MS ENST00000599848.5 + EAIDSPVSFLALHNQIR EAIDSPVSFLVLHNQIR A571V ENST00000262803.9:A560V(MS),*ENST00000599848.5:A571V(MS) ENST00000262803 p.A560V (large_intestine 1) 1 0 0 NC_000019.10:g.18856059C>T ENST00000599848.5:p.Ala571Val ENST00000262803.9:p.Ala560Val,*ENST00000599848.5:p.Ala571Val 0 ENST00000262803.9:A560V 0.09372 0.662 ENST00000599848.5:A571V(0.643:0.10292),*ENST00000262803.9:A560V(0.662:0.09372) COSM3100527 large_intestine(1) 1 0.0 ../MuPIT_Interactive?gm=chr19:18856059 0 10269.5 . het 592 2379 0.248844052123