changeset 2:6cf9f7f6509c draft default tip

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/ensembl_longest_cds_per_gene commit 651fae48371f845578753052c6fe173e3bb35670
author earlhaminst
date Wed, 15 Mar 2017 20:23:13 -0400
parents a07680f3033a
children
files ensembl_longest_cds_per_gene.py ensembl_longest_cds_per_gene.xml test-data/Mus_musculus.GRCm38.cds.longest.fa
diffstat 3 files changed, 50 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/ensembl_longest_cds_per_gene.py	Tue Mar 07 11:12:55 2017 -0500
+++ b/ensembl_longest_cds_per_gene.py	Wed Mar 15 20:23:13 2017 -0400
@@ -1,7 +1,6 @@
 """
 This script reads a CDS FASTA file from Ensembl and outputs a FASTA file with
-only the longest CDS sequence for each gene. The header of the sequences in the
-output file will be the transcript id without version.
+only the longest CDS sequence for each gene.
 """
 from __future__ import print_function
 
@@ -33,7 +32,10 @@
     """
     Remove the optional '.VERSION' from an Ensembl id.
     """
-    return s.split('.')[0]
+    if s.startswith('ENS'):
+        return s.split('.')[0]
+    else:
+        return s
 
 
 parser = optparse.OptionParser()
@@ -52,7 +54,6 @@
 
 for entry in FASTAReader_gen(options.input_fasta_filename):
     transcript_id, rest = entry.header[1:].split(' ', 1)
-    transcript_id = remove_id_version(transcript_id)
     gene_id = None
     for s in rest.split(' '):
         if s.startswith('gene:'):
@@ -73,6 +74,6 @@
 
 with open(options.output_fasta_filename, 'w') as output_fasta_file:
     for entry in FASTAReader_gen(options.input_fasta_filename):
-        transcript_id = remove_id_version(entry.header[1:].split(' ')[0])
+        transcript_id = entry.header[1:].split(' ')[0]
         if transcript_id in selected_transcript_ids:
-            output_fasta_file.write(">%s\n%s\n" % (transcript_id, entry.sequence))
+            output_fasta_file.write("%s\n%s\n" % (entry.header, entry.sequence))
--- a/ensembl_longest_cds_per_gene.xml	Tue Mar 07 11:12:55 2017 -0500
+++ b/ensembl_longest_cds_per_gene.xml	Wed Mar 15 20:23:13 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="ensembl_longest_cds_per_gene" name="Select longest CDS per gene" version="0.0.1">
+<tool id="ensembl_longest_cds_per_gene" name="Select longest CDS per gene" version="0.0.2">
     <description>from Ensembl CDS FASTA</description>
     <command detect_errors="exit_code"><![CDATA[
 python '$__tool_directory__/ensembl_longest_cds_per_gene.py' -f '$input' -o '$output'
@@ -22,6 +22,6 @@
 
     >ENSMUST00000177965.1 cds chromosome:GRCm38:12:113456720:113456736:-1 gene:ENSMUSG00000094057.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-7 description:immunoglobulin heavy diversity 2-7 [Source:MGI Symbol;Acc:MGI:4439866]
 
-Among the CDS sequences having the same gene identifier (ENSMUSG00000094057 in the example above), the tool will select the one with the longest sequence. The header of the sequences in the output dataset will contain only the transcript id without version (ENSMUST00000177965 in the example above).
+Among the CDS sequences having the same gene identifier (ENSMUSG00000094057 in the example above), the tool will select the one with the longest sequence.
     ]]></help>
 </tool>
--- a/test-data/Mus_musculus.GRCm38.cds.longest.fa	Tue Mar 07 11:12:55 2017 -0500
+++ b/test-data/Mus_musculus.GRCm38.cds.longest.fa	Wed Mar 15 20:23:13 2017 -0400
@@ -1,134 +1,134 @@
->ENSMUST00000196221
+>ENSMUST00000196221.1 cds chromosome:GRCm38:14:54113468:54113476:1 gene:ENSMUSG00000096749.2 gene_biotype:TR_D_gene transcript_biotype:TR_D_gene gene_symbol:Trdd1 description:T cell receptor delta diversity 1 [Source:MGI Symbol;Acc:MGI:4439547]
 ATGGCATAT
->ENSMUST00000177564
+>ENSMUST00000177564.1 cds chromosome:GRCm38:14:54122226:54122241:1 gene:ENSMUSG00000096176.1 gene_biotype:TR_D_gene transcript_biotype:TR_D_gene gene_symbol:Trdd2 description:T cell receptor delta diversity 2 [Source:MGI Symbol;Acc:MGI:4439546]
 ATCGGAGGGATACGAG
->ENSMUST00000178537
+>ENSMUST00000178537.1 cds chromosome:GRCm38:6:41533201:41533212:1 gene:ENSMUSG00000095668.1 gene_biotype:TR_D_gene transcript_biotype:TR_D_gene gene_symbol:Trbd1 description:T cell receptor beta, D region 1 [Source:MGI Symbol;Acc:MGI:4439571]
 GGGACAGGGGGC
->ENSMUST00000178862
+>ENSMUST00000178862.1 cds chromosome:GRCm38:6:41542163:41542176:1 gene:ENSMUSG00000094569.1 gene_biotype:TR_D_gene transcript_biotype:TR_D_gene gene_symbol:Trbd2 description:T cell receptor beta, D region 2 [Source:MGI Symbol;Acc:MGI:4439727]
 GGGACTGGGGGGGC
->ENSMUST00000179520
+>ENSMUST00000179520.1 cds chromosome:GRCm38:12:113430528:113430538:-1 gene:ENSMUSG00000094028.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd4-1 description:immunoglobulin heavy diversity 4-1 [Source:MGI Symbol;Acc:MGI:4439801]
 CTAACTGGGAC
->ENSMUST00000179883
+>ENSMUST00000179883.1 cds chromosome:GRCm38:12:113448214:113448229:-1 gene:ENSMUSG00000094552.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd3-2 description:immunoglobulin heavy diversity 3-2 [Source:MGI Symbol;Acc:MGI:4439707]
 AGACAGCTCAGGCTAC
->ENSMUST00000195858
+>ENSMUST00000195858.1 cds chromosome:GRCm38:12:113449588:113449597:-1 gene:ENSMUSG00000096420.2 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-6 description:immunoglobulin heavy diversity 5-6 [Source:MGI Symbol;Acc:MGI:4937234]
 GAATACCTAC
->ENSMUST00000180001
+>ENSMUST00000180001.1 cds chromosome:GRCm38:12:113450851:113450867:-1 gene:ENSMUSG00000095656.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-8 description:immunoglobulin heavy diversity 2-8 [Source:MGI Symbol;Acc:MGI:4439706]
 TCTACTATGGTAACTAC
->ENSMUST00000178815
+>ENSMUST00000178815.1 cds chromosome:GRCm38:12:113454942:113454951:-1 gene:ENSMUSG00000094957.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-5 description:immunoglobulin heavy diversity 5-5 [Source:MGI Symbol;Acc:MGI:4937334]
 GACTACCTAC
->ENSMUST00000177965
+>ENSMUST00000177965.1 cds chromosome:GRCm38:12:113456720:113456736:-1 gene:ENSMUSG00000094057.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-7 description:immunoglobulin heavy diversity 2-7 [Source:MGI Symbol;Acc:MGI:4439866]
 TCTACTATGGTTACGAC
->ENSMUST00000178909
+>ENSMUST00000178909.1 cds chromosome:GRCm38:12:113459864:113459892:-1 gene:ENSMUSG00000094268.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-8 description:immunoglobulin heavy diversity 5-8 [Source:MGI Symbol;Acc:MGI:4937171]
 AGACAGCTAGCCTCTGCAGTGCCACAACC
->ENSMUST00000177646
+>ENSMUST00000177646.1 cds chromosome:GRCm38:12:113460101:113460110:-1 gene:ENSMUSG00000096884.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-4 description:immunoglobulin heavy diversity 5-4 [Source:MGI Symbol;Acc:MGI:4937058]
 GAATACCTAC
->ENSMUST00000178230
+>ENSMUST00000178230.1 cds chromosome:GRCm38:12:113461369:113461385:-1 gene:ENSMUSG00000096250.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-6 description:immunoglobulin heavy diversity 2-6 [Source:MGI Symbol;Acc:MGI:4439865]
 CCTACTATAGTAACTAC
->ENSMUST00000178483
+>ENSMUST00000178483.1 cds chromosome:GRCm38:12:113464524:113464552:-1 gene:ENSMUSG00000095592.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-7 description:immunoglobulin heavy diversity 5-7 [Source:MGI Symbol;Acc:MGI:4936973]
 AGGCAGCTAGCCTCTGCAGTGCCACAACC
->ENSMUST00000179262
+>ENSMUST00000179262.1 cds chromosome:GRCm38:12:113464761:113464770:-1 gene:ENSMUSG00000093876.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-3 description:immunoglobulin heavy diversity 5-3 [Source:MGI Symbol;Acc:MGI:4937297]
 GAATACCTAC
->ENSMUST00000178549
+>ENSMUST00000178549.1 cds chromosome:GRCm38:12:113466027:113466043:-1 gene:ENSMUSG00000095897.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-5 description:immunoglobulin heavy diversity 2-5 [Source:MGI Symbol;Acc:MGI:4439705]
 CCTACTATAGTAACTAC
->ENSMUST00000193012
+>ENSMUST00000193012.1 cds chromosome:GRCm38:12:113469189:113469217:-1 gene:ENSMUSG00000103203.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Gm37327 description:predicted gene, 37327 [Source:MGI Symbol;Acc:MGI:5610555]
 AGGCAGCTAGCCTCTGCAGTGCCACAACC
->ENSMUST00000179166
+>ENSMUST00000179166.1 cds chromosome:GRCm38:12:113469426:113469435:-1 gene:ENSMUSG00000096396.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd5-2 description:immunoglobulin heavy diversity 5-2 [Source:MGI Symbol;Acc:MGI:4936898]
 GAATACCTAC
->ENSMUST00000179560
+>ENSMUST00000179560.1 cds chromosome:GRCm38:12:113470694:113470710:-1 gene:ENSMUSG00000095444.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-4 description:immunoglobulin heavy diversity 2-4 [Source:MGI Symbol;Acc:MGI:4439709]
 TCTACTATGATTACGAC
->ENSMUST00000177839
+>ENSMUST00000177839.1 cds chromosome:GRCm38:12:113475400:113475416:-1 gene:ENSMUSG00000096568.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd2-3 description:immunoglobulin heavy diversity 2-3 [Source:MGI Symbol;Acc:MGI:4439708]
 TCTATGATGGTTACTAC
->ENSMUST00000103439
+>ENSMUST00000103439.1 cds chromosome:GRCm38:12:113482170:113482192:-1 gene:ENSMUSG00000076630.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd1-1 description:immunoglobulin heavy diversity 1-1 [Source:MGI Symbol;Acc:MGI:4439871]
 TTTATTACTACGGTAGTAGCTAC
->ENSMUST00000180266
+>ENSMUST00000180266.1 cds chromosome:GRCm38:12:113525313:113525329:-1 gene:ENSMUSG00000093818.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Ighd3-1 description:immunoglobulin heavy diversity 3-1 [Source:MGI Symbol;Acc:MGI:4439891]
 GGCACAGCTCGGGCTAC
->ENSMUST00000103441
+>ENSMUST00000103441.1 cds chromosome:GRCm38:12:113528032:113528054:-1 gene:ENSMUSG00000076632.1 gene_biotype:IG_D_gene transcript_biotype:IG_D_gene gene_symbol:Gm16968 description:predicted gene, 16968 [Source:MGI Symbol;Acc:MGI:4439892]
 TATATAACTAAAGTGGTAGCTCA
->ENSMUST00000177622
+>ENSMUST00000177622.3 cds chromosome:GRCm38:14:53443249:53443839:1 gene:ENSMUSG00000096908.3 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav7-3 description:T cell receptor alpha variable 7-3 [Source:MGI Symbol;Acc:MGI:3649556]
 ATGAAATCCTTGAGTGTTTCCCTAGTGGTCCTGTGGCTCCAGTTAAACTGGGTGAACAGC
 CAGCAGAAGGTGCAGCAGAGCCCAGAATCCCTCATTGTCCCAGAGGGAGCCATGACCTCT
 CTCAACTGCACTTTCAGCGACAGTGCTTCTCAGTATTTTGCATGGTACAGACAGCATTCT
 GGGAAAGCCCCCAAGGCACTGATGTCCATCTTCTCCAATGGTGAAAAAGAAGAAGGCAGA
 TTCACAATTCACCTCAATAAAGCCAGTCTGCATTTCTCCCTGCACATCAGAGACTCCCAG
 CCCAGTGACTCTGCTCTCTACCTCTGTGCAGTGAGCA
->ENSMUST00000180711
+>ENSMUST00000180711.2 cds chromosome:GRCm38:14:53454296:53454784:1 gene:ENSMUSG00000094468.5 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav6-4 description:T cell receptor alpha variable 6-4 [Source:MGI Symbol;Acc:MGI:3702153]
 ATGAATACTTCTCCAGTTTTAGTAACTGTGATGCTGCTGTTCATGCTTGGGATGAGAAAG
 ACCCACGGAGATTCAGTGACCCAGAAACAAGGTCAAGTGACCCTTTCAGAAGATGACTTC
 CTATTTATAAATTGCACTTATTCTACCACAACATACCCAACTCTTTTGTGGTATGTCCAA
 TATCTTGGACAAGGTCCACAGCTCCTTCTGAAAGTGACAACTGCCAACAATAAGGGAATC
 AGCAGAGGCTTTGAAGCTACATATGACAAAGGGACCACGTCCTTCCACTTGCAGAAAGCC
 TCAGTGCAGGAGTCAGACTCAGCCGTGTACTTCTGTGCTCTGGTTGA
->ENSMUST00000181728
+>ENSMUST00000181728.2 cds chromosome:GRCm38:14:53461099:53461738:1 gene:ENSMUSG00000094766.3 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav7-4 description:T cell receptor alpha variable 7-4 [Source:MGI Symbol;Acc:MGI:3649611]
 ATGAAATCCTTGAGTGTTTCACTAGTGGTCCTGTGGCTCCAGTTAAACTGCGTGAGGAGC
 CAGCAGAAGGTGCAGCAGAGCCCAGAATCCCTCAGTGTCCCAGAGGGAGGCATGGCCTCT
 CTCAACTGCACTTCAAGTGATCGTAATTTTCAGTACTTCTGGTGGTACAGACAGCATTCT
 GGAGAAGGCCCCAAGGCACTGATGTCAATCTTCTCTGATGGTGACAAGAAAGAAGGCAGA
 TTCACAGCTCACCTCAATAAGGCCAGCCTGCATGTTTCCCTGCACATCAGAGACTCCCAG
 CCCAGTGACTCCGCTCTCTACTTCTGTGCAGCTAGTGAGCA
->ENSMUST00000103643
+>ENSMUST00000103643.3 cds chromosome:GRCm38:14:53469756:53470232:1 gene:ENSMUSG00000076831.3 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav8-1 description:T cell receptor alpha variable 8-1 [Source:MGI Symbol;Acc:MGI:3649608]
 ATGCACAGCCTCCTGGGGTTGTTGTTGTGGCTGCAACTGACAAGGGTGAATAGTCAACTA
 GCAGAAGAGAATTCGTGGGCCCTGAGCGTCCACGAGGGTGAAAGTGTCACGGTGAATTGT
 AGTTACAAGACATCCATAACTGCCCTACAGTGGTACAGACAGAAGTCAGGCAAAGGCCCT
 GCCCAGCTAATCTTAATACGTTCAAATGAGAGAGAGAAGCGCAATGGAAGACTCAGAGCC
 ACCCTTGACACCTCCAGCCAGAGCAGCTCCTTGTCCATCACTGCTACTCGGTGTGAAGAC
 ACCGCTGTGTACTTCTGTGCTACTGATG
->ENSMUST00000103581
+>ENSMUST00000103581.5 cds chromosome:GRCm38:14:53488045:53488567:1 gene:ENSMUSG00000096900.3 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav9-1 description:T cell receptor alpha  variable 9-1 [Source:MGI Symbol;Acc:MGI:3650356]
 ATGCTCCTGGTTCTCATCTCGTTCCTCGGGATACATTTCTTCCTGGATGTCCAAACACAG
 ACAGTTTCCCAGTCTGATGCCCATGTCACTGTCTTCGAAGGAGACTCGGTGGAGCTGAGA
 TGCAACTATTCCTATGGTGGATCCATTTACCTCTCCTGGTACATCCAGCACCATGGCCAT
 GGCCTCCAGTTTCTCCTCAAGTACTATTCGGGAAACCCAGTGGTTCAAGGAGTGAACGGC
 TTCGAGGCTGAGTTCAGCAAGAGCGACTCTTCCTTCCACCTTCGGAAAGCCTCTGTGCAC
 TGGAGCGACTCGGCTGTGTACTTCTGTGCTGTGAGCG
->ENSMUST00000181210
+>ENSMUST00000181210.2 cds chromosome:GRCm38:14:53491115:53491622:1 gene:ENSMUSG00000096149.5 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav6-5 description:T cell receptor alpha variable 6-5 [Source:MGI Symbol;Acc:MGI:3649609]
 ATGAACCTTTGTCCTGAACTGGGTATTCTACTCTTCCTAATGCTTGGAGAAAGCAATGGA
 GACTCAGTGACTCAGACAGAAGGCCCAGTGACACTGTCTGAAGGGACTTCTCTGACTGTG
 AACTGTTCCTATGAAACCAAACAGTACCCAACCCTGTTCTGGTATGTGCAGTATCCCGGA
 GAAGGTCCACAGCTCCTCTTTAAAGTCCCAAAGGCCAACGAGAAGGGAAGCAACAGAGGT
 TTTGAAGCTACATACAATAAAGAAGCCACCTCCTTCCACTTGCAGAAAGCCTCAGTGCAA
 GAGTCAGACTCGGCTGTGTACTACTGTGCTCTGAGTGA
->ENSMUST00000103583
+>ENSMUST00000103583.4 cds chromosome:GRCm38:14:53505727:53506286:1 gene:ENSMUSG00000096551.2 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav10 description:T cell receptor alpha variable 10 [Source:MGI Symbol;Acc:MGI:3704132]
 ATGAAGACATCCCTTCACACTGTATTCCTATTCTTGTGGCTATGGATGGACTGGGAGAGC
 CATGGAGAGAAGGTCGAGCAACATGAGTCTACACTGAGTGTTCGAGAGGGAGACAGCGCT
 GTCATCAACTGCACTTACACAGATACTGCTTCATCATACTTCCCTTGGTACAAGCAAGAA
 GCTGGAAAGAGTCTCCACTTTGTGATAGACATTCGTTCAAATGTGGACAGAAAACAGAGC
 CAAAGACTTATAGTTTTGTTGGATAAGAAAGCCAAACGATTCTCCCTGCACATCACAGCC
 ACACAGCCTGAAGATTCAGCCATCTACTTCTGTGCAGCAAGCA
->ENSMUST00000103584
+>ENSMUST00000103584.3 cds chromosome:GRCm38:14:53516929:53517366:1 gene:ENSMUSG00000095862.1 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav6-6 description:T cell receptor alpha variable 6-6 [Source:MGI Symbol;Acc:MGI:4439905]
 ATGGACTCTTCTCCAGGCTTCGTGGCTGTGATACTTCTCATACTTGGAAGGACCCACGGA
 GATTCCGTGACTCAAACAGAAGGCCCAGTGACCGTCTCAGAAAGCGAGTCCCTGATAATA
 AATTGCACGTATTCAGCCACAAGCATAGCTTACCCTAATCTTTTCTGGTATGTTCGATAT
 CCTGGAGAAGGTCTACAACTCCTCCTGAAAGTCATTACGGCTGGCCAGAAGGGAAGCAGC
 AGAGGGTTTGAAGCCACATACAATAAAGAAACCACCTCCTTCCACTTGCAGAAAGCCTCA
 GTGCAAGAGTCAGACTCGGCTGTGTACTACTGTGCTCTGGGTGA
->ENSMUST00000103585
+>ENSMUST00000103585.3 cds chromosome:GRCm38:14:53519303:53519859:1 gene:ENSMUSG00000096615.1 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav11 description:T cell receptor alpha variable 11 [Source:MGI Symbol;Acc:MGI:3642163]
 ATGAAAAAGCGCCTGAGTGCCTGCTGGGTGGTCTTGTGGCTGCATTATCAGTGGGTGGCT
 GGCAAGACCCAAGTGGAGCAGAGTCCTCAGTCCCTGGTTGTCCGTCAGGGAGAGAACTGC
 GTCCTTCAATGTAATTACAGTGTGACCCCCGACAACCACTTAAGGTGGTTCAAACAGGAC
 ACAGGCAAAGGTCTTGTGTCCCTGACAGTCCTGGTTGACCAAAAAGACAAAACGTCAAAT
 GGGAGATACTCAGCAACTCTGGATAAAGATGCTAAGCACAGCACGCTGCACATCACAGCC
 ACCCTGCTGGATGACACTGCCACCTACATCTGTGTGGTGGGCG
->ENSMUST00000200609
+>ENSMUST00000200609.1 cds chromosome:GRCm38:14:53530786:53531313:1 gene:ENSMUSG00000106620.1 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav7-5 description:T cell receptor alpha variable 7-5 [Source:MGI Symbol;Acc:MGI:3648929]
 ATGAAATCCTTGAGTGTTTCACTAGTGGTCCTGTGGCTCCAGTTTAATTGGGTGAGCAGC
 CAGCAGAAGGTGCAGCAGAGCCCAGAATCCCTCACTGTCTCAGAGGGAGCCATGGCCTCT
 CTCAACTGCACGTTCAGTGATGGTACTTCTAACAACTTCAGGTGGTACAGACAGCATTCT
 GCGAAAGGCCTTGAGGTGCTAGTGTCCATCTTCTCTGATGGTGAAAAGGAAGAAGGCAGA
 TTTACAGCTCACCTCAATAGAGCCAACTTGCATGTTTCCCTACACATCAGAGAACCACAA
 CCCAGTGACTCTGCTGTCTACCTCTGTGCAGTGAGCA
->ENSMUST00000200115
+>ENSMUST00000200115.1 cds chromosome:GRCm38:14:53538191:53538738:1 gene:ENSMUSG00000096825.2 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav12-1 description:T cell receptor alpha variable 12-1 [Source:MGI Symbol;Acc:MGI:4440525]
 ATGAACATGCGTCCTGTCACCTCCTCAGTTCTCGTGCTCCTCCTAATGCTCAGAAGGAGC
 AATGGAGACTCCGTGACCCAGACAGAAGGCCTGGTCACTGTCACCGAGGGGTTGCCTGTG
 AAGCTGAACTGCACCTATCAGACTACTTATTTAACTATTGCCTTTTTCTGGTATGTGCAA
 TATCTCAACGAAGCCCCTCAGGTACTCCTGAAGAGCTCCACAGACAACAAGAGGACCGAG
 CACCAAGGGTTCCACGCCACTCTCCATAAGAGCAGCAGCTCCTTCCATCTGCAGAAGTCC
 TCAGCGCAGCTGTCAGACTCTGCCCTGTACTACTGTGCTCTGAGTGA
->ENSMUST00000103651
+>ENSMUST00000103651.3 cds chromosome:GRCm38:14:53545014:53545525:1 gene:ENSMUSG00000076839.3 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav13-1 description:T cell receptor alpha variable 13-1 [Source:MGI Symbol;Acc:MGI:4439904]
 ATGAAGAGGCTGCTGAGCTCTCTGCTGGGGCTTCTGTGCACCCAGGTTTGCTGGGTGAAA
 GGACAGCAAGTGCAGCAGAGCCCCGCGTCCTTGGTTCTGCAGGAGGGGGAGAACGCAGAG
 CTGCAGTGTAACTTTTCCACATCTTTGAACAGTATGCAGTGGTTTTACCAACGTCCTGGG
 GGAAGTCTCGTCAGCCTGTTCTACAATCCTTCTGGGACAAAGCATAGTGGGAGACTGACA
 TCCACTACAGTCATCAAAGAACGTCGCAGCTCTTTGCACATTTCCTCCTCCCAGACAACA
 GACTCAGGCACTTATCTCTGTGCTTTGGAAC
->ENSMUST00000198297
+>ENSMUST00000198297.1 cds chromosome:GRCm38:14:53554022:53554558:1 gene:ENSMUSG00000076840.4 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav14-1 description:T cell receptor alpha variable 14-1 [Source:MGI Symbol;Acc:MGI:3646773]
 ATGGACAAGATTCTGACAGCATCATTTTTACTCCTAGGCCTTCACCTAGCTGGGGTGAAT
 GGCCAGCAGAAGGAGAAACATGACCAGCAGCAGGTGAGACAAAGTCCCCAATCTCTGACA
 GTCTGGGAAGGAGGAACCACAGTTCTGACCTGCAGTTATGAGGACAGCACTTTTAACTAC
@@ -136,35 +136,35 @@
 GTGTCCGATAAAAAGGAAGATGGACGATTCACAACCTTCTTCAATAAAAGGGAGAAAAAG
 CTCTCCTTGCACATCATAGACTCTCAGCCTGGAGACTCAGCCACCTACTTCTGTGCAGCA
 AGTG
->ENSMUST00000200101
+>ENSMUST00000200101.1 cds chromosome:GRCm38:14:53559632:53560247:1 gene:ENSMUSG00000094016.2 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav15-1-dv6-1 description:T cell receptor alpha variable 15-1-DV6-1 [Source:MGI Symbol;Acc:MGI:4439369]
 ATGCCTCCTCACAGCCTGCTCTGTGTGCTGGTGGCCTTGGCTTTCTCTGGATCTAATGTG
 GCCCAGAAAGTGATTCAGGTCTGGTCAACAACAAGCAGGCAGGAGGGCGAAAAACTCACA
 CTGGACTGTTCATATAAGACAAGTCAGGTCTTATACCATCTTTTCTGGTACAAGCACCTT
 CTTAGTGGAGAGATGGTTTTGCTTATTCGACAAATGCCTTCTACTATTGCAATAGAGAGG
 AGCGGCCGCTATTCTGTAGTCTTCCAGAAATCACGCAAATCCATCAGCCTTGTCATTTCA
 ACCTTACAACCAGACGATTCGGGAAAGTATTTCTGTGCTCTCTGGGAGCTGG
->ENSMUST00000103654
+>ENSMUST00000103654.2 cds chromosome:GRCm38:14:53590857:53591514:1 gene:ENSMUSG00000094966.2 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav9-2 description:T cell receptor alpha  variable 9-2 [Source:MGI Symbol;Acc:MGI:4439903]
 ATGCTCCTGGCGCTCCTCCCAGTGCTGGGGATACACTTTGTCCTGAGAGATGCCCAAGCT
 CAGTCAGTGACGCAGCCCGATGCTCGCGTCACTGTCTCTGAAGGAGCCTCTCTGCAGCTG
 AGATGCAAGTATTCCTACTCTGGGACACCTTATCTGTTCTGGTATGTCCAGTACCCGCGG
 CAGGGGCTGCAGCTGCTCCTCAAGTACTATTCAGGAGACCCAGTGGTTCAAGGAGTGAAT
 GGCTTCGAGGCTGAGTTCAGCAAGAGTAACTCTTCCTTCCACCTGCGGAAAGCCTCTGTG
 CACTGGAGCGACTCTGCTGTGTACTTCTGTGTTTTGAGCG
->ENSMUST00000103655
+>ENSMUST00000103655.2 cds chromosome:GRCm38:14:53598828:53599410:1 gene:ENSMUSG00000093966.2 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav4-3 description:T cell receptor alpha variable 4-3 [Source:MGI Symbol;Acc:MGI:4440478]
 ATGCAGAGGAACCTGGGAGCTGTGCTGGGGATTCTGTGGGTGCAGATTTGCTGGGTGAGC
 GGAGATAAGGTGAAACAAAGTCCCTCAGCGCTGAGTCTCCAAGAAGGAACCAATTCTGCT
 CTGAGATGCAATTTTTCTATCGCCGCGACAACTGTGCAGTGGTTCCTACAGAATCCCAGG
 GGCAGCCTCATCAATCTTTTTTACCTGGTTCCAGGAACAAAGGAGAATGGGAGGTTAAAG
 TCAGCATTCGATTCTAAGGAGAGCTACAGCACCCTGCACATCAGGGATGCCCAGCTGGAG
 GACTCAGGCACTTACTTCTGTGCTGCTGAGG
->ENSMUST00000180972
+>ENSMUST00000180972.2 cds chromosome:GRCm38:14:53616315:53616914:1 gene:ENSMUSG00000096656.6 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav12-2 description:T cell receptor alpha variable 12-2 [Source:MGI Symbol;Acc:MGI:5293447]
 ATGAACATGCGTCCTGACACCTGCTCAGTTCTTGTGCTCCTCTTAATGCTCAGAAGGAAC
 AATGGAGACTCTGTGACCCAGACAGAAGGCCTGGTCACTCTCACCGAGGGGTTGCCTGTG
 ATGCTGAACTGCACCTATCAGAGTACTTACTCACCTTTCCTTTTCTGGTATGTGCAACAT
 CTCAACGAAGCCCCTAAGCTACTTTTGAAGAGCTTCACAGACAACAAGAGGCCCGAGCAC
 CAAGGGTTCCACGCCACTCTCCATAAGAGCAGCAGCTCCTTCCATCTGCAGAAGTCCTCA
 GCGCAGCTGTCAGACTCTGCCCTGTACTACTGTGCTTTGAGTGA
->ENSMUST00000103657
+>ENSMUST00000103657.5 cds chromosome:GRCm38:14:53621657:53622245:1 gene:ENSMUSG00000095958.3 gene_biotype:TR_V_gene transcript_biotype:TR_V_gene gene_symbol:Trav12-3 description:T cell receptor alpha variable 12-3 [Source:MGI Symbol;Acc:MGI:3648633]
 ATGCGTCCTGGCACCTGCTCAGTTCTTGTGCTCCTCCTAATGCTCAGGAGGAGCAATGGA
 GATGGAGACTCAGTGACCCAGAAGGAAGGCCTGGTCACTCTCACCGAGGGGTTGCCTGTG
 ATGCTGAACTGCACCTATCAGACTATTTACTCAAATGCTTTCCTTTTCTGGTATGTGCAC