# HG changeset patch # User earlhaminst # Date 1572524211 14400 # Node ID e8e75a79de59c201ca0338ad56f862d3b8ee63b9 # Parent f4acbfe8d6feebe98640fb2152bf28a2c354a5ca "planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit 9c8611fee927883f50bc6955771aa69df1ce8457" diff -r f4acbfe8d6fe -r e8e75a79de59 gstf_preparation.py --- a/gstf_preparation.py Wed Oct 17 07:31:29 2018 -0400 +++ b/gstf_preparation.py Thu Oct 31 08:16:51 2019 -0400 @@ -2,6 +2,7 @@ import json import optparse +import os import sqlite3 import sys @@ -114,10 +115,7 @@ # a 5' UTR can be split among multiple exons # a CDS can be part of multiple transcripts for parent in d['Parent'].split(','): - if parent not in parent_dict: - parent_dict[parent] = [d] - else: - parent_dict[parent].append(d) + parent_dict.setdefault(parent, []).append(d) return d @@ -139,6 +137,8 @@ def add_transcript_to_dict(cols, species, transcript_dict): transcript = feature_to_dict(cols) + if 'biotype' in transcript and transcript['biotype'] != 'protein_coding': + return transcript.update({ 'object_type': 'Transcript', 'seq_region_name': cols[0], @@ -302,7 +302,7 @@ parser.add_option('--regions', default="", help='Comma-separated list of region IDs for which FASTA sequences should be filtered') parser.add_option('-o', '--output', help='Path of the output SQLite file') parser.add_option('--of', help='Path of the output FASTA file') - parser.add_option('--ff', help='Path of the filtered sequences output FASTA file') + parser.add_option('--ff', default=os.devnull, help='Path of the filtered sequences output FASTA file') options, args = parser.parse_args() if args: @@ -403,10 +403,7 @@ else: break - if gene_id in gene_transcripts_dict: - gene_transcripts_dict[gene_id].append((transcript_id, len(entry.sequence))) - else: - gene_transcripts_dict[gene_id] = [(transcript_id, len(entry.sequence))] + gene_transcripts_dict.setdefault(gene_id, []).append((transcript_id, len(entry.sequence))) if options.longestCDS: # For each gene, select the transcript with the longest sequence. diff -r f4acbfe8d6fe -r e8e75a79de59 gstf_preparation.xml --- a/gstf_preparation.xml Wed Oct 17 07:31:29 2018 -0400 +++ b/gstf_preparation.xml Thu Oct 31 08:16:51 2019 -0400 @@ -1,7 +1,6 @@ converts data for the workflow - - - + ]]> @@ -40,58 +38,56 @@ - + - - - + + + + regions + - + - + - - + - + - - + - + - - + - + - @@ -100,13 +96,22 @@ - + + + + + + + + + + + - - - + ]]> diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/MGP_PahariEiJ_G0008413.1.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/MGP_PahariEiJ_G0008413.1.gff3 Thu Oct 31 08:16:51 2019 -0400 @@ -0,0 +1,139 @@ +##gff-version 3 +##sequence-region 13 1 96704406 +13 Ensembl gene 62596741 62686932 . + . ID=MGP_PahariEiJ_G0008413.1;Name=MGP_PahariEiJ_G0008413.1;biotype=polymorphic_pseudogene +13 Ensembl transcript 62596741 62626623 . + . ID=MGP_PahariEiJ_T0009933.1;Name=MGP_PahariEiJ_T0009933.1;Parent=MGP_PahariEiJ_G0008413.1;biotype=protein_coding +13 Ensembl transcript 62596741 62686932 . + . ID=MGP_PahariEiJ_T0009934.1;Name=MGP_PahariEiJ_T0009934.1;Parent=MGP_PahariEiJ_G0008413.1;biotype=polymorphic_pseudogene +13 Ensembl transcript 62596766 62625799 . + . ID=MGP_PahariEiJ_T0009935.1;Name=MGP_PahariEiJ_T0009935.1;Parent=MGP_PahariEiJ_G0008413.1;biotype=retained_intron +13 Ensembl transcript 62660839 62686932 . + . ID=MGP_PahariEiJ_T0009936.1;Name=MGP_PahariEiJ_T0009936.1;Parent=MGP_PahariEiJ_G0008413.1;biotype=processed_transcript +13 Ensembl transcript 62671962 62686919 . + . ID=MGP_PahariEiJ_T0009937.1;Name=MGP_PahariEiJ_T0009937.1;Parent=MGP_PahariEiJ_G0008413.1;biotype=processed_transcript +13 Ensembl transcript 62671962 62686918 . + . ID=MGP_PahariEiJ_T0009938.1;Name=MGP_PahariEiJ_T0009938.1;Parent=MGP_PahariEiJ_G0008413.1;biotype=protein_coding +13 Ensembl intron 62596975 62624027 . + . Name=intron00001;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl intron 62624355 62626424 . + . Name=intron00002;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl intron 62596975 62624027 . + . Name=intron00003;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62624355 62626424 . + . Name=intron00004;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62626620 62637349 . + . Name=intron00005;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62637436 62640660 . + . Name=intron00006;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62640768 62641046 . + . Name=intron00007;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62641179 62641725 . + . Name=intron00008;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62641854 62641961 . + . Name=intron00009;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62642215 62651556 . + . Name=intron00010;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62651793 62657150 . + . Name=intron00011;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62657340 62660197 . + . Name=intron00012;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62660808 62662195 . + . Name=intron00013;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62662303 62663623 . + . Name=intron00014;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62663751 62665451 . + . Name=intron00015;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62665637 62668991 . + . Name=intron00016;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62669299 62671283 . + . Name=intron00017;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62671361 62671958 . + . Name=intron00018;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62672085 62673958 . + . Name=intron00019;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62674160 62678497 . + . Name=intron00020;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62678579 62679702 . + . Name=intron00021;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62679808 62683727 . + . Name=intron00022;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62683916 62685193 . + . Name=intron00023;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl intron 62596975 62624027 . + . Name=intron00024;Parent=MGP_PahariEiJ_T0009935.1 +13 Ensembl intron 62660879 62662195 . + . Name=intron00025;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62662389 62663623 . + . Name=intron00026;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62663751 62665451 . + . Name=intron00027;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62665637 62668991 . + . Name=intron00028;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62669299 62671283 . + . Name=intron00029;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62671361 62671958 . + . Name=intron00030;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62672085 62673958 . + . Name=intron00031;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62674160 62678497 . + . Name=intron00032;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62678579 62679702 . + . Name=intron00033;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62679808 62683727 . + . Name=intron00034;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62683916 62685193 . + . Name=intron00035;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl intron 62672085 62674007 . + . Name=intron00036;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl intron 62674160 62678497 . + . Name=intron00037;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl intron 62678579 62679702 . + . Name=intron00038;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl intron 62679808 62683727 . + . Name=intron00039;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl intron 62683916 62685193 . + . Name=intron00040;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl intron 62672085 62674007 . + . Name=intron00041;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl intron 62674160 62678497 . + . Name=intron00042;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl intron 62678579 62679702 . + . Name=intron00043;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl intron 62679808 62683727 . + . Name=intron00044;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl intron 62683916 62685193 . + . Name=intron00045;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl CDS 62596855 62596942 . + 0 Name=MGP_PahariEiJ_P0009933;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl CDS 62596943 62596974 . + 2 Name=MGP_PahariEiJ_P0009933;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl CDS 62624028 62624354 . + 0 Name=MGP_PahariEiJ_P0009933;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl CDS 62626425 62626620 . + 0 Name=MGP_PahariEiJ_P0009933;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl CDS 62596855 62596942 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62596943 62596974 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62624028 62624354 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62626425 62626619 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62637350 62637435 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62640661 62640767 . + 1 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62641047 62641178 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62641726 62641853 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62641962 62642214 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62651557 62651792 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62657151 62657339 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62660198 62660807 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62662196 62662302 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62663624 62663750 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62665452 62665636 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62668992 62669298 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62671284 62671360 . + 2 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62671959 62672084 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62673959 62674159 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62678498 62678578 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62679703 62679807 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62683728 62683915 . + 0 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62685194 62685509 . + 1 Name=MGP_PahariEiJ_P0009934;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl CDS 62674016 62674159 . + 0 Name=MGP_PahariEiJ_P0009938;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl CDS 62678498 62678578 . + 0 Name=MGP_PahariEiJ_P0009938;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl CDS 62679703 62679807 . + 0 Name=MGP_PahariEiJ_P0009938;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl CDS 62683728 62683915 . + 0 Name=MGP_PahariEiJ_P0009938;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl CDS 62685194 62685509 . + 1 Name=MGP_PahariEiJ_P0009938;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl exon 62596741 62596942 . + . Name=MGP_PahariEiJ_E0009933.1;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl exon 62596943 62596974 . + . Name=MGP_PahariEiJ_E0009933.2;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl exon 62624028 62624354 . + . Name=MGP_PahariEiJ_E0009933.3;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl exon 62626425 62626623 . + . Name=MGP_PahariEiJ_E0009933.4;Parent=MGP_PahariEiJ_T0009933.1 +13 Ensembl exon 62596741 62596942 . + . Name=MGP_PahariEiJ_E0009933.1;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62596943 62596974 . + . Name=MGP_PahariEiJ_E0009933.2;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62624028 62624354 . + . Name=MGP_PahariEiJ_E0009933.3;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62626425 62626619 . + . Name=MGP_PahariEiJ_E0009934.4;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62637350 62637435 . + . Name=MGP_PahariEiJ_E0009934.5;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62640661 62640767 . + . Name=MGP_PahariEiJ_E0009934.6;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62641047 62641178 . + . Name=MGP_PahariEiJ_E0009934.7;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62641726 62641853 . + . Name=MGP_PahariEiJ_E0009934.8;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62641962 62642214 . + . Name=MGP_PahariEiJ_E0009934.9;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62651557 62651792 . + . Name=MGP_PahariEiJ_E0009934.10;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62657151 62657339 . + . Name=MGP_PahariEiJ_E0009934.11;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62660198 62660807 . + . Name=MGP_PahariEiJ_E0009934.12;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62662196 62662302 . + . Name=MGP_PahariEiJ_E0009934.13;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62663624 62663750 . + . Name=MGP_PahariEiJ_E0009934.14;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62665452 62665636 . + . Name=MGP_PahariEiJ_E0009934.15;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62668992 62669298 . + . Name=MGP_PahariEiJ_E0009934.16;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62671284 62671360 . + . Name=MGP_PahariEiJ_E0009934.17;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62671959 62672084 . + . Name=MGP_PahariEiJ_E0009934.18;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62673959 62674159 . + . Name=MGP_PahariEiJ_E0009934.19;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62678498 62678578 . + . Name=MGP_PahariEiJ_E0009934.20;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62679703 62679807 . + . Name=MGP_PahariEiJ_E0009934.21;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62683728 62683915 . + . Name=MGP_PahariEiJ_E0009934.22;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62685194 62686932 . + . Name=MGP_PahariEiJ_E0009934.23;Parent=MGP_PahariEiJ_T0009934.1 +13 Ensembl exon 62596766 62596974 . + . Name=MGP_PahariEiJ_E0009935.1;Parent=MGP_PahariEiJ_T0009935.1 +13 Ensembl exon 62624028 62625799 . + . Name=MGP_PahariEiJ_E0009935.2;Parent=MGP_PahariEiJ_T0009935.1 +13 Ensembl exon 62660839 62660878 . + . Name=MGP_PahariEiJ_E0009936.1;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62662196 62662388 . + . Name=MGP_PahariEiJ_E0009936.2;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62663624 62663750 . + . Name=MGP_PahariEiJ_E0009936.3;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62665452 62665636 . + . Name=MGP_PahariEiJ_E0009936.4;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62668992 62669298 . + . Name=MGP_PahariEiJ_E0009936.5;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62671284 62671360 . + . Name=MGP_PahariEiJ_E0009936.6;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62671959 62672084 . + . Name=MGP_PahariEiJ_E0009936.7;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62673959 62674159 . + . Name=MGP_PahariEiJ_E0009936.8;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62678498 62678578 . + . Name=MGP_PahariEiJ_E0009936.9;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62679703 62679807 . + . Name=MGP_PahariEiJ_E0009936.10;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62683728 62683915 . + . Name=MGP_PahariEiJ_E0009936.11;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62685194 62686932 . + . Name=MGP_PahariEiJ_E0009936.12;Parent=MGP_PahariEiJ_T0009936.1 +13 Ensembl exon 62671962 62672084 . + . Name=MGP_PahariEiJ_E0009937.1;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl exon 62674008 62674159 . + . Name=MGP_PahariEiJ_E0009937.2;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl exon 62678498 62678578 . + . Name=MGP_PahariEiJ_E0009936.9;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl exon 62679703 62679807 . + . Name=MGP_PahariEiJ_E0009936.10;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl exon 62683728 62683915 . + . Name=MGP_PahariEiJ_E0009936.11;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl exon 62685194 62686919 . + . Name=MGP_PahariEiJ_E0009937.6;Parent=MGP_PahariEiJ_T0009937.1 +13 Ensembl exon 62671962 62672084 . + . Name=MGP_PahariEiJ_E0009937.1;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl exon 62674008 62674159 . + . Name=MGP_PahariEiJ_E0009938.2;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl exon 62678498 62678578 . + . Name=MGP_PahariEiJ_E0009934.20;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl exon 62679703 62679807 . + . Name=MGP_PahariEiJ_E0009934.21;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl exon 62683728 62683915 . + . Name=MGP_PahariEiJ_E0009934.22;Parent=MGP_PahariEiJ_T0009938.1 +13 Ensembl exon 62685194 62686918 . + . Name=MGP_PahariEiJ_E0009938.6;Parent=MGP_PahariEiJ_T0009938.1 diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Mus_pahari.PAHARI_EIJ_v1.1.cds.all.shortened.fa Thu Oct 31 08:16:51 2019 -0400 @@ -0,0 +1,99 @@ +>MGP_PahariEiJ_T0009933.1 cds chromosome:PAHARI_EIJ_v1.1:13:62596741:62626623:1 gene:MGP_PahariEiJ_G0008413.1 gene_biotype:polymorphic_pseudogene transcript_biotype:protein_coding gene_symbol:Atp10d description:ATPase, class V, type 10D [Source:MGI Symbol;Acc:MGI:2450125] +AAGACAAATGGCTGGCTTGGAAGCGTAACTCTCACCGCCCTTTGGATCCCTGCTCGCTTC +TCTTTTGGCACTTTGGGATCCGAGGTAACCATGCGGTGATGAGCGGCCCGGGAGGGACAG +ATCACCTGAACCAGCCGGGTCTCCCTGCGTCTTGGACATGACTGAGCTTCTGCAGTGGGC +CAGACATCACTGGCGTCGGCTGAGCCATGGGAGAACCCAGGGTGAAGATGAGAGGCCGTA +CAACTACGCCTCCCTGCTGGCCTGTGGGGGCAAGTCCCCCCGGACCCCCAGGCCTGCAGG +AAAGCACCGTGTCGTTATTCCTCACCTTCAGTGCTTCAGGGATGAGTACGAGAGGTTTTC +TGGAACCTACGTGAATAACCGGATACGGACGACCAAGTACACACTCCTGAACTTTGTGCC +AAGGAACTTATTTGAACAGTTTCACAGGGCTGCCAATTTATATTTCCTGTTCCTCGTGGT +CCTGAACTGGGTGCCTTTGGTAGAAGCCTTCCAAAAGGAAATCACCATGCTGCCTCTGGT +GGTGGTCCTCACAATTATTGCAATTAAAGATGGCTTGGAAGACTACCGGAAGTACAAAAT +TGACAAGCAGATCAACAACTTAATAACCAAGGTTTACAGTAGG +>MGP_PahariEiJ_T0009934.1 cds chromosome:PAHARI_EIJ_v1.1:13:62596741:62686932:1 gene:MGP_PahariEiJ_G0008413.1 gene_biotype:polymorphic_pseudogene transcript_biotype:polymorphic_pseudogene gene_symbol:Atp10d description:ATPase, class V, type 10D [Source:MGI Symbol;Acc:MGI:2450125] +AAGACAAATGGCTGGCTTGGAAGCGTAACTCTCACCGCCCTTTGGATCCCTGCTCGCTTC +TCTTTTGGCACTTTGGGATCCGAGGTAACCATGCGGTGATGAGCGGCCCGGGAGGGACAG +ATCACCTGAACCAGCCGGGTCTCCCTGCGTCTTGGACATGACTGAGCTTCTGCAGTGGGC +CAGACATCACTGGCGTCGGCTGAGCCATGGGAGAACCCAGGGTGAAGATGAGAGGCCGTA +CAACTACGCCTCCCTGCTGGCCTGTGGGGGCAAGTCCCCCCGGACCCCCAGGCCTGCAGG +AAAGCACCGTGTCGTTATTCCTCACCTTCAGTGCTTCAGGGATGAGTACGAGAGGTTTTC +TGGAACCTACGTGAATAACCGGATACGGACGACCAAGTACACACTCCTGAACTTTGTGCC +AAGGAACTTATTTGAACAGTTTCACAGGGCTGCCAATTTATATTTCCTGTTCCTCGTGGT +CCTGAACTGGGTGCCTTTGGTAGAAGCCTTCCAAAAGGAAATCACCATGCTGCCTCTGGT +GGTGGTCCTCACAATTATTGCAATTAAAGATGGCTTGGAAGACTACCGGAAGTACAAAAT +TGACAAGCAGATCAACAACTTAATAACCAAGGTTTACAGTAGGACTCTGAAGTTGATCCT +GAGAAGTTCTCCAGTAGGATAGAATGTGAAAGCCCAAACAATGACCTCAGCAGATTCCGA +GGTTTCCTGGAACATGCCAATAAAGACCGTGTGGGCCTCAGCAAAGAGAATTTATTGCTC +CGCGGGTGCACCATCAGAAACACAGAGGCTGTGGTGGGCATTGTGGTCTATGCAGGTCAT +GAAACCAAAGCAATGCTGAACAACAGTGGGCCACGGTATAAGCGCAGTAAGTTAGAGAGA +AGAGCAAATACAGACGTCCTCTGGTGTGTCCTGCTTCTGATCGTCATGTGCTTAACTGGT +GCACTGGGTCACGGCATATGGCTGAGCAGGTATGAGAACATGCTCTTTTTTAACATCCCT +GAGCCGGACGGACGTGTCCTATCACCTGTGCTGACTGGGTTCTATGTGTTCTGGACCATG +ATCATCTTGCTGCAGGTCCTGATCCCCATTTCTCTCTACGTGTCCATTGAGATCGTGAAG +CTGGGACAGATCTATTTCATCCAGAGCGATGTAGATTTCTACAACGAGAAAATGGATTCG +ACCATTCAGTGCCGAGCCCTAAACATCACTGAGGACCTTGGGCAGATTCAATACCTCTTT +TCTGATAAGACAGGAACCCTCACAGAGAATAAGATGGTGTTTCGGAGGTGCAGTGTAGCA +GGGTTTGACTACTGCCATGAAGAAAACGCCAGGAGGCTCGAGTCCTATCAGGAAGCTGTC +TCTGAAGAGGAGGAACGCGCAGACACTCTCGGCGGCTCCCTCAGCAACGTGGCGAGACCC +AGAGCCCAGGGCTGCAGGACAGTTCACAGTGGGCTTCCGGGAAAACCCCCGGCTCACCTC +TCCGGGAGCACCTCTGCTGTAGGAGACGCAGAAGGATCCGGGGAAGTGCCTCATTCCAGA +CAGGCTGCCTTCAGTAGTCCCATGGAAACAGACGTGGTACCAGATACCAGACTTTTAGAC +AAATTTAGCCAGATTACCCCTCAGCTGCTCACTGGACTGGATGGGACCTTGCAGAGCTCA +TCACTGGAGACCTTGTACATCATGGACTTCTTTATTGCACTGGCAATTTGCAACACGGTG +GTGGTTTCTGCCCCAAACCAACCTCGGCAAAAGATTGGGCTCTCCTCACTGGGTGGAATG +CCCATCAAGTCCTTGGACGAGATTAAAAACATCTTCCAGAAATTGTCTGTCCGGAGATCA +AGTTCACCATCCCTTGCCAGCGGGAAGGATTCATCCTCTGGGACTCCCTGTGCCTTTGTG +AGCAGAATCTCTTTCTTTAGTCGACCAAAACTGTCACCTCCTATGGAGGACGAGTCTTCC +CAAATGGATGAAATCCCCCAGGCCAGTAACTCAGCTTGCTGTACAGAAACGGAGGCACAA +AACAGTGCCTTAGGACTCAGCGTCGGCTCCGCGGAAGCCCTAAATGGACCACCGCCCTTG +GCTTCCAACCTGTGTTATGAGGCGGAGAGTCCAGATGAAGCAGCCTTGGTGTATGCCGCC +AGAGCTTATCATTGCACTTTACAGTCTCGGACCCCAGAGCAGGTCATGGTGGAGTTTGCA +GCTTTGGGCTCATTAACATTTCAACTCCTACACATCCTGCCCTTTGACTCAGTAAGGAAA +AGAATGTCGGTGGTGGTCCGGCACCCTCTTTCCAAACAAGTCGTGGTGTATACAAAAGGC +GCTGATTCCGTGATCATGGAGCTGCTGTCTATGGCTTCCTCGGATGGAACAAATCTGGAA +GAACAACAGATGATAATAAGGGAGAGAACGCAGAGGCACCTGGACGAGTATGCCAGACGA +GGGCTGCGCACTCTGTGTGTTGCAAAGAAGGTCATGAGTGACACGGAATATGCAGAGTGG +CTGAGGAATCACTTCCTAGCTGAAACCAGCATTGACAACAGGGAGGAGCTGCTAGTTGAG +TCTGCCATGAGACTAGAAAACAAACTCACGTTACTTGGTGCTACTGGCATTGAAGATCGT +CTGCAGGAGGGGGTCCCTGAGTCTATAGAAGCCCTTCACCAAGCTGGCATCAAGATCTGG +ATGCTGACAGGGGACAAGCAGGAGACAGCTGTCAACATAGCTTATGCATGCAGACTCCTG +GAACCAGATGACAAGCTCTTCATCCTCAATACACAAAGTGAGGATGCCTGTGGGATGCTG +ATGAGTGCAATTTTGGAAGAACTTCAGAAGAGAGCTCAGGTGTCTCCGGAGCTGGCATCA +CCAAGAAAGAACTTTCCTCAGCCCCCTGACCCTCAGGGCCAGGGACGTGCGGGACTTGTT +ATCACTGGGAAGAGCCTGGAGTTTGCCCTGCAGGAGAGTCTACAAAGACAGTTCCTTGAG +CTGACTGCATGGTGCCAAGCTGTGATCTGCTGCCGAGCCACCCCCCTTCAAAAGAGTGAG +GTGGTGAAATTGGTTCGAAACCATCTCCATGTGATGACCCTAGCCATTGGTGACGGTGCC +AATGATGTTAGCATGATACAAGTGGCTGACATTGGGATCGGTGTCTCAGGTCAAGAAGGC +ATGCAGGCTGTGATGGCCAGTGACTTCGCCATCTCTCAGTTCAGACATCTCAGCAAGCTT +CTCCTCGTGCACGGGCACTGGTGTTACACCCGGCTCTCCAACATGATTCTCTATTTTTTC +TACAAGAATGTGGCCTATGTGAATCTCCTTTTCTGGTACCAGTTCTTTTGTGGGTTTTCA +GGAACATCGATGACTGACTACTGGGTGCTGATCTTCTTCAACCTCCTCTTCACATCTGTC +CCCCCCATCATTTATGGCGTTTTGGAGAAAGATGTGTCAGCAGAGACCCTCCTGCAGCTG +CCTGAACTTTACCGGAGTGGTCAGCGATCAGAGGAATACTTGCCCGTCACTTTCTGGATC +ACCTTGTTGGATGCCTTTTATCAAAGCCTGGTCTGCTTCTTTGTGCCTTACTTTACCTAC +CAGGGCTCTGACATTGACATCTTTACCTTTGGGAATCCCCTGAACACGGCGGCTCTGTTC +ATCATTCTCCTCCACCTGGTGATCGAAAGCAAGAGTTTGACTTGGATCCACATGCTGGTC +ATTGTTGGGAGCATCTTGTCCTACTTTTTCTTTGCCTTGGCTTTTGGAGCCTTATGTGTC +ACTTGCAACCCACCCTCCAACCCCTACGGGATCATGCAGAAGCACATGCTAGACCCTGTG +TTCTACTTAGTTTGTGTTCTTACAACCTTCGTAGCACTCCTGCCCAGGTTTGCCTACCGA +GTTCTTCAGGGATCCATGTTTCCATCTCCAGTTCTCAGAGCCAAGTACTTTGACCGACTA +CCTCCAGAGGAGAGAGCTGAAGCTCTCAAGAGGTGGAGAGGGACTGCAAAGATCAATCAC +GTGGCATCTCAGCATGCCAGCCAATCAGCTGCTAAGTCAGGAAGACCCACGCCTGGGTCT +TCTGCTGTCCTTGCAATGAAGACAGCAACAGTGCGTACTGTTGAGCAGAGCACATGTGAA +ACTGCGCTAGACCATGGCTGCTCTGAACCTGGGGCCTCCAGGACGACTGGACCCTCAGCA +AGT +>MGP_PahariEiJ_T0009938.1 cds chromosome:PAHARI_EIJ_v1.1:13:62671962:62686918:1 gene:MGP_PahariEiJ_G0008413.1 gene_biotype:polymorphic_pseudogene transcript_biotype:protein_coding gene_symbol:Atp10d description:ATPase, class V, type 10D [Source:MGI Symbol;Acc:MGI:2450125] +ATGACTGACTACTGGGTGCTGATCTTCTTCAACCTCCTCTTCACATCTGTCCCCCCCATC +ATTTATGGCGTTTTGGAGAAAGATGTGTCAGCAGAGACCCTCCTGCAGCTGCCTGAACTT +TACCGGAGTGGTCAGCGATCAGAGGAATACTTGCCCGTCACTTTCTGGATCACCTTGTTG +GATGCCTTTTATCAAAGCCTGGTCTGCTTCTTTGTGCCTTACTTTACCTACCAGGGCTCT +GACATTGACATCTTTACCTTTGGGAATCCCCTGAACACGGCGGCTCTGTTCATCATTCTC +CTCCACCTGGTGATCGAAAGCAAGAGTTTGACTTGGATCCACATGCTGGTCATTGTTGGG +AGCATCTTGTCCTACTTTTTCTTTGCCTTGGCTTTTGGAGCCTTATGTGTCACTTGCAAC +CCACCCTCCAACCCCTACGGGATCATGCAGAAGCACATGCTAGACCCTGTGTTCTACTTA +GTTTGTGTTCTTACAACCTTCGTAGCACTCCTGCCCAGGTTTGCCTACCGAGTTCTTCAG +GGATCCATGTTTCCATCTCCAGTTCTCAGAGCCAAGTACTTTGACCGACTACCTCCAGAG +GAGAGAGCTGAAGCTCTCAAGAGGTGGAGAGGGACTGCAAAGATCAATCACGTGGCATCT +CAGCATGCCAGCCAATCAGCTGCTAAGTCAGGAAGACCCACGCCTGGGTCTTCTGCTGTC +CTTGCAATGAAGACAGCAACAGTGCGTACTGTTGAGCAGAGCACATGTGAAACTGCGCTA +GACCATGGCTGCTCTGAACCTGGGGCCTCCAGGACGACTGGACCCTCAGCAAGT diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/test1.ns.fasta diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/test4.ns.fasta diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/test5.sqlite Binary file test-data/test5.sqlite has changed diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/test6.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test6.fasta Thu Oct 31 08:16:51 2019 -0400 @@ -0,0 +1,15 @@ +>MGP_PahariEiJ_T0009938.1_muspahari +ATGACTGACTACTGGGTGCTGATCTTCTTCAACCTCCTCTTCACATCTGTCCCCCCCATC +ATTTATGGCGTTTTGGAGAAAGATGTGTCAGCAGAGACCCTCCTGCAGCTGCCTGAACTT +TACCGGAGTGGTCAGCGATCAGAGGAATACTTGCCCGTCACTTTCTGGATCACCTTGTTG +GATGCCTTTTATCAAAGCCTGGTCTGCTTCTTTGTGCCTTACTTTACCTACCAGGGCTCT +GACATTGACATCTTTACCTTTGGGAATCCCCTGAACACGGCGGCTCTGTTCATCATTCTC +CTCCACCTGGTGATCGAAAGCAAGAGTTTGACTTGGATCCACATGCTGGTCATTGTTGGG +AGCATCTTGTCCTACTTTTTCTTTGCCTTGGCTTTTGGAGCCTTATGTGTCACTTGCAAC +CCACCCTCCAACCCCTACGGGATCATGCAGAAGCACATGCTAGACCCTGTGTTCTACTTA +GTTTGTGTTCTTACAACCTTCGTAGCACTCCTGCCCAGGTTTGCCTACCGAGTTCTTCAG +GGATCCATGTTTCCATCTCCAGTTCTCAGAGCCAAGTACTTTGACCGACTACCTCCAGAG +GAGAGAGCTGAAGCTCTCAAGAGGTGGAGAGGGACTGCAAAGATCAATCACGTGGCATCT +CAGCATGCCAGCCAATCAGCTGCTAAGTCAGGAAGACCCACGCCTGGGTCTTCTGCTGTC +CTTGCAATGAAGACAGCAACAGTGCGTACTGTTGAGCAGAGCACATGTGAAACTGCGCTA +GACCATGGCTGCTCTGAACCTGGGGCCTCCAGGACGACTGGACCCTCAGCAAGT diff -r f4acbfe8d6fe -r e8e75a79de59 test-data/test6.sqlite Binary file test-data/test6.sqlite has changed