Mercurial > repos > vipints > deseq_hts
changeset 0:94a108763d9e draft
deseq-hts version 1.0 wraps the DESeq 1.6.0
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/README Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,75 @@ +--------------------------------------------------- +DESeq-hts: A Galaxy wrapper for DESeq version 1.6.0 +--------------------------------------------------- + +Description: + DESeq can be used as a web service embedded in a Galaxy instance. + We call it DESeq-hts. + +Requirements: + MATLAB/OCTAVE and Python :- Preprocessing of sequencing reads and GFF files + R, Bioconductor package :- Required for DESeq + SCIPY, NUMPY :- for Python + SAMTOOLS :- Read processing + +Contents: + [src] + All relevant scripts for DESeq-hts are located in the subdirectory + src. src/deseq.sh is the main script to start DESeq-hts. The + preprocessing of the BAM and GFF files runs before the R DESeq script. + Please follow the shell script to understand the details. + + [galaxy] + The Galaxy tool configuration file can be found in the galaxy folder. Please + make the necessary edits to the .xml file and the remaining .sh files and + perform a few tests. + + [setup_deseq-hts.sh] + Setup script for DESeq-hts. + + [mex] + MATLAB/Octave executable (mex) files. + + [bin] + Contains the deseq_config.sh file, which is used for the configuration of + DESeq-hts. The setup script adapts this default file to your platform. + + [test_data] + This subdirectory contains all data for running a functional test in the + Galaxy framework. You may need to move these test files into the test-data + directory. + + [tools] + A Python-based GFF parsing program. Also contains small utility programs. + +Getting started: + Check for all requirements first, then + + a) Run ./setup_deseq-hts.sh and set up paths and configuration options for DESeq-hts. + + b) Inside the mex folder, run make to create the platform-dependent .mex files + cd mex + make [interpreter] + make octave for octave + make matlab for matlab + make all for octave and matlab + + c) Edit the Galaxy tool configuration file to adjust the path if necessary. 
+ +Licenses: + If **DESeq** is used to obtain results for scientific publications it should be cited as [1]. + + This wrapper program (DESeq-hts) is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free Software Foundation; + either version 3 of the License, or (at your option) any later version. + + Written (W) 2009-2012 Jonas Behr, Regina Bohnert, Andre Kahles, Gunnar Raetsch, Vipin T. Sreedharan + Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany and + 2012 cBio Memorial Sloan Kettering Cancer Center, New York City, USA. + +References: + [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`. + +Contact: + vipin@cbio.mskcc.org +
#!/bin/bash
# File: deseq-hts_1.0/bin/deseq_config.sh
# Copyright (C) 2010-2012 Max Planck Society
#
# Central configuration for DESeq-hts.  This file is sourced by
# bin/start_interpreter.sh and is also pulled in by mex/Makefile via
# `include`, so every setting must stay a plain `export NAME=value` line
# (no shell-only syntax, no trailing comments on the assignment lines).
# Empty values are placeholders to be filled in for the local installation.

# --- DESeq-hts installation ------------------------------------------------
export DESEQ_VERSION=1.6.0
export DESEQ_PATH=
# Derived from DESEQ_PATH; DESEQ_SRC_PATH is added to the interpreter path
# by start_interpreter.sh.
export DESEQ_SRC_PATH=$DESEQ_PATH/src
export DESEQ_BIN_PATH=$DESEQ_PATH/bin

# --- interpreter selection ---------------------------------------------------
# start_interpreter.sh compares this value against 'octave' and 'matlab'.
export INTERPRETER=

# --- MATLAB ------------------------------------------------------------------
# MATLAB_BIN_PATH is executed by start_interpreter.sh; MATLAB_MEX_PATH and
# MATLAB_INCLUDE_DIR are read by mex/Makefile.
export MATLAB_BIN_PATH=
export MATLAB_MEX_PATH=
export MATLAB_INCLUDE_DIR=

# --- Octave ------------------------------------------------------------------
# OCTAVE_BIN_PATH is executed by start_interpreter.sh; OCTAVE_MKOCT is the
# mkoctfile command used by mex/Makefile.
export OCTAVE_BIN_PATH=
export OCTAVE_MKOCT=

# --- external tools ----------------------------------------------------------
# SAMTOOLS_DIR is used as include and library directory by mex/Makefile.
export SAMTOOLS_DIR=
# NOTE(review): the consumers of the next three settings are not part of this
# changeset view -- presumably the Python/R preprocessing scripts; confirm.
export PYTHON_PATH=
export SCIPY_PATH=
export R_PATH=

# --- runtime environment -----------------------------------------------------
# NOTE: this overwrites (clears) any LD_LIBRARY_PATH inherited from the caller.
export LD_LIBRARY_PATH=
export ENVIRONMENT=galaxy
#!/bin/bash
# File: deseq-hts_1.0/bin/genarglist.sh
# Copyright (C) 2010-2012 Max Planck Society
#
# Print all command-line arguments as one comma-separated list of
# single-quoted strings, without a trailing newline:
#
#     genarglist.sh a b c      ->   'a', 'b', 'c'
#
# The output is spliced into a MATLAB/Octave function call by the wrapper
# scripts in this directory.
#
# The loop iterates over "$@" instead of testing `[ -z $1 ]`, so that
#   * an empty argument is emitted as '' instead of silently ending the list,
#   * arguments containing whitespace no longer make `test` print errors.

separator=""
for arg in "$@"; do
    printf "%s'%s'" "$separator" "$arg"
    separator=", "
done
#!/bin/bash
# File: deseq-hts_1.0/bin/genes_cell2struct
# deseq-hts wrapper script to start the interpreter with the correct list of arguments
# Copyright (C) 2010-2012 Max Planck Society
#
# The name of this script (basename of $0) is the name of the MATLAB/Octave
# function to run; all command-line arguments are converted into a quoted
# argument list by genarglist.sh and handed to start_interpreter.sh.
set -e

# Quote $0 and "$@" so that installation paths and arguments containing
# whitespace are passed through unchanged.
PROG=$(basename "$0")
DIR=$(dirname "$0")

exec "${DIR}/start_interpreter.sh" "${PROG}" "$("${DIR}/genarglist.sh" "$@")"
#!/bin/bash
# File: deseq-hts_1.0/bin/get_read_counts
# deseq-hts wrapper script to start the interpreter with the correct list of arguments
# Copyright (C) 2010-2012 Max Planck Society
#
# The name of this script (basename of $0) is the name of the MATLAB/Octave
# function to run; all command-line arguments are converted into a quoted
# argument list by genarglist.sh and handed to start_interpreter.sh.
set -e

# Quote $0 and "$@" so that installation paths and arguments containing
# whitespace are passed through unchanged.
PROG=$(basename "$0")
DIR=$(dirname "$0")

exec "${DIR}/start_interpreter.sh" "${PROG}" "$("${DIR}/genarglist.sh" "$@")"
#!/bin/bash
# File: deseq-hts_1.0/bin/start_interpreter.sh
# Copyright (C) 2010-2012 Max Planck Society
#
# Usage: start_interpreter.sh <function> <arglist>
#
# Runs <function>(<arglist>) in the interpreter selected by $INTERPRETER
# (see deseq_config.sh).  The path of a temporary file is exported as
# MATLAB_RETURN_FILE; whatever that file contains after the interpreter has
# finished is used as the exit status of this script (0 if it is empty).
#
# Exit status: 255 if the interpreter could not be started or INTERPRETER is
# not one of 'octave' / 'matlab'; otherwise the content of MATLAB_RETURN_FILE.

set -e

. "$(dirname "$0")/deseq_config.sh"

# mktemp replaces the Debian-only, deprecated `tempfile`.
MATLAB_RETURN_FILE=$(mktemp)
export MATLAB_RETURN_FILE

# Statement sequence executed inside the interpreter.
COMMANDS="global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;"

# Report a start-up failure, remove the temp file and abort.
# 255 is what the original `exit -1` evaluated to in bash.
abort() {
    echo "$1"
    rm -f "$MATLAB_RETURN_FILE"
    exit 255
}

# The *_BIN_PATH variables are intentionally left unquoted so that a
# configured value may carry extra command-line options.
case "$INTERPRETER" in
    octave)
        echo exit | ${OCTAVE_BIN_PATH} --eval "$COMMANDS" || abort "starting Octave failed"
        ;;
    matlab)
        echo exit | ${MATLAB_BIN_PATH} -nodisplay -r "$COMMANDS" || abort "starting Matlab failed"
        ;;
    *)
        # Previously an unset/unknown interpreter did nothing and exited 0,
        # which made a misconfigured installation look like a successful run.
        abort "INTERPRETER must be 'octave' or 'matlab' (got '$INTERPRETER'); check deseq_config.sh"
        ;;
esac

test -f "$MATLAB_RETURN_FILE" || exit 0
ret=$(cat "$MATLAB_RETURN_FILE")
rm -f "$MATLAB_RETURN_FILE"
exit "${ret:-0}"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/galaxy/deseq.xml Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,105 @@ +<tool id="deseq-hts" name="DESeq" version="1.6.0"> + <description>Determines differentially expressed transcripts from read alignments</description> + <command> +deseq-hts/src/deseq-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat +#for $i in $replicate_groups +#for $j in $i.replicates +$j.bam_alignment:#slurp +#end for +#end for + >> $Log_File </command> + <inputs> + <param format="gff3" name="anno_input_selected" type="data" label="Genome annotation in GFF3 file" help="A tab delimited format for storing sequence features and annotations"/> + <repeat name="replicate_groups" title="Replicate group" min="2"> + <repeat name="replicates" title="Replicate"> + <param format="bam" name="bam_alignment" type="data" label="BAM alignment file" help="BAM alignment file. Can be generated from SAM files using the SAM Tools."/> + </repeat> + </repeat> + </inputs> + + <outputs> + <data format="txt" name="deseq_out" label="DESeq result"/> + <data format="txt" name="Log_File" label="DESeq log file"/> + </outputs> + + <tests> + <test> + command: + ./deseq-hts.sh ../test_data/deseq_c_elegans_WS200-I-regions.gff3 ../test_data/deseq_c_elegans_WS200-I-regions_deseq.txt ../test_data/genes.mat ../test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam ../test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam + + <param name="anno_input_selected" value="deseq_c_elegans_WS200-I-regions.gff3" ftype="gff3" /> + <param name="bam_alignments1" value="deseq_c_elegans_WS200-I-regions-SRX001872.bam" ftype="bam" /> + <param name="bam_alignments2" value="deseq_c_elegans_WS200-I-regions-SRX001875.bam" ftype="bam" /> + <output name="deseq_out" file="deseq_c_elegans_WS200-I-regions_deseq.txt" /> + </test> + </tests> + + <help> + +.. class:: infomark + +**What it does** + +`DESeq` is a tool for differential expression testing of RNA-Seq data. 
+ + +**Inputs** + +`DESeq` requires two kinds of input to run: + +1. Annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified. +2. The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments in a compressed format. They can be generated using the `SAM-to-BAM` tool in the NGS: SAM Tools section. (The script will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is therefore not recommended.) + +**Output** + +`DESeq` generates a text file containing the gene name and the p-value. + +------ + +**Licenses** + +If **DESeq** is used to obtain results for scientific publications it +should be cited as [1]_. + +**References** + +.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. + +.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106 + +------ + +.. class:: infomark + +**About formats** + + +**GFF3 format** General Feature Format is a format for describing genes +and other features associated with DNA, RNA and protein +sequences. GFF3 lines have nine tab-separated fields: + +1. seqid - The name of a chromosome or scaffold. +2. source - The program that generated this feature. +3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". +4. start - The starting position of the feature in the sequence. The first base is numbered 1. +5. stop - The ending position of the feature (inclusive). +6. score - A score between 0 and 1000. If there is no score value, enter ".". +7. strand - Valid entries include '+', '-', or '.' (for don't know/care). +8. phase - If the feature is a coding exon, phase should be a number between 0-2 that represents the reading frame of the first base. 
 If the feature is not a coding exon, the value should be '.'. +9. attributes - All lines with the same group are linked together into a single item. + +For more information see http://www.sequenceontology.org/gff3.shtml + +**SAM/BAM format** The Sequence Alignment/Map (SAM) format is a +tab-delimited text format that stores large nucleotide sequence +alignments. BAM is the binary version of a SAM file that allows for +fast and intensive data processing. The format specification and the +description of SAMtools can be found on +http://samtools.sourceforge.net/. + +------ + +DESeq-hts Wrapper Version 0.3 (Feb 2012) + +</help> +</tool>
# File: deseq-hts_1.0/mex/Makefile
#
# Builds the MEX helpers used by DESeq-hts.
#
#   make octave   build the Octave variants   (*.mex,    via mkoctfile)
#   make matlab   build the MATLAB variants   (*.mexa64, via mex)
#   make all      build both
#   make clean    remove all build products
#
# Tool locations come from ../bin/deseq_config.sh, which consists of plain
# `export NAME=value` lines and can therefore be included directly.
# NOTE: the MATLAB targets hard-code the 64-bit Linux extension .mexa64.
include ../bin/deseq_config.sh

MEX=${MATLAB_MEX_PATH}
MKOCTFILE=${OCTAVE_MKOCT}
MATLAB_INCL=${MATLAB_INCLUDE_DIR}
SAMDIR=${SAMTOOLS_DIR}

# These targets do not produce files of the same name; declaring them phony
# keeps a stray file called e.g. "clean" or "all" from disabling the rule.
.PHONY: all octave matlab clean

all: get_reads.mex get_bam_properties.mex interval_overlap.mex get_reads.mexa64 get_bam_properties.mexa64 interval_overlap.mexa64
octave: get_reads.mex get_bam_properties.mex interval_overlap.mex
matlab: get_reads.mexa64 get_bam_properties.mexa64 interval_overlap.mexa64


# --- MATLAB ------------------------------------------------------------------
# Stale object files are removed first because the MATLAB and Octave builds
# share the same source names.
get_reads.mexa64: get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp
	rm -f *.o
	${MEX} -g -O get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp -I${SAMDIR} -L${SAMDIR} -lbam -lz -lcurses -I$(MATLAB_INCL)

get_bam_properties.mexa64: get_bam_properties.cpp
	rm -f *.o
	${MEX} -g -O get_bam_properties.cpp -I$(MATLAB_INCL)

interval_overlap.mexa64: interval_overlap.cpp
	${MEX} -g -O interval_overlap.cpp -I$(MATLAB_INCL)

# --- Octave ------------------------------------------------------------------
get_reads.mex: get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp
	rm -f *.o
	${MKOCTFILE} -g --mex get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp -I${SAMDIR} -L${SAMDIR} -lbam -lz -lcurses

get_bam_properties.mex: get_bam_properties.cpp
	rm -f *.o
	${MKOCTFILE} -g --mex get_bam_properties.cpp

interval_overlap.mex: interval_overlap.cpp
	rm -f *.o
	${MKOCTFILE} -g --mex interval_overlap.cpp

clean:
	rm -f *.o *.mexa64 *.mex
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/get_bam_properties.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,216 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2009-2011 Regina Bohnert +* Copyright (C) 2009-2011 Max Planck Society +*/ + + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <ctype.h> +#include <assert.h> + +#include <vector> + using std::vector; +#include <string> + using std::string; +#include <algorithm> + using std::find; + using std::min; + +#include <mex.h> + + +char *get_string(const mxArray *prhs); + +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; + +/* + * [read_len num_reads] = get_bam_properties(fname, path_samtools, contig_name) + * + * -- input -- + * prhs[0] file name of paired reads in BAM format (sorted by read id) + * prhs[1] path to samtools + * prhs[2] contig name + * + * -- output -- + * plhs[0] length of read + * plhs[1] number of unique reads +*/ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + // checks for the right number of arguments + if (nrhs !=3 || nlhs > 2) { + mexErrMsgTxt("number of input and output args should be 3 and 2\nUSAGE:\n [read_len, num_reads] = get_bam_properties(fname, path_samtools, contig_name)\n"); + return; + } + + signal(SIGCHLD, SIG_IGN); // avoid zombies + + // read input arguments + char *fname = get_string(prhs[0]); + char *path_samtools = get_string(prhs[1]); + char *contig_name = get_string(prhs[2]); + char command[10000]; + + sprintf(command, "%s./samtools view %s %s 2>/dev/null", path_samtools, fname, contig_name); + //printf("%s\n", command); + + // get number of unique reads + int status; + uint32_t num_unique_reads = 0; + char command2[10000]; + sprintf(command2, "%s | cut 
-f 1 | sort -u | wc -l", command); + FILE* fp = popen(command2, "r"); + if (fp == NULL) { + mexErrMsgTxt("Error using popen\n"); + } + int num_scans = 1; + num_scans = fscanf(fp, "%d", &num_unique_reads); + if (num_scans != 1) { + rewind(fp); + char ret[1000]; + fgets(ret, 1000, fp); + fprintf(stdout, "%s", ret); + mexErrMsgTxt("Could not determine number of reads\n"); + } + status = pclose(fp); + //printf("%i", num_unique_reads); + + // select reads for given positions and strand + int num_rows_selected = min((int) num_unique_reads, 100); + sprintf(command, "%s | head -n %i | cut -f 1-11", command, num_rows_selected); + fp = popen(command, "r"); + if (fp == NULL) { + mexErrMsgTxt("Error using popen\n"); + } + /* SAM format + 1: read id, 2: flag, 3: reference name, 4: start (1-based, incl.), 5: mapping quality, + 6: CIGAR, 7: mate reference name, 8: mate start (1-based, incl.), 9: insert size, 10: read, 11: quality + 12+: additional tags + */ + uint32_t read_idx = 0, row_idx = 0, num_col = 0; + uint32_t flag = 0, start_pos = 0, map_score = 0, mate_end_pos = 0, num_matches = 0, num_del = 0, num_ins = 0, ins_size = 0; + char ri [1000], read_contig_name [1000], cg [1000], mate_read_id [1000], read [1000], read_qual [1000]; + string last_read_id; + vector<uint32_t> block_lengths, block_starts; + vector<string> read_ids; + vector<string>::iterator it; + + uint32_t read_len = 0; + bool empty_line = true; + int num_rows = 0; + while(empty_line && num_rows < num_rows_selected) { + num_col = fscanf(fp, "%s\t%i\t%s\t%i\t%i\t%s\t%s\t%i\t%i\t%s\t%s", &ri, &flag, &read_contig_name, &start_pos, &map_score, &cg, &mate_read_id, &mate_end_pos, &ins_size, &read, &read_qual); + if (num_col != 11) { + mexErrMsgTxt("error reading SAM line\n"); + } + + string cigar = (string) cg; + // ignore lines with reads w/o mapping information + if (start_pos == 0 || cigar.compare("*")==0) { + continue; + } + // parse CIGAR + uint last_c = 0; + string last_str; + num_matches = 0; + char *end = 
NULL; + uint32_t tmp_nm = 0, tmp_nd = 0, tmp_ni = 0; + uint32_t last_block_start = 0, last_block_length = 0, last_intron_len = 0; + block_lengths.clear(); block_starts.clear(); + + for (uint c = 0; c < cigar.size(); c++) { + switch (cigar[c]) { + case 'M': + last_str = cigar.substr(last_c, c-last_c); + tmp_nm = strtoul(last_str.c_str(), &end, 10); + if (*end != '\0') + mexErrMsgTxt("error: number of mismatches\n"); + end = NULL; + last_block_length += tmp_nm; + num_matches += tmp_nm; + last_c = c + 1; + break; + case 'I': + last_str = cigar.substr(last_c, c-last_c); + tmp_ni = strtoul(last_str.c_str(), &end, 10); + if (*end != '\0') + mexErrMsgTxt("error: number of insertions\n"); + end = NULL; + num_ins += tmp_ni; + last_c = c + 1; + break; + case 'D': + last_str = cigar.substr(last_c, c-last_c); + tmp_nd = strtoul(last_str.c_str(), &end, 10); + if (*end != '\0') + mexErrMsgTxt("error: number of deletions\n"); + end = NULL; + num_del += tmp_nd; + last_block_length += tmp_nd; + last_c = c + 1; + break; + case 'N': + last_str = cigar.substr(last_c, c-last_c); + last_intron_len = strtoul(last_str.c_str(), &end, 10); + end = NULL; + last_c = c + 1; + break; + case 'S': + break; + case 'H': + break; + case 'P': + break; + default: + break; + } + if (cigar[c] == 'N' || c==cigar.size()-1) { + block_starts.push_back(last_block_start); + last_block_start = last_block_start + last_block_length + last_intron_len; + last_intron_len = 0; + block_lengths.push_back(last_block_length); + last_block_length = 0; + } + } + read_len = 0; + for (uint n = 0; n < block_lengths.size(); n++) { + read_len += block_lengths[n]; + } + empty_line = false; + } // end of stream parsing + + status = pclose(fp); + + if (empty_line) + mexErrMsgTxt("Could not determine read length\n"); + + plhs[0] = mxCreateDoubleScalar((double) read_len); + plhs[1] = mxCreateDoubleScalar((double) num_unique_reads); + + return; +} + + +char *get_string(const mxArray *prhs) { + char *buf; + int buflen; + if (!prhs) + 
mexErrMsgTxt("get_string called with NULL pointer arg"); + if (!mxIsChar(prhs)) + mexErrMsgTxt("input is not a string"); + if (mxGetM(prhs) != 1) + mexErrMsgTxt("input is not a row vector"); + buflen = mxGetN(prhs) + 1; + buf = (char*) malloc(buflen); + /* copy the string from prhs into buf and add terminating NULL char */ + if (mxGetString(prhs, buf, buflen)) + mexErrMsgTxt("not enough space"); + return buf; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/get_reads.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,293 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include <stdio.h> +#include <string.h> +#include <signal.h> +#include <mex.h> +#include <algorithm> +#include <vector> + using std::vector; +#include "get_reads_direct.h" +#include "mex_input.h" +#include "read.h" + +#define MAXLINE 10000 + +/* + * input: + * 1 bam file + * 2 chromosome + * 3 region start (1-based index) + * 4 region end (1-based index) + * 5 strand (either '+' or '-' or '0') + * [6] collapse flag: if true the reads are collapsed to a coverage track + * [7] subsample percentage: percentage of reads to be subsampled (in per mill) + * [8] intron length filter + * [9] exon length filter + * [10] mismatch filter + * [11] bool: use mapped reads for coverage + * [12] bool: use spliced reads for coverage + * [13] return maxminlen + * [14] return pair coverage + * + * output: + * 1 coverage + * [2] intron cell array + * [3] pair coverage + * [4] pair list + * + * example call: + * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30); + */ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + + if (nrhs<5 || nrhs>14 || (nlhs<1 || nlhs>4)) { + fprintf(stderr, "usage: [x [introns] [pair]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches], [mapped], [spliced], [maxminlen], [pair]);\n"); + return; + } + + /* obligatory arguments + * **********************/ + char *fname = get_string(prhs[0]); + //fprintf(stdout, 
"arg1: %s\n", fname); + char *chr = get_string(prhs[1]); + //fprintf(stdout, "arg2: %s\n", chr); + int from_pos = get_int(prhs[2]); + //fprintf(stdout, "arg3: %d\n", from_pos); + int to_pos = get_int(prhs[3]); + //fprintf(stdout, "arg4: %d\n", to_pos); + char *strand = get_string(prhs[4]); + //fprintf(stdout, "arg5: %s\n", strand); + + if (from_pos>to_pos) + mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n"); + + if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') + mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0"); + + /* optional arguments + * ******************/ + int collapse = 0; + if (nrhs>=6) + collapse = get_int(prhs[5]); + + int subsample = 1000; + if (nrhs>=7) + subsample = get_int(prhs[6]); + + int intron_len_filter = 1e9; + if (nrhs>=8) + intron_len_filter = get_int(prhs[7]); + + int exon_len_filter = -1; + if (nrhs>=9) + exon_len_filter = get_int(prhs[8]); + + int filter_mismatch = 1e9; + if (nrhs>=10) + filter_mismatch = get_int(prhs[9]); + + int mapped = 1; + if (nrhs>=11) + mapped = get_int(prhs[10]); + + int spliced = 1; + if (nrhs>=12) + spliced = get_int(prhs[11]); + + int maxminlen = 0; + if (nrhs>=13) + maxminlen = get_int(prhs[12]); + + int pair_cov = 0; + if (nrhs>=14) + pair_cov = get_int(prhs[13]); + + /* call function to get reads + * **************************/ + char region[MAXLINE]; + sprintf(region, "%s:%i-%i", chr, from_pos, to_pos); + + vector<CRead*> all_reads; + + get_reads_from_bam(fname, region, &all_reads, strand[0], subsample); + + /* filter reads + * **************/ + int left = 0; + int right = 0; + + vector<CRead*> reads; + for (int i=0; i<all_reads.size(); i++) { + if (all_reads[i]->left) + left++; + if (all_reads[i]->right) + right++; + if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch) + reads.push_back(all_reads[i]); + } + + + /* prepare output + * **************/ + int num_rows = reads.size(); + int 
num_pos = to_pos-from_pos+1; + + if (pair_cov==1 && nlhs>=3) { + // sort reads by read_id + printf("\n\nleft:%i right:%i \n\n", left, right); + sort(reads.begin(), reads.end(), CRead::compare_by_read_id); + } + + // read coverages collapsed + if (collapse) { + plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); + uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]); + if (num_pos>0 && mask_ret==NULL) + mexErrMsgTxt("Error allocating memory\n"); + if (mapped && spliced) { + for (int i=0; i<reads.size(); i++) { + reads[i]->get_coverage(from_pos, to_pos, mask_ret); + } + } else { + for (int i=0; i<reads.size(); i++) { + ssize_t num_exons = reads[i]->block_starts.size(); + if ((num_exons==1 && mapped) || (num_exons>1 && spliced)) + reads[i]->get_coverage(from_pos, to_pos, mask_ret); + } + } + } + // reads not collapsed + else { + uint32_t nzmax = 0; // maximal number of nonzero elements + int len = to_pos-from_pos+1; + for (uint i=0; i<reads.size(); i++) { + for (uint n = 0; n < reads[i]->block_starts.size(); n++) { + uint32_t from, to; + if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0) + from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos; + else + from = 0; + if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0) + to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n]; + else + to = 0; + for (int bp=from; bp<to&bp<len; bp++) { + nzmax++; + } + } + } + // 1st row: row indices + // 2nd row: column indices + plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL); + double *mask_ret = (double*) mxGetData(plhs[0]); + if (nzmax>0 && mask_ret==NULL) + mexErrMsgTxt("Error allocating memory\n"); + uint32_t mask_ret_c = 0; // counter + for (uint i=0; i<reads.size(); i++) { + reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i); + } + if (mask_ret_c!=2*nzmax) + mexErrMsgTxt("Error filling index arrays for sparse matrix\n"); + } + // introns + if 
(maxminlen==0 && nlhs>=2) { + vector<int> intron_list; + for (int i=0; i<reads.size(); i++) { + reads[i]->get_introns(&intron_list); + } + + plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); + uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); + for (int p = 0; p<intron_list.size(); p++) { + p_intron_list[p] = intron_list[p]; + } + intron_list.clear(); + } else if (nlhs>=2) { + vector<uint32_t> intron_starts; + vector<uint32_t> intron_ends; + vector<uint32_t> block_len1; + vector<uint32_t> block_len2; + for (int i=0; i<reads.size(); i++) { + reads[i]->get_introns(&intron_starts, &intron_ends, &block_len1, &block_len2); + } + + plhs[1] = mxCreateNumericMatrix(4, intron_starts.size(), mxINT32_CLASS, mxREAL); + uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); + for (int p = 0; p<intron_starts.size(); p++) { + p_intron_list[4*p] = intron_starts[p]; + p_intron_list[(4*p)+1] = intron_ends[p]; + p_intron_list[(4*p)+2] = block_len1[p]; + p_intron_list[(4*p)+3] = block_len2[p]; + } + intron_starts.clear() ; + intron_ends.clear() ; + block_len1.clear() ; + block_len2.clear() ; + } + if (pair_cov==1 && nlhs>=3) { + plhs[2] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); + uint32_t *p_pair_map = (uint32_t*) mxGetData(plhs[2]); + if (num_pos>0 && p_pair_map==NULL) + mexErrMsgTxt("Error allocating memory\n"); + + vector<int> pair_ids; + + int take_cnt = 0; + int discard_cnt = 0; + // find consecutive reads with the same id + for (int i=0; i<((int) reads.size())-1; i++) { + int j = i+1; + while(j<reads.size() && strcmp(reads[i]->read_id, reads[j]->read_id) == 0) { + if ((reads[i]->left && reads[j]->right) || (reads[j]->left && reads[i]->right) && (reads[i]->reverse != reads[j]->reverse)) { + if (reads[i]->get_last_position()==-1 || reads[j]->get_last_position()==-1) + break; + if (reads[i]->get_last_position()<reads[j]->start_pos && reads[j]->start_pos-reads[i]->get_last_position()<60000) { + int from = std::max(0, 
reads[i]->get_last_position()-from_pos); + int to = std::min(num_pos-1, reads[j]->start_pos-from_pos); + pair_ids.push_back(i); + pair_ids.push_back(j); + for (int k=from; k<to; k++) + p_pair_map[k]++; + take_cnt++; + } else if (reads[i]->start_pos>reads[j]->get_last_position() && reads[j]->get_last_position()-reads[i]->start_pos<60000) { + int from = std::max(0, reads[j]->get_last_position()-from_pos); + int to = std::min(num_pos-1, reads[i]->start_pos-from_pos); + pair_ids.push_back(i); + pair_ids.push_back(j); + for (int k=from; k<to; k++) + p_pair_map[k]++; + take_cnt++; + } else + discard_cnt++; + } + else + discard_cnt++; + j++; + } + } + printf("take:%i, discard:%i \n", take_cnt, discard_cnt); + + if (nlhs>=4) { + plhs[3] = mxCreateNumericMatrix(2, pair_ids.size()/2, mxUINT32_CLASS, mxREAL); + uint32_t *pair_ids_ret = (uint32_t*) mxGetData(plhs[3]); + if (pair_ids.size()>0 && pair_ids_ret==NULL) + mexErrMsgTxt("Error allocating memory\n"); + for (int i=0; i<pair_ids.size(); i++) { + pair_ids_ret[i] = pair_ids[i]; + } + } + } + for (int i=0; i<all_reads.size(); i++) + delete all_reads[i]; +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/get_reads_direct.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,298 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include <stdio.h> +#include <assert.h> +#include "sam.h" +#include "get_reads_direct.h" + +#include <vector> + using std::vector; +#include <string> + using std::string; + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int beg, end; + samfile_t *in; +} tmpstruct_t; + +typedef struct { + uint64_t u, v; +} pair64_t; + +static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b) +{ + uint32_t rbeg = b->core.pos; + uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1; + return (rend > beg && rbeg < end); +} + +pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int* cnt_off); + + int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector<CRead*>* reads, char strand); + +// callback for bam_plbuf_init() +static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) +{ + //tmpstruct_t *tmp = (tmpstruct_t*)data; + //if ((int)pos >= tmp->beg && (int)pos < tmp->end) + // printf("%s\t%d\t%d\n", tmp->in->header->target_name[tid], pos + 1, n); + return 0; +} +#ifdef __cplusplus +} +#endif +int parse_sam_line(char* line, CRead* read); +//int set_strand(char c); +//void parse_cigar(bam1_t* b, CRead* read); +void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header); + + +int get_reads_from_bam(char* filename, char* region, vector<CRead*>* reads, char strand, int 
lsubsample) +{ + subsample = lsubsample; + //set_strand(strand); + + srand (time(NULL)); + //srand (1234); + tmpstruct_t tmp; + tmp.in = samopen(filename, "rb", 0); + if (tmp.in == 0) { + fprintf(stderr, "Fail to open BAM file %s\n", filename); + return 1; + } + int ref; + bam_index_t *idx; + bam_plbuf_t *buf; + idx = bam_index_load(filename); // load BAM index + if (idx == 0) { + fprintf(stderr, "BAM indexing file is not available.\n"); + return 1; + } + bam_parse_region(tmp.in->header, region, &ref, + &tmp.beg, &tmp.end); // parse the region + if (ref < 0) { + fprintf(stderr, "Invalid region %s\n", region); + return 1; + } + + buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup + + bam_fetch_reads(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, tmp.in->header, reads, strand); + //fprintf(stdout, "intron_list: %d \n", intron_list->size()); + + bam_plbuf_push(0, buf); // finalize pileup + bam_index_destroy(idx); + bam_plbuf_destroy(buf); + samclose(tmp.in); + return 0; +} + + +int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector<CRead*>* reads, char strand) +{ + int n_off; + pair64_t *off = get_chunk_coordinates(idx, tid, beg, end, &n_off); + if (off == 0) return 0; + { + // retrive alignments + uint64_t curr_off; + int i, ret, n_seeks; + n_seeks = 0; i = -1; curr_off = 0; + bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t)); + for (;;) { + if (curr_off == 0 || curr_off >= off[i].v) { // then jump to the next chunk + if (i == n_off - 1) break; // no more chunks + if (i >= 0) assert(curr_off == off[i].v); // otherwise bug + if (i < 0 || off[i].v != off[i+1].u) { // not adjacent chunks; then seek + bam_seek(fp, off[i+1].u, SEEK_SET); + curr_off = bam_tell(fp); + ++n_seeks; + } + ++i; + } + if ((ret = bam_read1(fp, b)) > 0) { + curr_off = bam_tell(fp); + if (b->core.tid != tid || b->core.pos >= end) break; // no need to proceed + else if (is_overlap(beg, end, b)) + { + int rr = rand(); + if 
((rr%1000 < subsample)) + { + CRead* read = new CRead(); + parse_cigar(b, read, header); + + if (strand == '0' || strand==read->strand[0] || read->strand[0]=='0') + { + read->left = (b->core.flag & left_flag_mask) >0; + read->right = (b->core.flag & right_flag_mask) >0; + read->reverse = (b->core.flag & reverse_flag_mask) >0; + reads->push_back(read); + } + else + { + delete read; + } + //else if (read->strand[0]=='0'&&((b->core.flag & g_flag_off) >0)) + //{ + // //fprintf(stdout, "(-)-strand; read->strand[0]==0, num_exons: %i \n", read->block_starts.size()); + // // this flag means that the read has been reversed for alignment + // // flag bit set and (-)-strand requested + // reads->push_back(read); + //} + //else if (read->strand[0]=='0'&&(g_flag_on>0&&(b->core.flag & g_flag_on)==0)) + //{ + // //fprintf(stdout, "(+)-strand; read->strand[0]==0, num_exons: %i \n", read->block_starts.size()); + // // (+)-strand requested and flag bit not set + // reads->push_back(read); + //} + } + } + } else break; // end of file + } +// fprintf(stderr, "[bam_fetch] # seek calls: %d\n", n_seeks); + bam_destroy1(b); + } + free(off); + return 0; +} + +void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header) +{ + read->start_pos = b->core.pos+1; + read->set_strand('0'); + read->read_id = new char[100]; + sprintf(read->read_id, "%s\0", bam1_qname(b)); + + for (int k = 0; k < b->core.n_cigar; ++k) + { + int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation + int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length + //fprintf(stdout, "op:%d l:%d\n", op, l); + if (op == BAM_CMATCH) + { + if (k==0) + { + read->block_lengths.push_back(l); + read->block_starts.push_back(0); + } + else + { + int op_prev = bam1_cigar(b)[k-1] & BAM_CIGAR_MASK; + int l_prev = bam1_cigar(b)[k-1] >> BAM_CIGAR_SHIFT; + if (op_prev==BAM_CREF_SKIP)// intron before + { + if (read->block_lengths.size()>=1) + { + int last_block_start = (*(read->block_starts.end()-1)); + int intron_start = 
last_block_start+(*(read->block_lengths.end()-1)); + read->block_lengths.push_back(l); + read->block_starts.push_back(intron_start+l_prev); + } + else + { + // start of first block was not a match + read->block_lengths.push_back(l); + read->block_starts.push_back(0); + } + } + else + { + if (read->block_lengths.size()>=1 && op == BAM_CDEL)// if it is an insertion then the matching block is not inreased + (*(read->block_lengths.end()-1))+=l; + else + { + //char *samline = bam_format1(header, b); + //printf("header: %s \n", samline); + } + } + } + } + else if (op == BAM_CDEL) + { + if (k>0 && read->block_lengths.size()>=1) + (*(read->block_lengths.end()-1))+=l; + } + else if (op == BAM_CREF_SKIP)//intron + {} + else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) + {} + } + // parse auxiliary data + uint8_t* s = bam1_aux(b); + uint8_t* end = b->data + b->data_len; + while (s < end) + { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s; ++s; + //fprintf(stdout, "\n%c%c:%c\n", key[0], key[1], type); + if (type == 'A') + { + if ( key[0] =='X' && key[1] == 'S') + { + read->set_strand((char) *s); + } + ++s; + } + else if (type == 'C') + { + if ( key[0] =='H' && key[1] == '0') + { + uint8_t matches = *s; + read->matches = (int) matches; + } + if ( key[0] =='N' && key[1] == 'M') + { + uint8_t mismatches = *s; + read->mismatches = (int) mismatches; + } + if ( key[0] =='H' && key[1] == 'I') + { + uint8_t mai = *s; + read->multiple_alignment_index = (int) mai; + } + + ++s; + } + else if (type == 'c') { ++s; } + else if (type == 'S') { s += 2; } + else if (type == 's') { s += 2; } + else if (type == 'I') { s += 4; } + else if (type == 'i') { s += 4; } + else if (type == 'f') { s += 4; } + else if (type == 'd') { s += 8; } + else if (type == 'Z') { ++s; } + else if (type == 'H') { ++s; } + } +} + +//int set_strand(char c) +//{ +// if (c=='+') +// { +// char* fl = (char*) "0x0010"; +// g_flag_on = strtol(fl, 0, 0); +// g_flag_off = 0; +// } +// else if 
(c=='-') +// { +// char* fl = (char*) "0x0010"; +// g_flag_off = strtol(fl, 0, 0); +// g_flag_on = 0; +// } +// return 0; +//} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/get_reads_direct.h Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,29 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#ifndef __GET_READS_DIRECT_H__ +#define __GET_READS_DIRECT_H__ + +#include <vector> + using std::vector; +#include "read.h" + +//static int g_flag_on = 0, g_flag_off = 0; +static int left_flag_mask = strtol((char*) "0x40", 0, 0); +static int right_flag_mask = strtol((char*) "0x80", 0, 0); +static int reverse_flag_mask = strtol((char*) "0x10", 0, 0); + +static int subsample = 1000; +//static int collapse = 0; + +int get_reads_from_bam(char* filename, char* region, vector<CRead*>* reads, char strand, int lsubsample); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/interval_overlap.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,217 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include <stdio.h> +#include <stdarg.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <ctype.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <sys/mman.h> +#include <time.h> +#include <math.h> +#include <limits> +#include <mex.h> +#include <assert.h> +#include <vector> + using std::vector; +#include <algorithm> + using std::sort; + using std::min; + using std::max; + +typedef struct { + int start; + int stop; + int idx; + int set_id; +} interval_t; + +bool compare (interval_t i, interval_t j) +{ + return (i.start<j.start); +} + +bool overlaps(interval_t a, interval_t b) +{ + int v = min(a.stop,b.stop) - max(a.start,b.start) + 1; + return (v >= 1); +} +bool leftOf(interval_t a, interval_t b) +{ + return (a.stop < b.start); +} + +void scan(interval_t f, vector<interval_t>* Wf, interval_t g, vector<interval_t>* Wg, vector<int>* overlap) +{ + vector<interval_t>::iterator i; + i=Wg->begin(); + while (i<Wg->end()) + { + interval_t g2 = *i; + if (leftOf(g2,f)) + { + Wg->erase(i);// inefficient if Wg is large + // this moves all elements, therefore i is not incremented + } + else if (overlaps(g2,f)) + { + if (g2.set_id==1) + { + //printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", g2.idx, g2.start, g2.stop, f.idx, f.start, f.stop); + overlap->push_back(g2.idx); + overlap->push_back(f.idx); + } + else if (f.set_id==1) + { + //printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", f.idx, f.start, f.stop, 
g2.idx, g2.start, g2.stop); + overlap->push_back(f.idx); + overlap->push_back(g2.idx); + } + i++; + } + else + { + printf("never happens??\n"); + i++; + } + } + if (!leftOf(f, g)) + { + Wf->push_back(f); + //printf("push: [%i, %i] size:%i\n", f.start, f.stop, Wf->size()); + } +} + +/* + * prhs[0] first interval set starts + * prhs[1] first interval set stops + * prhs[2] second interval set starts + * prhs[3] second interval set stops + * + * return: + * plhs[0] one based index in first interval set overlapping with a interval in the second set + * plhs[1] corresponding index in the second set + * +*/ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) +{ + if (nrhs!=4) + mexErrMsgTxt("Expected 4 arguments: starts1, stops1, starts2, stops2 \n"); + if (nlhs!=2) + mexErrMsgTxt("Expected 2 output arguments \n"); + + int num_intervals1 = mxGetNumberOfElements(prhs[0]); + assert(num_intervals1 == mxGetNumberOfElements(prhs[1])); + int num_intervals2 = mxGetNumberOfElements(prhs[2]); + assert(num_intervals2 == mxGetNumberOfElements(prhs[3])); + + //printf("num_intervals1: %i\n", num_intervals1); + //printf("num_intervals2: %i\n", num_intervals2); + + double* starts1 = mxGetPr(prhs[0]); + double* stops1 = mxGetPr(prhs[1]); + double* starts2 = mxGetPr(prhs[2]); + double* stops2 = mxGetPr(prhs[3]); + + vector<interval_t> intervals1; + for (int i=0; i<num_intervals1; i++) + { + interval_t interval; + interval.start = starts1[i]; + interval.stop = stops1[i]; + interval.set_id = 1; + interval.idx = i+1; + intervals1.push_back(interval); + //printf("int1: [%i, %i] \n",intervals1[i].start, intervals1[i].stop); + } + interval_t i; + i.start = std::numeric_limits<int>::max(); + i.stop = std::numeric_limits<int>::max(); + i.set_id = std::numeric_limits<int>::max(); + i.idx = std::numeric_limits<int>::max(); + intervals1.push_back(i); + + //printf("num_intervals1: %i\n", intervals1.size()); + vector<interval_t> intervals2; + for (int i=0; i<num_intervals2; 
i++) + { + interval_t interval; + interval.start = starts2[i]; + interval.stop = stops2[i]; + interval.set_id = 2; + interval.idx = i+1; + intervals2.push_back(interval); + //printf("int2: [%i, %i] \n",intervals2[i].start, intervals2[i].stop); + } + intervals2.push_back(i); + //printf("num_intervals2: %i\n", intervals2.size()); + + sort(intervals1.begin(), intervals1.end(), compare); + sort(intervals2.begin(), intervals2.end(), compare); + + + vector<int> overlap; + vector<interval_t> Wx; + vector<interval_t> Wy; + vector<interval_t>::iterator x = intervals1.begin(); + vector<interval_t>::iterator y = intervals2.begin(); + while (x<intervals1.end() && y<intervals2.end()) + { + //vector<interval_t>::iterator x; + //vector<interval_t>::iterator y; + //if (it1>intervals1.end()) + // x = inf_interval(); + //else + // x = it1; + //if (it2>intervals2.end()) + // y = inf_interval(); + //else + // y=it2; + + if (x->start <= y->start) + { + scan(*x, &Wx, *y, &Wy, &overlap); + x++; + } + else + { + if (y<=intervals2.end()) + { + scan(*y, &Wy, *x, &Wx, &overlap); + y++; + } + } + } + + plhs[0] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL); + double* idx1 = mxGetPr(plhs[0]); + + plhs[1] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL); + double* idx2 = mxGetPr(plhs[1]); + + for (int i=0; i<overlap.size(); i+=2) + { + //printf("ov: %i [%i, %i] \n", i, overlap[i], overlap[i+1]); + idx1[i/2] = (double) overlap[i]; + idx2[i/2] = (double) overlap[i+1]; + } +} + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/mex_input.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,60 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include <stdio.h> +#include <mex.h> +#include "mex_input.h" + +char *get_string(const mxArray *prhs) { + char *buf; + int buflen; + if (!prhs) + mexErrMsgTxt("get_string called with NULL pointer arg"); + if (!mxIsChar(prhs)) + mexErrMsgTxt("input is not a string"); + if (mxGetM(prhs) != 1) + mexErrMsgTxt("input is not a row vector"); + buflen = mxGetN(prhs) + 1; + buf = (char*) malloc(buflen); + /* copy the string from prhs into buf and add terminating NULL char */ + if (mxGetString(prhs, buf, buflen)) + mexErrMsgTxt("not enough space"); + return buf; +} + +bool get_bool(const mxArray *prhs) +{ + const int M = mxGetM(prhs); + const int N = mxGetN(prhs); + double *f = (double*) mxGetPr(prhs); + + if (!prhs) + mexErrMsgTxt("Arg is NULL pointer"); + if (M != 1 || N != 1) + mexErrMsgTxt("Arg is not a scalar"); + if (f[0] != 0) + return true; + return false; +} + +int get_int(const mxArray *prhs) +{ + const int M = mxGetM(prhs); + const int N = mxGetN(prhs); + double *f = (double*) mxGetPr(prhs); + + if (!prhs) + mexErrMsgTxt("Arg is NULL pointer"); + if (M != 1 || N != 1) + mexErrMsgTxt("Arg is not a scalar"); + + return (int) f[0]; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/mex_input.h Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,20 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include <stdio.h> +#include <mex.h> + +#ifndef __MEX_INPUT_h__ +#define __MEX_INPUT_h__ + char *get_string(const mxArray *prhs); + bool get_bool(const mxArray *prhs); + int get_int(const mxArray *prhs); +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/mex/read.cpp Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,214 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include "read.h" + +CRead::CRead() { + read_id = NULL; + sam_line = NULL; + start_pos = 0; + matches = 0; + mismatches = 0; + multiple_alignment_index = 0; + strand = NULL; + left = false; + right = false; + reverse = false; +} + +CRead::~CRead() { + delete[] read_id; + delete[] sam_line; + delete[] strand; +} + +/* + * Augments 'coverage' array at the positions covered by the read in the queried interval. + */ +void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage) +{ + // block1 block2 + // |=====|======|============|===========|======|====| + // ^ ^ ^ + // p_start_pos | p_end_pos + // start_pos + // |0000001111111111111000000000000111111100000| + // *coverage + int len = p_end_pos-p_start_pos+1; + for (uint32_t n = 0; n < block_starts.size(); n++) { + int32_t from, to; + from = block_starts[n]+start_pos-p_start_pos; + to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; + if (from < 0) + from = 0; + if (to < 0) + continue; + else if (to > len) + to = len; + for (int bp=from; bp<to; bp++) { + coverage[bp]++; + } + } +} +int CRead::get_last_position() +{ + if (block_starts.size()>0) // this if for some reason zero in case of softclips + return start_pos+block_starts.back()+block_lengths.back(); + return -1; +} + +/* + * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read). + * These indices can be used to build up a sparse matrix of reads x positions. 
+ */ +void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) { + int len = p_end_pos-p_start_pos+1; + for (uint32_t n = 0; n < block_starts.size(); n++) { + uint32_t from, to; + if (block_starts[n]+start_pos-p_start_pos >= 0) + from = block_starts[n]+start_pos-p_start_pos; + else + from = 0; + if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0) + to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; + else + to = 0; + for (int bp=from; bp<to&bp<len; bp++) { + reads[reads_c] = row_idx+1; // row indices for sparse matrix + reads[reads_c+1] = bp+1; // column indices for sparse matrix + reads_c += 2; + } + } +} + +void CRead::get_acc_splice_sites(vector<int>* acc_pos) +{ + if (strand[0]=='+') + { + for (int k=1;k<block_starts.size(); k++) + acc_pos->push_back(start_pos+block_starts[k]-1); + } + else if (strand[0]=='-') + { + for (int k=1;k<block_starts.size(); k++) + acc_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); + } +} + +void CRead::get_don_splice_sites(vector<int>* don_pos) +{ + + if (strand[0]=='+') + { + for (int k=1;k<block_starts.size(); k++) + don_pos->push_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); + } + else if (strand[0]=='-') + { + for (int k=1;k<block_starts.size(); k++) + don_pos->push_back(start_pos+block_starts[k]-1); + } +} + +int CRead::min_exon_len() +{ + int min = 1e8; + for (int k=0;k<block_starts.size(); k++) + if (block_lengths[k]<min) + min = block_lengths[k]; + return min; +} + +int CRead::max_intron_len() +{ + int max = 0; + for (int k=1;k<block_starts.size(); k++) + if (block_starts[k]-(block_starts[k-1]+block_lengths[k-1])>max) + max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]); + return max; +} + +/* + * Adds start and end of introns in the read consecutively to the 'introns' vector. 
+ */ +void CRead::get_introns(vector<int>* introns) +{ + for (int i=1; i<block_starts.size(); i++) + { + int istart = block_starts[i-1]+block_lengths[i-1]+start_pos; + int iend = block_starts[i]+start_pos-1; + introns->push_back(istart); + introns->push_back(iend); + //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend); + } +} +void CRead::get_introns(vector<uint32_t>* intron_starts, vector<uint32_t>* intron_ends, vector<uint32_t>* block_len1, vector<uint32_t>* block_len2) +{ + for (int i=1; i<block_starts.size(); i++) + { + uint32_t istart = block_starts[i-1]+block_lengths[i-1]+start_pos; + uint32_t iend = block_starts[i]+start_pos-1; + intron_starts->push_back(istart); + intron_ends->push_back(iend); + block_len1->push_back(block_lengths[i-1]) ; + block_len2->push_back(block_lengths[i]) ; + } +} + +bool CRead::operator==(const CRead& read) const +{ + if (block_starts.size()!=read.block_starts.size()) + return false; + if (block_lengths.size()!=read.block_lengths.size()) + return false; + if (start_pos!=read.start_pos) + return false; + if (strand[0] != read.strand[0]) + return false; + for (int i=0; i<block_starts.size(); i++) + if (block_starts[i]!=read.block_starts[i]) + return false; + for (int i=0; i<block_lengths.size(); i++) + if (block_lengths[i]!=read.block_lengths[i]) + return false; + return true; +} + +void CRead::print() +{ + fprintf(stdout, "start_pos: %d\n", start_pos); + fprintf(stdout, "starts:"); + for (int i=0; i<block_starts.size(); i++) + { + fprintf(stdout, " %d", block_starts[i]); + } + fprintf(stdout, "\n"); + + fprintf(stdout, "lengths:"); + for (int i=0; i<block_starts.size(); i++) + { + fprintf(stdout, " %d", block_lengths[i]); + } + fprintf(stdout, "\n"); +} + +void CRead::set_strand(char s) +{ + delete[] strand; + strand = new char [2]; + strand[0] = s; + strand[1] = '0'; +} + +int CRead::get_mismatches() +{ + return mismatches ; +}
// File: deseq-hts_1.0/mex/read.h
/*
*  This program is free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 3 of the License, or
*  (at your option) any later version.
*
*   Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
*   Copyright (C) 2010-2011 Max Planck Society
*/


#ifndef __READ_H__
#define __READ_H__

#include <stdint.h>
#include <cctype>
#include <stdio.h>
#include <algorithm>
#include <cstring>
#include <vector>
  using std::vector;


/*
 * One aligned read, stored as a list of blocks (contiguous aligned
 * stretches) relative to start_pos.
 *
 * NOTE(review): the destructor delete[]s read_id, sam_line and strand, but
 * no copy constructor or assignment operator is declared, so copying a
 * CRead by value would free those buffers twice. Handle instances through
 * pointers (e.g. vector<CRead*>).
 */
class CRead {
	public:
	/** constructor
	 */
	CRead();
	~CRead();

	vector<int> block_starts;   // block offsets relative to start_pos
	vector<int> block_lengths;  // block lengths, parallel to block_starts
	char* read_id;              // new[]-allocated C string or NULL
	char* sam_line;             // new[]-allocated or NULL
	int start_pos;
	char * strand;              // set through set_strand(); NULL before
	int matches;
	int mismatches;
	int multiple_alignment_index;
	bool left;
	bool right;
	bool reverse;

	void get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage);
	int get_last_position();
	void get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx);
	void get_introns(vector<int>* introns);
	void get_introns(vector<uint32_t>* intron_starts, vector<uint32_t>* intron_ends, vector<uint32_t>* block_len1, vector<uint32_t>* block_len2);
	void get_acc_splice_sites(vector<int>* acc_pos);
	void get_don_splice_sites(vector<int>* don_pos);
	int max_intron_len();
	int min_exon_len();
	bool operator==(const CRead& read) const;
	void print();
	void set_strand(char s);
	int get_mismatches();

	/*
	 * "Less than" on read ids for sorting containers of CRead*: reads
	 * without an id come first, all others are ordered lexicographically.
	 * The NULL check on read2 comes first so that two id-less reads
	 * compare as equivalent; returning true for both (a,b) and (b,a)
	 * would violate the strict weak ordering std::sort relies on.
	 */
	static bool compare_by_read_id(const CRead* read1, const CRead* read2)
	{
		if (!read2->read_id)
			return false;
		if (!read1->read_id)
			return true;

		const char* id1 = read1->read_id;
		const char* id2 = read2->read_id;
		return std::lexicographical_compare(id1, id1+std::strlen(id1), id2, id2+std::strlen(id2));
	}
};
#endif
#!/bin/bash
# File: deseq-hts_1.0/setup_deseq-hts.sh
#
# Interactive setup for DESeq-hts: asks for the location of every external
# tool and rewrites bin/deseq_config.sh accordingly (the previous version
# is kept as bin/deseq_config.sh.bk). Run it from the DESeq-hts base
# directory.
set -e

. ./bin/deseq_config.sh

# Prints the path of executable $1, or nothing when it is not on PATH.
# Always succeeds: under `set -e` a failing lookup inside a plain
# assignment would abort the script before the "... not found" message
# could be printed.
find_in_path() {
	command -v "$1" 2>/dev/null || true
}

echo ==========================================
echo "DESeq-hts setup script (DESeq version $DESEQ_VERSION)"
echo ==========================================
echo
echo "DESeq-hts base directory (currently set to \"$DESEQ_PATH\", suggest to set to \"$(pwd)\", used if left empty)"
read -r DESEQ_PATH
if [ "$DESEQ_PATH" == "" ];
then
	DESEQ_PATH=$(pwd)
fi
echo "=> Setting DESeq-hts base directory to \"$DESEQ_PATH\""
echo

echo "SAMTools directory (currently set to \"$SAMTOOLS_DIR\", system version used if left empty)"
read -r SAMTOOLS_DIR
if [ "$SAMTOOLS_DIR" == "" ];
then
	SAMTOOLS_BIN=$(find_in_path samtools)
	if [ "$SAMTOOLS_BIN" != "" ];
	then
		SAMTOOLS_DIR=$(dirname "$SAMTOOLS_BIN")
	else
		echo samtools not found
		exit 1
	fi
fi
echo "=> Setting SAMTools directory to \"$SAMTOOLS_DIR\""
echo

echo "Path to the python binary (currently set to \"$PYTHON_PATH\", system version used, if left empty)"
read -r PYTHON_PATH
if [ "$PYTHON_PATH" == "" ];
then
	PYTHON_PATH=$(find_in_path python)
	if [ "$PYTHON_PATH" == "" ];
	then
		echo python not found
		exit 1
	fi
fi
echo "=> Setting Python path to \"$PYTHON_PATH\""
echo

echo "Path to the R binary (currently set to \"$R_PATH\", system version used, if left empty)"
read -r R_PATH
if [ "$R_PATH" == "" ];
then
	R_PATH=$(find_in_path R)
	if [ "$R_PATH" == "" ];
	then
		echo R not found
		exit 1
	fi
fi
echo "=> Setting R path to \"$R_PATH\""
echo

echo "Path to Scipy library files (currently set to \"$SCIPY_PATH\", system version is used if left empty)"
read -r SCIPY_PATH
echo "=> Setting Scipy path to \"$SCIPY_PATH\""
echo

echo "Which interpreter should be used (\"octave\" or \"matlab\")"
read -r INTERPRETER
if [ "$INTERPRETER" != 'octave' ] && [ "$INTERPRETER" != 'matlab' ];
then
	echo "Unrecognized choice: \"$INTERPRETER\""
	echo Aborting
	exit 1
fi
echo "=> Setting interpreter to \"$INTERPRETER\""
echo

if [ "$INTERPRETER" == 'octave' ];
then
	echo "Please enter the full path to octave (currently set to \"$OCTAVE_BIN_PATH\", system version used, if left empty)"
	read -r OCTAVE_BIN_PATH
	if [ "$OCTAVE_BIN_PATH" == "" ];
	then
		OCTAVE_BIN_PATH=$(find_in_path octave)
		if [ "$OCTAVE_BIN_PATH" == "" ];
		then
			echo octave not found
			exit 1
		fi
	fi
	echo "=> Setting octave's path to \"$OCTAVE_BIN_PATH\""
	echo
	echo "Please enter the full path to mkoctfile (currently set to \"$OCTAVE_MKOCT\", system version used, if left empty)"
	read -r OCTAVE_MKOCT
	if [ "$OCTAVE_MKOCT" == "" ];
	then
		OCTAVE_MKOCT=$(find_in_path mkoctfile)
		if [ "$OCTAVE_MKOCT" == "" ];
		then
			# last resort: look next to the octave binary
			OCTAVE_MKOCT=$(dirname "$OCTAVE_BIN_PATH")/mkoctfile
			if [ ! -f "$OCTAVE_MKOCT" ];
			then
				echo mkoctfile not found
				exit 1
			fi
		fi
	fi
	echo "=> Setting mkoctfile's path to \"$OCTAVE_MKOCT\""
	echo
	MATLAB_BIN_PATH=
fi
if [ "$INTERPRETER" == 'matlab' ];
then
	echo "Please enter the full path to matlab (currently set to \"$MATLAB_BIN_PATH\", system version used, if left empty)"
	read -r MATLAB_BIN_PATH
	if [ "${MATLAB_BIN_PATH}" == "" ];
	then
		MATLAB_BIN_PATH=$(find_in_path matlab)
		if [ "$MATLAB_BIN_PATH" == "" ];
		then
			echo matlab not found
			exit 1
		fi
	fi
	if [ ! -f "$MATLAB_BIN_PATH" ];
	then
		echo matlab not found
		exit 1
	fi
	echo "=> Setting matlab's path to \"$MATLAB_BIN_PATH\""
	echo
	echo "Please enter the full path to mex binary (currently set to \"$MATLAB_MEX_PATH\", system version used if left empty)"
	read -r MATLAB_MEX_PATH
	if [ "$MATLAB_MEX_PATH" == "" ];
	then
		MATLAB_MEX_PATH=$(find_in_path mex)
		if [ "$MATLAB_MEX_PATH" == "" ];
		then
			echo mex not found
			exit 1
		fi
	fi
	if [ ! -f "$MATLAB_MEX_PATH" ];
	then
		echo mex not found
		exit 1
	fi
	echo "=> Setting mex' path to \"$MATLAB_MEX_PATH\""
	echo
	echo "Please enter the full path to the matlab include directory (currently set to \"$MATLAB_INCLUDE_DIR\", system version used, if left empty)"
	read -r MATLAB_INCLUDE_DIR
	if [ "$MATLAB_INCLUDE_DIR" == "" ];
	then
		MATLAB_INCLUDE_DIR=$(dirname "$MATLAB_BIN_PATH")/../extern/include
	fi
	if [ ! -d "$MATLAB_INCLUDE_DIR" ];
	then
		echo matlab include dir not found
		exit 1
	fi
	echo "=> Setting matlab's include directory to \"$MATLAB_INCLUDE_DIR\""
	echo
	OCTAVE_BIN_PATH=
fi

# Keep a backup, then drop every line that mentions one of the variables
# written below. The version line is matched by its NAME: matching by the
# value ($DESEQ_VERSION) breaks the grep command line when the value is
# empty. grep exits with 1 when no line survives the filter, which is fine
# here and must not trip `set -e`.
cp -p bin/deseq_config.sh bin/deseq_config.sh.bk
grep -v -e OCTAVE_BIN_PATH -e OCTAVE_MKOCT -e MATLAB_BIN_PATH -e MATLAB_MEX_PATH -e MATLAB_INCLUDE_DIR \
	-e DESEQ_PATH -e DESEQ_SRC_PATH -e DESEQ_BIN_PATH \
	-e INTERPRETER -e SAMTOOLS_DIR -e PYTHON_PATH -e SCIPY_PATH -e R_PATH -e DESEQ_VERSION bin/deseq_config.sh.bk \
	> bin/deseq_config.sh || [ $? -eq 1 ]
echo
echo
echo generating config file

# The line format "export NAME=value" is kept exactly as before.
# NOTE(review): values are written unquoted on purpose, in case other tools
# besides the shell parse this file -- confirm before changing the format.
{
	echo "export DESEQ_VERSION=$DESEQ_VERSION"
	echo "export DESEQ_PATH=$DESEQ_PATH"
	echo "export DESEQ_SRC_PATH=${DESEQ_PATH}/src"
	echo "export DESEQ_BIN_PATH=${DESEQ_PATH}/bin"
	echo "export INTERPRETER=$INTERPRETER"
	echo "export MATLAB_BIN_PATH=$MATLAB_BIN_PATH"
	echo "export MATLAB_MEX_PATH=$MATLAB_MEX_PATH"
	echo "export MATLAB_INCLUDE_DIR=$MATLAB_INCLUDE_DIR"
	echo "export OCTAVE_BIN_PATH=$OCTAVE_BIN_PATH"
	echo "export OCTAVE_MKOCT=$OCTAVE_MKOCT"
	echo "export SAMTOOLS_DIR=$SAMTOOLS_DIR"
	echo "export PYTHON_PATH=$PYTHON_PATH"
	echo "export SCIPY_PATH=$SCIPY_PATH"
	echo "export R_PATH=$R_PATH"
} >> bin/deseq_config.sh

echo
echo Done.
echo
#!/bin/bash
# File: deseq-hts_1.0/src/deseq-hts.sh
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Copyright (C) 2009-2012 Max Planck Society
#
# Usage: deseq-hts.sh ANNO_INPUT DESEQ_RES_FILE GENES_FN GROUP [GROUP ...]
#   ANNO_INPUT      genome annotation in GFF3 format
#   DESEQ_RES_FILE  file the result table is written to
#   GENES_FN        file the parsed annotation is stored in
#   GROUP           one replicate group: BAM files separated by ':'
#

set -e

PROG=$(basename "$0")
DIR=$(dirname "$0")

. "${DIR}/../bin/deseq_config.sh"

if [ $# -lt 4 ]
then
	echo "usage: ${PROG} ANNO_INPUT DESEQ_RES_FILE GENES_FN BAM[:BAM...] [BAM[:BAM...] ...]" >&2
	exit 1
fi

echo
echo ${PROG}: FML http://galaxy.fml.mpg.de Galaxy wrapper for the DESeq version $DESEQ_VERSION.
echo
echo DESeq performs differential expression testing from RNA-Seq measurements.
echo

ANNO_INPUT=${1}
shift
DESEQ_RES_FILE=${1}
shift
GENES_FN=${1}
shift

mkdir -p "$(dirname "$GENES_FN")"

echo %%%%%%%%%%%%%%%%%%%%%%%
echo % 1. Data preparation %
echo %%%%%%%%%%%%%%%%%%%%%%%
echo
echo load the genome annotation in GFF3 format and create an annotation object
echo
export PYTHONPATH=$PYTHONPATH:${SCIPY_PATH}
"${PYTHON_PATH}" "${DIR}/../tools/ParseGFF.py" "${ANNO_INPUT}" "${GENES_FN}"
"${DIR}/../bin/genes_cell2struct" "${GENES_FN}" 2>&1
echo
echo genome annotation stored in $GENES_FN

echo
echo %%%%%%%%%%%%%%%%%%%%
echo % 2. Read counting %
echo %%%%%%%%%%%%%%%%%%%%
echo

echo counting reads overlapping exons using given alignments
for REPLICATE_GROUP in "$@"
do
	# Split the group on ':' with IFS changed for this one `read` only.
	# Setting IFS=':' globally (as this loop used to do) leaks into every
	# later unquoted expansion and tears the groups apart before they
	# reach get_read_counts.
	IFS=':' read -r -a BAM_FILES <<< "$REPLICATE_GROUP"
	for BAM_FILE in "${BAM_FILES[@]}"
	do
		if [ -z "$BAM_FILE" ]
		then
			continue
		fi
		echo
		if [ ! -f "${BAM_FILE}.bai" ]
		then
			echo "Indexing $BAM_FILE"
			"${SAMTOOLS_DIR}/samtools" index "$BAM_FILE"
		else
			echo "$BAM_FILE already indexed"
		fi
		echo
	done
done

tmpfile=$(mktemp --tmpdir=/tmp)
# remove the intermediate files however the script ends
trap 'rm -f "$tmpfile" "${tmpfile}_COUNTS.tab" "${tmpfile}_CONDITIONS.tab"' EXIT

echo "${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile $*"
"${DIR}/../bin/get_read_counts" "${GENES_FN}" "$tmpfile" "$@" 2>&1

echo
echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
echo % 3. Differential testing %
echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
echo

echo testing genes for differential expression using given alignments

echo "cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $#"
cat "${DIR}/../src/difftest_deseq.R" | "$R_PATH" --slave --args "$tmpfile" "${DESEQ_RES_FILE}" $# 2> /dev/null

echo
echo %%%%%%%%
echo % Done %
echo %%%%%%%%
echo
function deseq_config
% DESEQ_CONFIG   Sets a few global variables with system dependent paths.
%
%   deseq_config
%
%   Copies the DESeq-hts settings from the environment into global
%   variables, puts <DESEQ_PATH>/tools on the search path and switches off
%   a set of warnings for the running interpreter.
%
%   File: deseq-hts_1.0/src/deseq_config.m
%
%   This program is free software; you can redistribute it and/or modify
%   it under the terms of the GNU General Public License as published by
%   the Free Software Foundation; either version 3 of the License, or
%   (at your option) any later version.
%
%   Written (W) 2009-2011 Regina Bohnert, Gunnar Raetsch
%   Copyright (C) 2009-2011 Max Planck Society
%

% paths, interpreter paths and SAMTools path
global DESEQ_PATH DESEQ_SRC_PATH
global INTERPRETER MATLAB_BIN_PATH OCTAVE_BIN_PATH
global SAMTOOLS_DIR

% configuration, taken from the environment of the calling process
DESEQ_PATH = getenv('DESEQ_PATH');
DESEQ_SRC_PATH = getenv('DESEQ_SRC_PATH');
INTERPRETER = getenv('INTERPRETER');
MATLAB_BIN_PATH = getenv('MATLAB_BIN_PATH');
OCTAVE_BIN_PATH = getenv('OCTAVE_BIN_PATH');
SAMTOOLS_DIR = getenv('SAMTOOLS_DIR');

addpath([DESEQ_PATH '/tools']);

% the interpreter is recognised by the text returned by license()
if isequal(license, 'GNU General Public License'),
  engine = 'octave';
else
  engine = 'matlab';
end;

% switch off a few expected warnings
if isequal(engine, 'octave'),
  % sequence of calls kept as it was, including the repeated id
  octave_warning_ids = {'Octave:precedence-change', ...
                        'Octave:function-name-clash', ...
                        '', ...
                        'Octave:num-to-str', ...
                        'Octave:function-name-clash', ...
                        'Octave:divide-by-zero', ...
                        'Octave:future-time-stamp', ...
                        'Octave:assign-as-truth-value'};
  for w = 1:length(octave_warning_ids),
    warning('off', octave_warning_ids{w});
  end;
else
  warning('off', 'MATLAB:typeaheadBufferOverflow');
end

% make sure no process stops with a debug prompt
global g_ignore_keyboard
g_ignore_keyboard = 1;
### File: deseq-hts_1.0/src/difftest_deseq.R
###
### Pairwise differential expression test with DESeq.
### Reads <INFILE>_COUNTS.tab and <INFILE>_CONDITIONS.tab and writes one
### column per pair of conditions to OUTFILE.
### Invocation: R --slave --args INFILE OUTFILE SIZE < difftest_deseq.R
library( DESeq )

### get arguments 1: INFILE, 2: OUTFILE 3:SIZE
### trailingOnly=TRUE returns only what follows --args, so the positions do
### not depend on how many options R itself was started with
args <- commandArgs( trailingOnly=TRUE )
if (length(args) < 2)
{
  stop("Expected arguments after --args: INFILE OUTFILE [SIZE]")
}
INFILE <- args[1]
OUTFILE <- args[2]

INFILE_COUNTS <- paste(INFILE, "_COUNTS.tab", sep="")
INFILE_CONDS <- paste(INFILE, "_CONDITIONS.tab", sep="")

### read count data from file
countsTable <- read.delim( INFILE_COUNTS, header=TRUE, stringsAsFactors=TRUE )
condsTable <- read.delim( INFILE_CONDS, header=TRUE, stringsAsFactors=TRUE )

### use gene IDs as row names
rownames( countsTable ) <- countsTable$gene
countsTable <- countsTable[ , -1 ]
head( countsTable )

### second column of the conditions table holds the condition of each file
conds <- factor( condsTable[ , 2] )
#head( countsTable )

cds <- newCountDataSet( round(countsTable), conds )
#head( counts(cds) )

cds <- estimateSizeFactors( cds )
#sizeFactors( cds )

### estimate variance function, use blind only, if no replicates are provided
if (length(levels(conds)) < length(conds))
{
  cds <- estimateDispersions( cds )
} else {
  writeLines("\nYou did not enter any replicates! - The results may be less valuable without replicates!\n")
  cds <- estimateDispersions( cds, method='blind', sharingMode='fit-only')
}
experiments <- levels(conds)

### 1:(n-1) would count downwards for n < 2 and test a non-existing condition
if (length(experiments) < 2)
{
  stop("At least two conditions are required for differential testing")
}

res <- c()
table_col_names <- c()
for (i in seq_len(length(experiments)-1))
{
  for (j in (i+1):length(experiments))
  {
    print(c(i,j))
    tempres <- nbinomTest(cds,experiments[i],experiments[j])
    ### column 7 of the nbinomTest result (presumably the p-value -- verify
    ### against the installed DESeq version)
    res <- cbind(res,tempres[,7])
    #res = cbind(res,tempres[,8])
    table_col_names <- cbind(table_col_names,paste('cond_', experiments[i], '_vs._cond_', experiments[j], sep=''))
  }
}

DiffTable <- res
rownames(DiffTable) <- rownames(countsTable)
colnames(DiffTable) <- table_col_names
### missing values are written as 1.000
write.table(DiffTable, file = OUTFILE, quote = FALSE, sep ="\t", eol ="\n", na = "1.000", dec = ".", row.names = TRUE,col.names =TRUE)
function genes_cell2struct(anno_fname)
% GENES_CELL2STRUCT   Converts genes stored as a cell to struct.
%
%   genes_cell2struct(anno_fname)
%
%   -- input --
%   anno_fname:   name of file where genes as cell are stored
%
%   -- output --
%   none; when the variable 'genes' in anno_fname is a cell array, it is
%   converted to a struct array and saved back to the same file. A file
%   whose 'genes' is not a cell array is left untouched.
%
%   File: deseq-hts_1.0/src/genes_cell2struct.m

load(anno_fname, 'genes');
if iscell(genes)
  genes_cell = genes;
  clear genes;
  if isempty(genes_cell)
    % nothing to convert: store an empty struct array, otherwise 'genes'
    % would be undefined when it is saved below
    genes = struct([]);
  end
  for g = 1:length(genes_cell),
    gene = genes_cell{g};
    % numeric fields are converted to double
    for e = 1:length(gene.exons)
      gene.exons{e} = double(gene.exons{e});
    end
    % exons as a 1 x n cell row
    gene.exons = reshape(gene.exons, 1, length(gene.exons));
    gene.id = double(gene.id);
    gene.start = double(gene.start);
    gene.stop = double(gene.stop);
    genes(g) = gene;
  end
  save(anno_fname, 'genes');
end
function get_read_counts(anno_dir, outfile, varargin)
% GET_READ_COUNTS Counts the reads overlapping the exons of each gene.
%
%   get_read_counts(anno_dir, outfile, varargin)
%
% -- input --
%   anno_dir: annotation file handed to load(); it has to provide the
%             variable 'genes'
%   outfile:  prefix of the result files <outfile>_COUNTS.tab and
%             <outfile>_CONDITIONS.tab
%   varargin: one string per replicate group (condition); each string is
%             split at ':' with separate() into the BAM files of the group
%
% -- output --
%   <outfile>_COUNTS.tab:     one row per gene, one column of exonic read
%                             counts per BAM file
%   <outfile>_CONDITIONS.tab: file name, condition index and replicate
%                             index of every BAM file
%
% NOTE: the function finishes with exit and therefore terminates the
% interpreter session.
%
% File: deseq-hts_1.0/src/get_read_counts.m

% DESeq paths
global DESEQ_PATH DESEQ_SRC_PATH

% interpreter paths
global INTERPRETER MATLAB_BIN_PATH OCTAVE_BIN_PATH

% SAMTools path
global SAMTOOLS_DIR

%%%% paths
addpath(sprintf('%s/tools', DESEQ_PATH));
addpath(sprintf('%s/mex', DESEQ_PATH));
addpath(sprintf('%s', DESEQ_SRC_PATH));

deseq_config;

%%% read list of replicate groups from variable length argument list
rg_list = varargin;
% drop empty (blank) arguments; replaces the deprecated
% strmatch('', rg_list, 'exact')
blank = cellfun(@(s) isempty(deblank(s)), rg_list);
rg_list(blank) = [];

file_list = {};
file_cond_ids = [];
file_rep_ids = [];
for cond = 1:length(rg_list)
    items = separate(rg_list{cond}, ':');
    for rep = 1:length(items)
        if isempty(deblank(items{rep}))
            continue;
        end
        file_list{end + 1} = items{rep};
        file_cond_ids(end + 1) = cond;  % position of the group in the argument list
        file_rep_ids(end + 1) = rep;    % position of the file within its group
    end
end

%%%%% adapt to number of input arguments
file_num = length(file_list);
RESULTS = cell(1, file_num);

%%%% get annotation file
load(sprintf('%s', anno_dir));

%%%%% mask overlapping gene regions -> later not counted
[genes] = mask_dubl(genes,0);

%%%% remove genes with no annotated exons or without a start or stop position
idx = find(arrayfun(@(x)(~isempty(x.exons)*~isempty(x.start)*~isempty(x.stop)), genes));
fprintf('removed %i of %i genes, which had either no exons annotated or lacked a start or stop position\n', size(genes, 2) - size(idx, 2), size(genes, 2))
genes = genes(idx);
clear idx;

%%%% check if genes have field chr_num
if ~isfield(genes, 'chr_num')
    chrms = unique({genes(:).chr});
    for i = 1:length(genes)
        % index of the chromosome name within the sorted unique names
        genes(i).chr_num = find(strcmp(genes(i).chr, chrms));
    end
end

%%%% iterate over all given bam files
for f_idx = 1:file_num
    expr1_bam = fullfile('', file_list{f_idx});
    STAT = cell(size(genes, 2), 1);
    for i = 1:size(genes, 2)
        RESULT = cell(1, 7);
        gene = genes(i);
        RESULT{4} = f_idx;
        RESULT{1} = gene.name;

        % defensive: such genes have already been filtered out above
        if isempty(gene.exons) || isempty(gene.start) || isempty(gene.stop)
            RESULT{2} = inf;
            RESULT{3} = inf;
            RESULT{5} = [inf, inf];
            STAT{i} = RESULT;
            continue;
        end
        gene_len = gene.stop - gene.start + 1;

        if ~isempty(gene.chr_num)
            [mask1, read_intron_list] = get_reads(expr1_bam, gene.chr, gene.start, gene.stop, '0');
            clear read_intron_list;
        else
            mask1 = [];
        end

        % reads1: one row per read, one column per position of the gene
        if isempty(mask1)
            reads1 = zeros(0, gene_len);
        else
            reads1 = sparse(mask1(1,:)', mask1(2,:)', ones(size(mask1,2),1), max(mask1(1,:)), gene_len);
        end
        if ~isempty(reads1)
            reads1 = remove_reads_from_other_genes(reads1, gene);
        end
        RESULT{2} = size(reads1, 1); % number of all reads falling in that gene

        % mark every position covered by an exon of any transcript
        EXON_IDX = zeros(1, gene_len);
        for t = 1:size(gene.transcripts, 2)
            for e = 1:size(gene.exons{t}, 1)
                EXON_IDX((gene.exons{t}(e,1)-gene.start+1):(gene.exons{t}(e,2)-gene.start+1)) = 1;
            end
        end
        reads1 = reads1(sum(reads1(:, EXON_IDX > 0), 2) > 0, :);
        RESULT{3} = size(reads1, 1); % number of reads overlapping to exons
        % [gene length, number of exonic positions]. The original stored
        % [size(reads1), L1], a 3-element vector, so the aggregation
        % below read the read count and the gene length instead.
        RESULT{5} = [gene_len, sum(EXON_IDX)];
        clear reads1;
        STAT{i} = RESULT;
    end
    RESULTS{f_idx} = STAT;
end

S = size(genes, 2);
READCOUNTS_ALL = zeros(S, file_num);
READCOUNTS_EXON = zeros(S, file_num);
LENGTH_ALL = zeros(S, file_num);
LEN_EXON = zeros(S, file_num);

for j = 1:file_num
    for i = 1:S
        T = RESULTS{j}{i};
        if isempty(T)
            continue
        end
        READCOUNTS_ALL(i,j) = T{2};
        READCOUNTS_EXON(i,j) = T{3};
        LENGTH_ALL(i,j) = T{5}(1);
        LEN_EXON(i,j) = T{5}(2);
    end
end

%%%%% write results for all bam files
conditions_fname = sprintf('%s_CONDITIONS.tab', outfile);
counts_fname = sprintf('%s_COUNTS.tab', outfile);
fid_conditions = fopen(conditions_fname, 'w');
if fid_conditions < 0
    error('get_read_counts: cannot open %s for writing', conditions_fname);
end
fid_counts = fopen(counts_fname, 'w');
if fid_counts < 0
    fclose(fid_conditions);
    error('get_read_counts: cannot open %s for writing', counts_fname);
end

fprintf(fid_counts, 'gene');
fprintf(fid_conditions, 'file\tcondition\treplicate\n');
for j = 1:length(file_list)
    % column name: file name without directories and without '.bam'
    fname = file_list{j};
    fname = separate(fname, '/');
    fname = fname{end};
    fname = strrep(fname, '.bam', '');
    fprintf(fid_counts, '\t%s', fname);
    fprintf(fid_conditions, '%s\t%i\t%i\n', fname, file_cond_ids(j), file_rep_ids(j));
end
fprintf(fid_counts, '\n');

for i = 1:size(genes, 2)
    fprintf(fid_counts, '%s', genes(i).name);
    for j = 1:length(file_list)
        fprintf(fid_counts, '\t%i', READCOUNTS_EXON(i,j));
    end
    fprintf(fid_counts, '\n');
end
fclose(fid_counts);
fclose(fid_conditions);
exit;
function [new_genes]=mask_dubl(genes,THRESH)
% MASK_DUBL Annotates every gene with the regions it shares with other genes.
%
%   new_genes = mask_dubl(genes, THRESH)
%
% -- input --
%   genes:  1 x N struct array; the fields chr, start, stop, transcripts
%           and exons are read
%   THRESH: not used; kept so that existing callers keep working
%
% -- output --
%   new_genes: copy of genes with the additional field non_unique_regions.
%              It holds the [start stop] rows of other genes on the same
%              chromosome that intersect [start, stop] of the gene, or []
%              if there are none.
%
% File: deseq-hts_1.0/src/mask_dubl.m

new_genes = genes;
num_genes = size(genes, 2);
if num_genes == 0
    return;
end

% chr_val(i) is the index of genes(i).chr within the unique chromosome names
[CHROMOSOMES, tmp, chr_val] = unique({genes.chr});
chr_val = chr_val(:);

% one row per gene: [gene index, start, stop, chromosome index]
INFO = zeros(num_genes, 4);
for i = 1:num_genes
    INFO(i,:) = [i, genes(i).start, genes(i).stop, chr_val(i)];
end

for chr = 1:length(CHROMOSOMES)
    % genes of this chromosome, sorted by start position
    GENES_ON_CHR = INFO(INFO(:,4) == chr, :);
    [tmp, POS] = sort(GENES_ON_CHR(:,2));
    GENES_ON_CHR = GENES_ON_CHR(POS,:);
    STARTS = GENES_ON_CHR(:,2);
    STOPS = GENES_ON_CHR(:,3);

    for i = 1:size(GENES_ON_CHR, 1)
        % range of candidate neighbours in the start-sorted order
        MIN_START = find(STOPS >= STARTS(i), 1, 'first');
        MAX_STOP = find(STARTS <= STOPS(i), 1, 'last');
        if MIN_START == i
            MIN_START = [];
        end
        if MAX_STOP == i
            MAX_STOP = [];
        end

        EXONS = [];
        if ~isempty(MIN_START)
            for CURR = MIN_START:(i-1)
                EXONS = [EXONS; gene_regions(genes(GENES_ON_CHR(CURR,1)))];
            end
        end
        if ~isempty(MAX_STOP)
            for CURR = (i+1):MAX_STOP
                EXONS = [EXONS; gene_regions(genes(GENES_ON_CHR(CURR,1)))];
            end
        end

        % EXONS can still be [] here (no candidate contributed a row);
        % indexing its columns would fail, so treat that as "no overlap".
        if ~isempty([MAX_STOP, MIN_START]) && ~isempty(EXONS)
            % keep only the regions that intersect the span of gene i
            EXONS = EXONS(EXONS(:,2) >= STARTS(i), :);
            EXONS = EXONS(EXONS(:,1) <= STOPS(i), :);
            new_genes(GENES_ON_CHR(i,1)).non_unique_regions = EXONS;
        else
            new_genes(GENES_ON_CHR(i,1)).non_unique_regions = [];
        end
    end
end


function regions = gene_regions(gene)
% GENE_REGIONS [start stop] rows contributed by one gene: the exons of each
% transcript, or the whole gene span where transcripts or exons are missing.
regions = [];
if isempty(gene.transcripts)
    regions = [gene.start, gene.stop];
    return;
end
for tra = 1:size(gene.transcripts, 2)
    if ~isempty(gene.exons)
        regions = [regions; gene.exons{tra}];
    else
        regions = [regions; gene.start, gene.stop];
    end
end
function [READS_OUT,FLAG]=remove_reads_from_other_genes(READS,GENE)
% REMOVE_READS_FROM_OTHER_GENES Removes the reads in READS which could come
% from other annotated genes.
%
%   [READS_OUT, FLAG] = remove_reads_from_other_genes(READS, GENE)
%
% -- input --
%   READS: matrix with one row per read and one column per position of
%          the gene (GENE.start .. GENE.stop)
%   GENE:  struct with the fields start and stop and, optionally,
%          non_unique_regions (rows of [start stop])
%
% -- output --
%   READS_OUT: READS without the rows whose entries all lie inside the
%              non-unique regions
%   FLAG:      1 if GENE has the field non_unique_regions, 0 otherwise
%              (READS is then returned unchanged)
%
% File: deseq-hts_1.0/src/remove_reads_from_other_genes.m

READS_OUT = READS;
FLAG = 0;
if ~isfield(GENE, 'non_unique_regions')
    return;
end
FLAG = 1;

% mark the gene-relative positions covered by a non-unique region
regions = GENE.non_unique_regions;
shared = false(1, GENE.stop - GENE.start + 1);
for r = 1:size(regions, 1)
    lo = max(regions(r,1), GENE.start) - GENE.start + 1;
    hi = min(regions(r,2), GENE.stop) - GENE.start + 1;
    shared(lo:hi) = true;
end

% a read is dropped when its whole row sum falls on shared positions
in_shared = sum(READS(:, shared), 2);
total = sum(READS, 2);
READS_OUT = READS(in_shared ~= total, :);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions.gff3 Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,747 @@ +##gff-version 3 +##Seqid Source Type Start End Score Phase Attributes +I Coding_transcript gene 11495 16831 . + . ID=Gene:Gene:Y74C9A.2.2 +I Coding_transcript mRNA 11495 16793 . + . ID=Transcript:Gene:Y74C9A.2.2.1;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11495 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript three_prime_UTR 16702 16793 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript mRNA 11499 16790 . + . ID=Transcript:Gene:Y74C9A.2.2.2;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11499 11557 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript three_prime_UTR 16586 16790 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript mRNA 11499 16831 . + . 
ID=Transcript:Gene:Y74C9A.2.2.3;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11499 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript three_prime_UTR 16586 16831 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript mRNA 11505 16790 . + . ID=Transcript:Gene:Y74C9A.2.2.4;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11505 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript five_prime_UTR 11623 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript three_prime_UTR 16586 16790 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript mRNA 11618 16804 . + . ID=Transcript:Gene:Y74C9A.2.2.5;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript CDS 14951 15160 . 
+ 2 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript three_prime_UTR 16586 16804 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript gene 47472 49416 . + . ID=Gene:Gene:Y48G1C.12 +I Coding_transcript mRNA 47472 49416 . + . ID=Transcript:Gene:Y48G1C.12.1;Parent=Gene:Gene:Y48G1C.12 +I Coding_transcript CDS 47472 47610 . + 0 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript CDS 47696 47858 . + 2 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript CDS 48348 48530 . + 1 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript CDS 49251 49416 . + 1 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript gene 71858 81071 . + . ID=Gene:Gene:Y48G1C.2.1 +I Coding_transcript mRNA 71858 81071 . + . ID=Transcript:Gene:Y48G1C.2.1.1;Parent=Gene:Gene:Y48G1C.2.1 +I Coding_transcript five_prime_UTR 71858 71932 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 75951 76112 . 
+ 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript three_prime_UTR 80345 81071 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript mRNA 71878 81063 . + . ID=Transcript:Gene:Y48G1C.2.1.2;Parent=Gene:Gene:Y48G1C.2.1 +I Coding_transcript five_prime_UTR 71878 71932 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 79313 79447 . 
+ 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript three_prime_UTR 80345 80561 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript three_prime_UTR 80814 81063 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript mRNA 72511 80344 . + . ID=Transcript:Gene:Y48G1C.2.1.3;Parent=Gene:Gene:Y48G1C.2.1 +I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript gene 86841 90607 . + . ID=Gene:Gene:Y48G1C.11 +I Coding_transcript mRNA 86841 90607 . + . 
ID=Transcript:Gene:Y48G1C.11.1;Parent=Gene:Gene:Y48G1C.11 +I Coding_transcript CDS 86841 86904 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 87034 87223 . + 2 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 87520 87734 . + 1 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 88268 88500 . + 2 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 88566 88706 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 89372 89584 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 90419 90607 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript gene 91380 92877 . + . ID=Gene:Gene:Y48G1C.9.1 +I Coding_transcript mRNA 91380 92654 . + . ID=Transcript:Gene:Y48G1C.9.1.1;Parent=Gene:Gene:Y48G1C.9.1 +I Coding_transcript five_prime_UTR 91380 91408 . + . ID=five_prime_UTR:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript CDS 91409 91594 . + 0 ID=CDS:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript CDS 92523 92648 . + 0 ID=CDS:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript three_prime_UTR 92649 92654 . + . ID=three_prime_UTR:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript mRNA 91404 92877 . + . ID=Transcript:Gene:Y48G1C.9.1.2;Parent=Gene:Gene:Y48G1C.9.1 +I Coding_transcript five_prime_UTR 91404 91408 . + . ID=five_prime_UTR:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript CDS 91409 91594 . + 0 ID=CDS:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript CDS 92523 92648 . + 0 ID=CDS:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript three_prime_UTR 92649 92877 . + . 
ID=three_prime_UTR:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript gene 113807 114681 . + . ID=Gene:Gene:F53G12.8 +I Coding_transcript mRNA 113807 114681 . + . ID=Transcript:Gene:F53G12.8.1;Parent=Gene:Gene:F53G12.8 +I Coding_transcript CDS 113807 113863 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1 +I Coding_transcript CDS 114085 114423 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1 +I Coding_transcript CDS 114544 114681 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1 +I Coding_transcript gene 115739 117438 . + . ID=Gene:Gene:F53G12.7 +I Coding_transcript mRNA 115739 117438 . + . ID=Transcript:Gene:F53G12.7.1;Parent=Gene:Gene:F53G12.7 +I Coding_transcript CDS 115739 115915 . + 0 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript CDS 116432 116666 . + 0 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript CDS 116719 116974 . + 2 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript CDS 117086 117401 . + 1 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript three_prime_UTR 117402 117438 . + . ID=three_prime_UTR:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript gene 127297 134263 . + . ID=Gene:Gene:F53G12.5b +I Coding_transcript mRNA 127297 134263 . + . ID=Transcript:Gene:F53G12.5b.1;Parent=Gene:Gene:F53G12.5b +I Coding_transcript CDS 127297 127336 . + 0 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 127385 127436 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 128697 128896 . + 1 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 129176 129333 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 132103 132553 . 
+ 0 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript three_prime_UTR 133583 134263 . + . ID=three_prime_UTR:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript mRNA 128693 133809 . + . ID=Transcript:Gene:F53G12.5b.2;Parent=Gene:Gene:F53G12.5b +I Coding_transcript five_prime_UTR 128693 128697 . + . ID=five_prime_UTR:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 128698 128896 . + 0 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 129167 129333 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 132103 132553 . + 0 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript three_prime_UTR 133583 133809 . + . ID=three_prime_UTR:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript mRNA 128697 133582 . + . ID=Transcript:Gene:F53G12.5b.3;Parent=Gene:Gene:F53G12.5b +I Coding_transcript five_prime_UTR 128697 128697 . + . ID=five_prime_UTR:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 128698 128896 . + 0 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 129167 129333 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 132103 132553 . 
+ 0 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript gene 134337 137282 . + . ID=Gene:Gene:F53G12.4 +I Coding_transcript mRNA 134337 137282 . + . ID=Transcript:Gene:F53G12.4.1;Parent=Gene:Gene:F53G12.4 +I Coding_transcript five_prime_UTR 134337 134353 . + . ID=five_prime_UTR:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 134354 134428 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 134506 134581 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 135549 135898 . + 2 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 136235 136712 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 137209 137282 . + 2 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript gene 137845 144565 . + . ID=Gene:Gene:F53G12.3 +I Coding_transcript mRNA 137845 144565 . + . ID=Transcript:Gene:F53G12.3.1;Parent=Gene:Gene:F53G12.3 +I Coding_transcript CDS 137845 137886 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138017 138143 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138193 138351 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138393 138782 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138829 139032 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 139080 139331 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 139378 139669 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 139769 139982 . 
+ 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140136 140292 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140371 140496 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140554 140870 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140916 141213 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 141681 141854 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 141900 142023 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142067 142230 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142278 142477 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142547 142671 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142729 142939 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 143007 143684 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 144265 144396 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 144440 144565 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript gene 173412 175988 . + . ID=Gene:Gene:F56C11.6b +I Coding_transcript mRNA 173412 175932 . + . ID=Transcript:Gene:F56C11.6b.1;Parent=Gene:Gene:F56C11.6b +I Coding_transcript five_prime_UTR 173412 173508 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript five_prime_UTR 173561 173725 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript five_prime_UTR 173775 173873 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 173874 174312 . 
+ 0 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 174605 174832 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 174878 175053 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 175097 175241 . + 0 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 175579 175708 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 175755 175932 . + 1 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript mRNA 173412 175988 . + . ID=Transcript:Gene:F56C11.6b.2;Parent=Gene:Gene:F56C11.6b +I Coding_transcript five_prime_UTR 173412 173421 . + . ID=five_prime_UTR:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 173422 173508 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 173561 173725 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 173775 174312 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 174605 174832 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 174878 175053 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 175097 175241 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 175579 175708 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 175755 175932 . + 1 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript three_prime_UTR 175933 175988 . + . ID=three_prime_UTR:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript gene 178538 182159 . + . ID=Gene:Gene:F56C11.5b +I Coding_transcript mRNA 178538 182159 . + . ID=Transcript:Gene:F56C11.5b.1;Parent=Gene:Gene:F56C11.5b +I Coding_transcript five_prime_UTR 178538 178566 . 
+ . ID=five_prime_UTR:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 178567 178620 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 180600 180698 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 181251 181514 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 181663 181767 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 181814 181966 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript three_prime_UTR 181967 182159 . + . ID=three_prime_UTR:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript mRNA 180394 181966 . + . ID=Transcript:Gene:F56C11.5b.2;Parent=Gene:Gene:F56C11.5b +I Coding_transcript CDS 180394 180465 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 180600 180698 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 181251 181514 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 181663 181767 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 181814 181966 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript gene 216005 219099 . + . ID=Gene:Gene:Y48G1BL.1 +I Coding_transcript mRNA 216005 219099 . + . ID=Transcript:Gene:Y48G1BL.1.1;Parent=Gene:Gene:Y48G1BL.1 +I Coding_transcript five_prime_UTR 216005 216092 . + . ID=five_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript five_prime_UTR 216145 216180 . + . ID=five_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 216181 216277 . + 0 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 216331 216910 . 
+ 2 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 218110 218205 . + 1 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 218772 218955 . + 1 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript three_prime_UTR 218956 219099 . + . ID=three_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript gene 291216 305461 . + . ID=Gene:Gene:C53D5.1c.1 +I Coding_transcript mRNA 291216 305081 . + . ID=Transcript:Gene:C53D5.1c.1.1;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 291216 291308 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript five_prime_UTR 295793 295851 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript five_prime_UTR 302596 302671 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 302672 302705 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript mRNA 291216 305461 . + . ID=Transcript:Gene:C53D5.1c.1.2;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 291216 291248 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 291249 291308 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 295793 295851 . 
+ 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 302596 302705 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript three_prime_UTR 305082 305461 . + . ID=three_prime_UTR:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript mRNA 295364 305081 . + . ID=Transcript:Gene:C53D5.1c.1.3;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 295364 295367 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 295368 295421 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 295793 295851 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 302596 302705 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript mRNA 302596 305081 . + . 
ID=Transcript:Gene:C53D5.1c.1.4;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 302596 302671 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 302672 302705 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript gene 347484 348360 . + . ID=Gene:Gene:Y48G1A.2 +I Coding_transcript mRNA 347484 348360 . + . ID=Transcript:Gene:Y48G1A.2.1;Parent=Gene:Gene:Y48G1A.2 +I Coding_transcript five_prime_UTR 347484 347577 . + . ID=five_prime_UTR:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 347578 347584 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 347633 347709 . + 2 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 347765 347986 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 348045 348152 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript three_prime_UTR 348153 348360 . + . ID=three_prime_UTR:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript gene 364522 368511 . + . ID=Gene:Gene:R119.7 +I Coding_transcript mRNA 364522 368511 . + . ID=Transcript:Gene:R119.7.1;Parent=Gene:Gene:R119.7 +I Coding_transcript CDS 364522 364682 . + 0 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 364739 365194 . 
+ 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 365534 365599 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 365647 365963 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 366656 367212 . + 2 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 367749 367943 . + 0 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript three_prime_UTR 367944 368511 . + . ID=three_prime_UTR:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript gene 382836 388540 . + . ID=Gene:Gene:R119.6 +I Coding_transcript mRNA 382836 388540 . + . ID=Transcript:Gene:R119.6.1;Parent=Gene:Gene:R119.6 +I Coding_transcript CDS 382836 382916 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 382992 383067 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 383124 383377 . + 2 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 383432 383567 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 384628 385228 . + 2 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 386079 386339 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 388049 388153 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 388380 388437 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript three_prime_UTR 388438 388540 . + . ID=three_prime_UTR:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript gene 488619 489908 . + . ID=Gene:Gene:W04C9.2 +I Coding_transcript mRNA 488619 489908 . + . ID=Transcript:Gene:W04C9.2.1;Parent=Gene:Gene:W04C9.2 +I Coding_transcript CDS 488619 488726 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript CDS 488771 488836 . 
+ 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript CDS 488885 488947 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript CDS 489703 489774 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript three_prime_UTR 489775 489908 . + . ID=three_prime_UTR:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript gene 489987 496153 . + . ID=Gene:Gene:W04C9.1 +I Coding_transcript mRNA 489987 496153 . + . ID=Transcript:Gene:W04C9.1.1;Parent=Gene:Gene:W04C9.1 +I Coding_transcript five_prime_UTR 489987 490053 . + . ID=five_prime_UTR:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 490054 490205 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491200 491305 . + 1 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491353 491562 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491613 491727 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491839 492163 . + 2 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 492228 492354 . + 1 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 492870 493559 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 494263 494346 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 494395 494709 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 495831 496070 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript three_prime_UTR 496071 496153 . + . ID=three_prime_UTR:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript gene 534461 535347 . + . ID=Gene:Gene:Y65B4BR.8 +I Coding_transcript mRNA 534461 535347 . + . 
ID=Transcript:Gene:Y65B4BR.8.1;Parent=Gene:Gene:Y65B4BR.8 +I Coding_transcript CDS 534461 534572 . + 0 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript CDS 534621 534727 . + 2 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript CDS 534775 534973 . + 0 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript CDS 535034 535197 . + 2 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript three_prime_UTR 535198 535347 . + . ID=three_prime_UTR:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript gene 2931462 2947153 . + . ID=Gene:Gene:Y71F9AM.4a +I Coding_transcript mRNA 2931462 2947153 . + . ID=Transcript:Gene:Y71F9AM.4a.1;Parent=Gene:Gene:Y71F9AM.4a +I Coding_transcript CDS 2931462 2931549 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2931617 2931783 . + 2 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2932810 2932974 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2933904 2934137 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2935626 2935717 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2935988 2936131 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2936561 2936882 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2937759 2937919 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2938443 2938677 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2942825 2943021 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2943072 2943204 . 
+ 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2944077 2944223 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2944275 2944382 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2946747 2946938 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript three_prime_UTR 2946939 2947153 . + . ID=three_prime_UTR:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript mRNA 2935624 2944611 . + . ID=Transcript:Gene:Y71F9AM.4a.2;Parent=Gene:Gene:Y71F9AM.4a +I Coding_transcript five_prime_UTR 2935624 2935717 . + . ID=five_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript five_prime_UTR 2935988 2936042 . + . ID=five_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2936043 2936131 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2936561 2936882 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2937759 2937919 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2938443 2938677 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2942825 2943021 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2943072 2943204 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2944077 2944223 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2944275 2944424 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript three_prime_UTR 2944425 2944611 . + . ID=three_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript gene 2929778 2931362 . + . 
ID=Gene:Gene:Y71F9AM.5b +I Coding_transcript mRNA 2929778 2931362 . + . ID=Transcript:Gene:Y71F9AM.5b.1;Parent=Gene:Gene:Y71F9AM.5b +I Coding_transcript five_prime_UTR 2929778 2929840 . + . ID=five_prime_UTR:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2929841 2929885 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2929947 2930071 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2930175 2930367 . + 1 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2931224 2931256 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript three_prime_UTR 2931257 2931362 . + . ID=three_prime_UTR:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript mRNA 2929785 2931344 . + . ID=Transcript:Gene:Y71F9AM.5b.2;Parent=Gene:Gene:Y71F9AM.5b +I Coding_transcript five_prime_UTR 2929785 2929840 . + . ID=five_prime_UTR:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2929841 2929885 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2929947 2930089 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2930175 2930367 . + 1 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2931224 2931256 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript three_prime_UTR 2931257 2931344 . + . ID=three_prime_UTR:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript gene 537125 542200 . + . ID=Gene:Gene:Y65B4BR.4b +I Coding_transcript mRNA 537125 541634 . + . ID=Transcript:Gene:Y65B4BR.4b.1;Parent=Gene:Gene:Y65B4BR.4b +I Coding_transcript five_prime_UTR 537125 537140 . + . ID=five_prime_UTR:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 537141 537246 . 
+ 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 537306 537563 . + 2 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 537609 537838 . + 2 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 538705 538914 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 539456 539730 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 539804 540011 . + 1 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 540067 540387 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 540726 540986 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 541067 541288 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 541347 541634 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript mRNA 537125 542200 . + . ID=Transcript:Gene:Y65B4BR.4b.2;Parent=Gene:Gene:Y65B4BR.4b +I Coding_transcript five_prime_UTR 537125 537140 . + . ID=five_prime_UTR:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 537141 537246 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 537306 537563 . + 2 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 537609 537838 . + 2 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 538705 538914 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 539456 539730 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 539798 540011 . + 1 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 540067 540387 . 
+ 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 540726 540986 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 541067 541288 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 541347 541634 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript three_prime_UTR 541635 542200 . + . ID=three_prime_UTR:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript gene 562739 565184 . + . ID=Gene:Gene:Y65B4BR.1 +I Coding_transcript mRNA 562739 565184 . + . ID=Transcript:Gene:Y65B4BR.1.1;Parent=Gene:Gene:Y65B4BR.1 +I Coding_transcript five_prime_UTR 562739 562765 . + . ID=five_prime_UTR:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 562766 562872 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 562923 563122 . + 1 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 563167 563333 . + 2 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 563377 563460 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 563510 563583 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 564612 565113 . + 1 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript three_prime_UTR 565114 565184 . + . ID=three_prime_UTR:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript gene 618210 621466 . + . ID=Gene:Gene:F56A6.1b +I Coding_transcript mRNA 618210 621424 . + . ID=Transcript:Gene:F56A6.1b.1;Parent=Gene:Gene:F56A6.1b +I Coding_transcript CDS 618210 618360 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 618418 618671 . + 2 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 618716 618915 . 
+ 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 618962 619021 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 619072 619190 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 619250 619692 . + 2 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 619741 620027 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 620073 620469 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 620516 621175 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript three_prime_UTR 621176 621424 . + . ID=three_prime_UTR:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript mRNA 618210 621466 . + . ID=Transcript:Gene:F56A6.1b.2;Parent=Gene:Gene:F56A6.1b +I Coding_transcript CDS 618210 618360 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 618418 618671 . + 2 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 618716 618915 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 618962 619021 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 619072 619190 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 619250 619692 . + 2 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 619741 620027 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 620073 620469 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 620516 621171 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 621218 621314 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript three_prime_UTR 621315 621466 . + . 
ID=three_prime_UTR:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript gene 720014 731077 . + . ID=Gene:Gene:Y18H1A.6 +I Coding_transcript mRNA 720014 731077 . + . ID=Transcript:Gene:Y18H1A.6.1;Parent=Gene:Gene:Y18H1A.6 +I Coding_transcript five_prime_UTR 720014 720080 . + . ID=five_prime_UTR:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720081 720256 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720301 720435 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720489 720602 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720647 720787 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720831 720942 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 722106 722204 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 722247 722358 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 724573 724769 . + 2 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 725112 725228 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 725289 725395 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 726504 726873 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 730284 730535 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 730839 730940 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript three_prime_UTR 730941 731077 . + . ID=three_prime_UTR:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript gene 763627 770707 . + . ID=Gene:Gene:T06A4.3a +I Coding_transcript mRNA 763627 770707 . + . 
ID=Transcript:Gene:T06A4.3a.1;Parent=Gene:Gene:T06A4.3a +I Coding_transcript five_prime_UTR 763627 763676 . + . ID=five_prime_UTR:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 763677 763826 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 765140 765276 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 765434 765647 . + 1 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 766050 766151 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 766328 766447 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 766529 766643 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 767585 767736 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 768167 768226 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 768276 768450 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 768502 768630 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 770464 770627 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript three_prime_UTR 770628 770707 . + . ID=three_prime_UTR:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript mRNA 763677 768919 . + . ID=Transcript:Gene:T06A4.3a.2;Parent=Gene:Gene:T06A4.3a +I Coding_transcript CDS 763677 763826 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 765140 765276 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 765434 765647 . + 1 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 766050 766151 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 766328 766447 . 
+ 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 766529 766643 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 767585 767736 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768167 768226 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768276 768450 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768502 768630 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768801 768814 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript three_prime_UTR 768815 768919 . + . ID=three_prime_UTR:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript gene 770894 776356 . + . ID=Gene:Gene:T06A4.1b +I Coding_transcript mRNA 770894 776346 . + . ID=Transcript:Gene:T06A4.1b.1;Parent=Gene:Gene:T06A4.1b +I Coding_transcript five_prime_UTR 770894 770968 . + . ID=five_prime_UTR:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 770969 771060 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 771453 771568 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 771808 771913 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 771970 772156 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 773166 773387 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 773437 773548 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 773702 773989 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 774970 775066 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 775112 775574 . 
+ 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 776166 776303 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript three_prime_UTR 776304 776346 . + . ID=three_prime_UTR:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript mRNA 770969 776356 . + . ID=Transcript:Gene:T06A4.1b.2;Parent=Gene:Gene:T06A4.1b +I Coding_transcript CDS 770969 771060 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 771453 771568 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 771808 771913 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 771970 772156 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 773166 773387 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 773437 773548 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 773702 773989 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 774970 775066 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 775112 775389 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 775471 775574 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 776166 776303 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript three_prime_UTR 776304 776356 . + . ID=three_prime_UTR:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript gene 853461 854133 . + . ID=Gene:Gene:Y95B8A.8 +I Coding_transcript mRNA 853461 854133 . + . ID=Transcript:Gene:Y95B8A.8.1;Parent=Gene:Gene:Y95B8A.8 +I Coding_transcript CDS 853461 853489 . + 0 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853563 853628 . 
+ 1 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853699 853771 . + 1 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853830 853932 . + 0 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853988 854133 . + 2 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript gene 858489 871831 . + . ID=Gene:Gene:Y95B8A.7 +I Coding_transcript mRNA 858489 871831 . + . ID=Transcript:Gene:Y95B8A.7.1;Parent=Gene:Gene:Y95B8A.7 +I Coding_transcript five_prime_UTR 858489 858568 . + . ID=five_prime_UTR:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 858569 858837 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 861488 861921 . + 1 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 864406 864821 . + 2 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 865733 865936 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 866765 866860 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 868612 868723 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 870196 870326 . + 2 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 871656 871820 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript three_prime_UTR 871821 871831 . + . ID=three_prime_UTR:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript gene 882920 890209 . + . ID=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript mRNA 882920 889792 . + . ID=Transcript:Gene:Y95B8A.6a.2.1;Parent=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript five_prime_UTR 882920 883166 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript five_prime_UTR 884906 885042 . + . 
ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript five_prime_UTR 886509 886608 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 886609 886709 . + 0 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 887882 888060 . + 1 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 888152 888348 . + 2 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript mRNA 886213 890209 . + . ID=Transcript:Gene:Y95B8A.6a.2.2;Parent=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript five_prime_UTR 886213 886608 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 886609 886709 . + 0 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 887882 888060 . + 1 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 888152 888348 . + 2 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript three_prime_UTR 889793 890209 . + . ID=three_prime_UTR:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript mRNA 887841 889870 . + . ID=Transcript:Gene:Y95B8A.6a.2.3;Parent=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript five_prime_UTR 887841 888153 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript CDS 888154 888348 . + 0 ID=CDS:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript three_prime_UTR 889793 889870 . + . 
ID=three_prime_UTR:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript gene 897288 905906 . + . ID=Gene:Gene:Y95B8A.5 +I Coding_transcript mRNA 897288 905906 . + . ID=Transcript:Gene:Y95B8A.5.1;Parent=Gene:Gene:Y95B8A.5 +I Coding_transcript five_prime_UTR 897288 897291 . + . ID=five_prime_UTR:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 897292 897357 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 898181 898351 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 900615 900802 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 903105 903192 . + 1 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 903255 903331 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 903384 903513 . + 1 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 904613 904846 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 905736 905855 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript three_prime_UTR 905856 905906 . + . ID=three_prime_UTR:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript gene 1003891 1010767 . + . ID=Gene:Gene:C54G6.2 +I Coding_transcript mRNA 1003891 1010767 . + . ID=Transcript:Gene:C54G6.2.1;Parent=Gene:Gene:C54G6.2 +I Coding_transcript CDS 1003891 1004091 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1004703 1005062 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1006029 1006324 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1006461 1006548 . + 1 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1007013 1007136 . 
+ 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1007188 1007273 . + 2 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1008082 1008464 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1009524 1009755 . + 1 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1009820 1009891 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1010207 1010293 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1010594 1010767 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript gene 1018837 1019221 . + . ID=Gene:Gene:Y34D9A.11 +I Coding_transcript mRNA 1018837 1019221 . + . ID=Transcript:Gene:Y34D9A.11.1;Parent=Gene:Gene:Y34D9A.11 +I Coding_transcript five_prime_UTR 1018837 1018844 . + . ID=five_prime_UTR:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1 +I Coding_transcript CDS 1018845 1019159 . + 0 ID=CDS:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1 +I Coding_transcript three_prime_UTR 1019160 1019221 . + . ID=three_prime_UTR:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1 +I Coding_transcript gene 1034474 1040870 . + . ID=Gene:Gene:Y34D9A.3 +I Coding_transcript mRNA 1034474 1040870 . + . ID=Transcript:Gene:Y34D9A.3.1;Parent=Gene:Gene:Y34D9A.3 +I Coding_transcript five_prime_UTR 1034474 1034474 . + . ID=five_prime_UTR:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034475 1034499 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034549 1034646 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034693 1034881 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034934 1035065 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1035123 1035246 . 
+ 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1035306 1035398 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1037314 1037453 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1037508 1037715 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1039055 1039296 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1039340 1039883 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1040116 1040648 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1040700 1040765 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript three_prime_UTR 1040766 1040870 . + . ID=three_prime_UTR:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript gene 1049596 1050714 . + . ID=Gene:Gene:Y34D9A.6 +I Coding_transcript mRNA 1049596 1050714 . + . ID=Transcript:Gene:Y34D9A.6.1;Parent=Gene:Gene:Y34D9A.6 +I Coding_transcript five_prime_UTR 1049596 1049604 . + . ID=five_prime_UTR:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript CDS 1049605 1049688 . + 0 ID=CDS:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript CDS 1050407 1050640 . + 0 ID=CDS:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript three_prime_UTR 1050641 1050714 . + . ID=three_prime_UTR:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript gene 1062295 1065271 . + . ID=Gene:Gene:Y34D9A.2 +I Coding_transcript mRNA 1062295 1065271 . + . ID=Transcript:Gene:Y34D9A.2.1;Parent=Gene:Gene:Y34D9A.2 +I Coding_transcript five_prime_UTR 1062295 1062448 . + . ID=five_prime_UTR:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript CDS 1062449 1062757 . + 0 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript CDS 1063602 1064008 . 
+ 0 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript CDS 1064656 1064920 . + 1 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript three_prime_UTR 1064921 1065271 . + . ID=three_prime_UTR:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript gene 1068593 1075012 . + . ID=Gene:Gene:R06A10.2.1 +I Coding_transcript mRNA 1068593 1075012 . + . ID=Transcript:Gene:R06A10.2.1.1;Parent=Gene:Gene:R06A10.2.1 +I Coding_transcript five_prime_UTR 1068593 1068997 . + . ID=five_prime_UTR:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1068998 1069090 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1069416 1069577 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1070370 1070489 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1071472 1071569 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1071623 1071751 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1071801 1071935 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1072675 1072918 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1074199 1074345 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript three_prime_UTR 1074346 1075012 . + . ID=three_prime_UTR:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript mRNA 1068613 1074345 . + . ID=Transcript:Gene:R06A10.2.1.2;Parent=Gene:Gene:R06A10.2.1 +I Coding_transcript five_prime_UTR 1068613 1068893 . + . ID=five_prime_UTR:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript five_prime_UTR 1068976 1068997 . + . 
ID=five_prime_UTR:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1068998 1069090 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1069416 1069577 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1070370 1070489 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1071472 1071569 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1071623 1071751 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1071801 1071935 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1072675 1072918 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1074199 1074345 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript gene 1128326 1131739 . + . ID=Gene:Gene:ZK993.1 +I Coding_transcript mRNA 1128326 1131739 . + . ID=Transcript:Gene:ZK993.1.1;Parent=Gene:Gene:ZK993.1 +I Coding_transcript five_prime_UTR 1128326 1128360 . + . ID=five_prime_UTR:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1128361 1128428 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1129212 1129396 . + 1 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1129808 1130016 . + 2 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1131224 1131289 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1131475 1131636 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript three_prime_UTR 1131637 1131739 . + . ID=three_prime_UTR:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript gene 1337021 1338121 . + . ID=Gene:Gene:K12C11.6 +I Coding_transcript mRNA 1337021 1338121 . + . 
ID=Transcript:Gene:K12C11.6.1;Parent=Gene:Gene:K12C11.6 +I Coding_transcript CDS 1337021 1337117 . + 0 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1 +I Coding_transcript CDS 1337256 1337373 . + 2 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1 +I Coding_transcript CDS 1337938 1338121 . + 1 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1 +I Coding_transcript gene 1340679 1341259 . + . ID=Gene:Gene:K12C11.2.1 +I Coding_transcript mRNA 1340679 1341259 . + . ID=Transcript:Gene:K12C11.2.1.1;Parent=Gene:Gene:K12C11.2.1 +I Coding_transcript five_prime_UTR 1340679 1340703 . + . ID=five_prime_UTR:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript CDS 1340704 1340769 . + 0 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript CDS 1340826 1340926 . + 0 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript CDS 1340977 1341085 . + 1 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript three_prime_UTR 1341086 1341259 . + . ID=three_prime_UTR:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript mRNA 1340704 1341253 . + . ID=Transcript:Gene:K12C11.2.1.2;Parent=Gene:Gene:K12C11.2.1 +I Coding_transcript CDS 1340704 1340769 . + 0 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript CDS 1340826 1340926 . + 0 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript CDS 1340977 1341085 . + 1 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript three_prime_UTR 1341086 1341253 . + . ID=three_prime_UTR:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript gene 1394570 1402943 . + . ID=Gene:Gene:Y92H12BR.7 +I Coding_transcript mRNA 1394570 1402943 . + . ID=Transcript:Gene:Y92H12BR.7.1;Parent=Gene:Gene:Y92H12BR.7 +I Coding_transcript CDS 1394570 1394781 . 
+ 0 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1395760 1395875 . + 1 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1395940 1396113 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1397517 1397708 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1399169 1399348 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1402093 1402943 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript gene 1405945 1413072 . + . ID=Gene:Gene:Y92H12BR.6 +I Coding_transcript mRNA 1405945 1413072 . + . ID=Transcript:Gene:Y92H12BR.6.1;Parent=Gene:Gene:Y92H12BR.6 +I Coding_transcript CDS 1405945 1406373 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript CDS 1408458 1408637 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript CDS 1411442 1411616 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript CDS 1412612 1413033 . + 2 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript three_prime_UTR 1413034 1413072 . + . ID=three_prime_UTR:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript gene 1416592 1425131 . + . ID=Gene:Gene:Y92H12BR.3b +I Coding_transcript mRNA 1416592 1424609 . + . ID=Transcript:Gene:Y92H12BR.3b.1;Parent=Gene:Gene:Y92H12BR.3b +I Coding_transcript CDS 1416592 1416738 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript CDS 1416796 1416961 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript CDS 1422689 1422993 . + 2 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript CDS 1424412 1424609 . 
+ 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript mRNA 1416592 1425131 . + . ID=Transcript:Gene:Y92H12BR.3b.2;Parent=Gene:Gene:Y92H12BR.3b +I Coding_transcript CDS 1416592 1416738 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1416796 1416961 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1420212 1421309 . + 2 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1422689 1422993 . + 2 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1424412 1424609 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript three_prime_UTR 1424610 1425131 . + . ID=three_prime_UTR:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript gene 1483084 1490474 . + . ID=Gene:Gene:F47G6.1 +I Coding_transcript mRNA 1483084 1490474 . + . ID=Transcript:Gene:F47G6.1.1;Parent=Gene:Gene:F47G6.1 +I Coding_transcript five_prime_UTR 1483084 1483106 . + . ID=five_prime_UTR:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1483107 1483236 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1484720 1484877 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1484924 1485060 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1485927 1486012 . + 1 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1486057 1486252 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1487180 1487411 . + 1 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1487770 1488096 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1488425 1488676 . 
+ 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1489069 1489282 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1490282 1490322 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript three_prime_UTR 1490323 1490474 . + . ID=three_prime_UTR:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript gene 1514763 1527350 . + . ID=Gene:Gene:Y92H12A.4 +I Coding_transcript mRNA 1514763 1527350 . + . ID=Transcript:Gene:Y92H12A.4.1;Parent=Gene:Gene:Y92H12A.4 +I Coding_transcript CDS 1514763 1514900 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1514955 1515059 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1515111 1515186 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1515233 1515396 . + 2 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1517534 1518121 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1520159 1520271 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1522175 1522459 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1524299 1524644 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1525846 1526006 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1526978 1527350 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript gene 1610391 1619944 . + . ID=Gene:Gene:Y73E7A.9 +I Coding_transcript mRNA 1610391 1619944 . + . ID=Transcript:Gene:Y73E7A.9.1;Parent=Gene:Gene:Y73E7A.9 +I Coding_transcript CDS 1610391 1610504 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1610564 1610901 . 
+ 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1611487 1611595 . + 1 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1611747 1611828 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1613478 1613896 . + 2 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1616100 1616261 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1617834 1617969 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1619628 1619944 . + 2 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript gene 1629004 1633494 . + . ID=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript mRNA 1629004 1633494 . + . ID=Transcript:Gene:Y73E7A.1a.1.1;Parent=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript five_prime_UTR 1629004 1629022 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1629023 1629118 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1629164 1629226 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1629283 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript three_prime_UTR 1633366 1633494 . + . ID=three_prime_UTR:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript mRNA 1629006 1633365 . + . ID=Transcript:Gene:Y73E7A.1a.1.2;Parent=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript five_prime_UTR 1629006 1629022 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1629023 1629118 . 
+ 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1629164 1629226 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1629283 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript mRNA 1629010 1633365 . + . ID=Transcript:Gene:Y73E7A.1a.1.3;Parent=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript five_prime_UTR 1629010 1629118 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript five_prime_UTR 1629164 1629225 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript five_prime_UTR 1629281 1629354 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript CDS 1629355 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript gene 1652917 1655337 . + . ID=Gene:Gene:Y71G12B.18 +I Coding_transcript mRNA 1652917 1655337 . + . ID=Transcript:Gene:Y71G12B.18.1;Parent=Gene:Gene:Y71G12B.18 +I Coding_transcript CDS 1652917 1653000 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1653438 1653755 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1653901 1653981 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1654754 1654975 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1655026 1655334 . 
+ 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript three_prime_UTR 1655335 1655337 . + . ID=three_prime_UTR:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript gene 1681814 1685064 . + . ID=Gene:Gene:Y71G12B.16 +I Coding_transcript mRNA 1681814 1685064 . + . ID=Transcript:Gene:Y71G12B.16.1;Parent=Gene:Gene:Y71G12B.16 +I Coding_transcript CDS 1681814 1681899 . + 0 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1682831 1683432 . + 1 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1683480 1683521 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1684382 1684450 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1684829 1685064 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions.info Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,5 @@ +The files contain example data from 50 genes of the C. elegans genome on chromosome I. + +These files are: +* the genome annotation in GFF3 format +* two small sets of aligned reads in SAM format (generated with PalMapper) from SRX001872 (http://www.ncbi.nlm.nih.gov/sra/SRX001872?report=full) and SRX001875 (http://www.ncbi.nlm.nih.gov/sra/SRX001875?report=full)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions_deseq.txt Wed May 09 20:43:47 2012 -0400 @@ -0,0 +1,51 @@ +deseq_c_elegans_WS200.I.regions.SRX001872_vs._deseq_c_elegans_WS200.I.regions.SRX001875 +Gene:Gene:Y48G1BL.1 0.298162984580380 +Gene:Gene:F53G12.8 0.0100203770664889 +Gene:Gene:F56A6.1b 0.196356415924958 +Gene:Gene:F56C11.6b 0.131729472518535 +Gene:Gene:Y95B8A.7 0.82068108848155 +Gene:Gene:Y34D9A.2 0.0853090936357318 +Gene:Gene:R06A10.2.1 0.0729242320545049 +Gene:Gene:W04C9.2 0.566684334319464 +Gene:Gene:Y48G1C.2.1 0.600568879247855 +Gene:Gene:Y71G12B.18 0.436992089489852 +Gene:Gene:Y48G1C.9.1 0.653680190916246 +Gene:Gene:Y48G1C.12 0.962705668820165 +Gene:Gene:Y92H12A.4 0.71745387890079 +Gene:Gene:Y34D9A.6 0.554353505200952 +Gene:Gene:Y74C9A.2.2 0.193317121924372 +Gene:Gene:Y73E7A.1a.1 1 +Gene:Gene:Y48G1C.11 0.343541645863403 +Gene:Gene:ZK993.1 0.160531710602269 +Gene:Gene:Y73E7A.9 0.505459732270159 +Gene:Gene:Y34D9A.11 0.0963318383936614 +Gene:Gene:Y92H12BR.7 0.640348677865408 +Gene:Gene:Y92H12BR.3b 0.91026935144141 +Gene:Gene:Y65B4BR.8 0.838122463966569 +Gene:Gene:Y71F9AM.5b 0.971282320939981 +Gene:Gene:R119.7 0.0391196294523181 +Gene:Gene:F56C11.5b 0.457982589114528 +Gene:Gene:Y92H12BR.6 0.487521663730281 +Gene:Gene:Y95B8A.6a.2 0.927779341088235 +Gene:Gene:F53G12.7 0.196705810975026 +Gene:Gene:Y48G1A.2 0.453586101344411 +Gene:Gene:C53D5.1c.1 0.391738974840997 +Gene:Gene:Y34D9A.3 0.71006993005907 +Gene:Gene:Y18H1A.6 0.329792105744889 +Gene:Gene:T06A4.1b 0.295665526024382 +Gene:Gene:F53G12.3 0.631494248315874 +Gene:Gene:R119.6 0.651145530807306 +Gene:Gene:Y95B8A.5 0.512001544838653 +Gene:Gene:Y65B4BR.4b 0.904179315054261 +Gene:Gene:K12C11.2.1 0.364903241182862 +Gene:Gene:F53G12.5b 0.537007770889288 +Gene:Gene:Y95B8A.8 0.59137956260358 +Gene:Gene:T06A4.3a 0.171238758638891 +Gene:Gene:F53G12.4 0.949563214223464 +Gene:Gene:C54G6.2 0.0780422099961616 +Gene:Gene:W04C9.1 0.223549535364483 
+Gene:Gene:K12C11.6 0.301231157430176 +Gene:Gene:F47G6.1 0.196694866924542 +Gene:Gene:Y71F9AM.4a 0.728480588565213 +Gene:Gene:Y71G12B.16 0.900935024006703 +Gene:Gene:Y65B4BR.1 0.475324346757848
#!/usr/bin/env python
"""
Extract genome annotation from a GFF3 (a tab delimited format
for storing sequence features and annotations:
http://www.sequenceontology.org/gff3.shtml) file.

Usage: ParseGFF.py in.gff3 out.mat

Requirements:
    Scipy :- http://scipy.org/
    Numpy :- http://numpy.org/

Copyright (C) 2010-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany
"""
# Origin: deseq-hts_1.0/tools/ParseGFF.py
import re
import sys
import os
import scipy.io as sio
import numpy as np

# Feature types (GFF column 3) that are stored as transcripts of a gene.
# TODO Include growing key words of different non-coding/coding transcripts
_TRANSCRIPT_TYPES = ('mRNA', 'transcript', 'ncRNA', 'miRNA',
                     'pseudogenic_transcript', 'rRNA', 'snoRNA', 'snRNA',
                     'tRNA', 'scRNA')

# Lines whose first field starts with '#' (comment/directive) or '>' (FASTA
# identifier) carry no feature record.
_SKIP_LINE = re.compile(r'#|>')


def _join_flanking_utrs(left_utr, cds_cod, right_utr):
    """Merge CDS segments with directly adjacent UTR segments.

    All arguments are lists of [start, stop] pairs in ascending genomic
    order; `cds_cod` must not be empty.  `left_utr` lies before the first
    CDS segment and `right_utr` after the last one.  A UTR segment that
    touches the neighbouring CDS segment (gap of 0 or 1 position) is fused
    with it into a single exon.  The inputs are not modified.
    """
    left_utr = list(left_utr)
    coding = list(cds_cod)
    right_utr = list(right_utr)
    # Only join when a UTR really exists; the historical code compared
    # against a 0 sentinel, which turned a CDS starting at position 1
    # into an exon starting at 0.
    if left_utr and (coding[0][0] - left_utr[-1][1]) in (0, 1):
        coding[0] = [left_utr[-1][0], coding[0][1]]
        left_utr.pop()
    # coding[-1] may be the segment already extended above (single CDS case),
    # which yields one exon spanning UTR + CDS + UTR.
    if right_utr and (right_utr[0][0] - coding[-1][1]) in (0, 1):
        coding[-1] = [coding[-1][0], right_utr[0][1]]
        right_utr.pop(0)
    return left_utr + coding + right_utr


def createExon(strand_p, five_p_utr, cds_cod, three_p_utr):
    """Create exon coordinates from UTR and CDS regions.

    strand_p    -- '+' or '-'; any other value yields an empty list.
    five_p_utr  -- list of [start, stop] pairs of the 5' UTR segments.
    cds_cod     -- list of [start, stop] pairs of the CDS segments.
    three_p_utr -- list of [start, stop] pairs of the 3' UTR segments.

    Returns a list of [start, stop] exon pairs.  On the '-' strand the
    3' UTR is the one preceding the CDS in the coordinate order, so the two
    UTR lists simply swap roles.  When no CDS segment is given the UTR
    segments are returned unmerged instead of raising IndexError.
    """
    if strand_p == '+':
        left_utr, right_utr = five_p_utr, three_p_utr
    elif strand_p == '-':
        left_utr, right_utr = three_p_utr, five_p_utr
    else:
        return []
    if not cds_cod:
        return list(left_utr) + list(right_utr)
    return _join_flanking_utrs(left_utr, cds_cod, right_utr)


def init_gene():
    """Return an empty gene structure with every expected field present."""
    return {
        'chr': '',
        'exons': [],
        'gene_info': {},
        'id': '',
        'is_alt_spliced': 0,
        'name': '',
        'source': '',
        'start': '',
        'stop': '',
        'strand': '',
        'transcripts': [],
    }


def _as_object_array(items):
    """Pack each item as a numpy array into a 1-D object array."""
    # `object` replaces the `np.object` alias, which newer NumPy removed.
    packed = np.zeros((len(items),), dtype=object)
    for idx, item in enumerate(items):
        packed[idx] = np.array(item)
    return packed


def FeatureValueFormat(singlegene):
    """Make feature values compatible with writing in .mat format.

    Replaces the 'exons' and 'transcripts' lists of `singlegene` (in place)
    by object arrays and returns the same dictionary.
    """
    singlegene['exons'] = _as_object_array(singlegene['exons'])
    singlegene['transcripts'] = _as_object_array(singlegene['transcripts'])
    return singlegene


def _spans(feature_records):
    """Return the [start, stop] pair of every parsed feature record."""
    return [[rec['start'], rec['stop']] for rec in feature_records]


def CreateGeneModels(genes_cmpt, transcripts_cmpt, exons_cmpt, utr3_cmpt, utr5_cmpt, cds_cmpt):
    """Create coding/non-coding gene models from parsed GFF objects.

    Every argument is one of the dictionaries returned by GFFParse, keyed
    by (chromosome, feature id).  Genes without any transcript are left
    out.  Returns a list of gene dictionaries (see init_gene) prepared by
    FeatureValueFormat; 'id' is a running number starting at 1.
    """
    gene_models = []
    for gene_entry in genes_cmpt:
        if gene_entry not in transcripts_cmpt:
            continue
        gene_rec = genes_cmpt[gene_entry]
        gene = init_gene()
        gene['id'] = len(gene_models) + 1
        gene['name'] = gene_entry[1]
        gene['chr'] = gene_rec['chr']
        gene['source'] = gene_rec['source']
        gene['start'] = gene_rec['start']
        gene['stop'] = gene_rec['stop']
        gene['strand'] = gene_rec['strand']
        if gene['strand'] not in ('+', '-'):
            # Unknown strand (None, ?, ...) is normalised to a dot.
            gene['strand'] = '.'
        gene['gene_info'] = dict(ID=gene_entry[1])
        gene_transcripts = transcripts_cmpt[gene_entry]
        if len(gene_transcripts) > 1:
            gene['is_alt_spliced'] = 1
        for transcript in gene_transcripts:
            gene['transcripts'].append(transcript['ID'])
            feat_key = (gene['chr'], transcript['ID'])
            if feat_key in exons_cmpt:
                exon_cod = _spans(exons_cmpt[feat_key])
            else:
                # No exon feature for this transcript: build the exons from
                # UTR5, CDS and UTR3.  Previously, when the file contained
                # exon features for *other* transcripts only, the exons of
                # the preceding transcript were reused (or a NameError was
                # raised).
                exon_cod = createExon(gene['strand'],
                                      _spans(utr5_cmpt.get(feat_key, [])),
                                      _spans(cds_cmpt.get(feat_key, [])),
                                      _spans(utr3_cmpt.get(feat_key, [])))
            # Minus-strand exons listed in descending order are flipped so
            # that exons are always stored in ascending coordinate order.
            if (gene['strand'] == '-' and len(exon_cod) > 1
                    and exon_cod[0][0] > exon_cod[-1][0]):
                exon_cod.reverse()
            # NOTE(review): a transcript without exons adds nothing here, so
            # 'exons' can end up shorter than 'transcripts' - confirm that
            # downstream consumers tolerate this.
            if exon_cod:
                gene['exons'].append(exon_cod)
        gene_models.append(FeatureValueFormat(gene))
    return gene_models


def _attribute_pairs(attributes):
    """Yield (key, value) for each key=value entry of a GFF attribute column."""
    for entry in attributes.split(';'):
        parts = entry.split('=')
        if len(parts) > 1:
            yield parts[0], parts[1]


def _first_attribute(attributes, key):
    """Return the value of the first `key` attribute, or None if absent."""
    for name, value in _attribute_pairs(attributes):
        if name == key:
            return value
    return None


def _feature_record(fields):
    """Build the start/stop/chr/strand record shared by all feature types."""
    return dict(start=int(fields[3]), stop=int(fields[4]),
                chr=fields[0], strand=fields[6])


def GFFParse(gff_file):
    """Parse a GFF file based on feature relationship.

    gff_file -- path of the GFF3 file.

    Returns six dictionaries: genes, transcripts, exons, utr3, utr5, cds.
    `genes` maps (chromosome, gene id) to one record; the others map
    (chromosome, parent id) to the list of records of that parent.
    Comment, FASTA and empty lines are ignored; lines containing an empty
    field, and transcript/exon/UTR/CDS lines without a Parent attribute,
    are reported on stdout and skipped.  Raises AssertionError for a line
    that does not have 9 tab-delimited fields.
    """
    genes, utr5, exons = dict(), dict(), dict()
    transcripts, utr3, cds = dict(), dict(), dict()
    child_tables = {'exon': exons, 'five_prime_UTR': utr5,
                    'CDS': cds, 'three_prime_UTR': utr3}
    # Python 2 needs 'U' for universal newlines; Python 3 does this by
    # default and newer releases reject the 'U' flag.
    read_mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(gff_file, read_mode) as gff_handle:
        for raw_line in gff_handle:
            fields = raw_line.strip('\n\r').split('\t')
            if _SKIP_LINE.match(fields[0]):  # commented line / fasta identifier
                continue
            if len(fields) == 1:  # fasta sequence or empty line
                continue
            if len(fields) != 9:
                # Explicit raise: a plain `assert` vanishes under `python -O`.
                raise AssertionError('\t'.join(fields))
            if '' in fields:  # a field with an empty value
                print('Skipping.. ' + '\t'.join(fields))
                continue
            if fields[-1].endswith(';'):  # trim the last ';' character
                fields[-1] = fields[-1].strip(';')
            chrom, feature_type, attributes = fields[0], fields[2], fields[-1]
            if feature_type in ('gene', 'pseudogene'):
                gene_info = _feature_record(fields)
                gene_info['source'] = fields[1]
                # Keyed by chromosome and gene id.
                genes[(chrom, _first_attribute(attributes, 'ID'))] = gene_info
                continue
            if feature_type in _TRANSCRIPT_TYPES:
                record, table, parents = _feature_record(fields), transcripts, None
                for name, value in _attribute_pairs(attributes):
                    if name == 'Parent':
                        parents = value
                    elif name == 'ID':
                        record['ID'] = value
            elif feature_type in child_tables:
                record, table = _feature_record(fields), child_tables[feature_type]
                parents = _first_attribute(attributes, 'Parent')
            else:
                continue
            if parents is None:
                # Without a parent the feature cannot be attached anywhere.
                print('Skipping.. ' + '\t'.join(fields))
                continue
            # A child may be mapped to multiple parents,
            # ex: Parent=AT01,AT01-1-Protein
            for parent_id in parents.split(','):
                table.setdefault((chrom, parent_id), []).append(record)
    return genes, transcripts, exons, utr3, utr5, cds


def __main__():
    """This function provides a best way to extract genome feature
    information from a GFF3 file for the rQuant downstream processing.
    """
    try:
        gff_file = sys.argv[1]
        mat_file = sys.argv[2]
    except IndexError:
        print(__doc__)
        sys.exit(-1)
    genes, transcripts, exons, utr3, utr5, cds = GFFParse(gff_file)
    gene_models = CreateGeneModels(genes, transcripts, exons, utr3, utr5, cds)
    # TODO Write to matlab/octave struct instead of cell arrays.
    sio.savemat(mat_file,
                mdict=dict(genes=gene_models),
                format='5',
                oned_as='row')


if __name__ == '__main__':
    __main__()
function engine = determine_engine()
% DETERMINE_ENGINE  Name the interpreter that is executing this code.
%
%   engine = determine_engine() returns 'octave' when the string returned
%   by LICENSE equals 'GNU General Public License' and 'matlab' otherwise.
%
% Origin: deseq-hts_1.0/tools/determine_engine.m

if isequal(license, 'GNU General Public License')
  engine = 'octave';
else
  engine = 'matlab';
end
