Mercurial > repos > devteam > count_gff_features
changeset 0:fabda887a71f draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 28 Jul 2014 11:56:10 -0400 |
parents | |
children | 188392a0d0a8 |
files | count_gff_features.py count_gff_features.xml test-data/count_gff_features_out1.txt test-data/count_gff_features_out2.txt test-data/gff2bed_in2.gff test-data/gff_filter_by_feature_count_out1.gff tool_dependencies.xml |
diffstat | 7 files changed, 110 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/count_gff_features.py Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# This tool takes a gff file as input and counts the number of features in it. + +import sys, fileinput +from galaxy import eggs +from galaxy.datatypes.util.gff_util import GFFReaderWrapper +from bx.intervals.io import GenomicInterval + +# Get args. +input_file = sys.argv[1:] + +# Count features. +count = 0 +for feature in GFFReaderWrapper( fileinput.FileInput( input_file ), fix_strand=True ): + if isinstance( feature, GenomicInterval ): + count += 1 + +print count \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/count_gff_features.xml Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,29 @@ +<tool id="count_gff_features" name="Count GFF Features" version="0.1"> + <description></description> + <requirements> + <requirement type="package" version="0.7.1">bx-python</requirement> + </requirements> + <command interpreter="python"> + count_gff_features.py $input > $output + </command> + <inputs> + <param format="gff" name="input" type="data" label="GFF Dataset to Filter"/> + </inputs> + <outputs> + <data format="txt" name="output"/> + </outputs> + <tests> + <test> + <param name="input" value="gff2bed_in2.gff"/> + <output name="output" file="count_gff_features_out1.txt"/> + </test> + <test> + <param name="input" value="gff_filter_by_feature_count_out1.gff"/> + <output name="output" file="count_gff_features_out2.txt"/> + </test> + </tests> + <help> + Counts the number of features in a GFF dataset. GFF features are often spread across multiple lines; this tool counts the number of + features in the dataset rather than the number of lines. + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/count_gff_features_out1.txt Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,1 @@ +3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/count_gff_features_out2.txt Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,1 @@ +14
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gff2bed_in2.gff Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,13 @@ +chr1 Cufflinks exon 3204563 3207049 1000 - . gene_id "Xkr4"; transcript_id "Xkr4"; exon_number "1"; FPKM "0.3924207844"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.845549"; cov "0.022289"; +chr1 Cufflinks exon 3411783 3411982 1000 - . gene_id "Xkr4"; transcript_id "Xkr4"; exon_number "2"; FPKM "0.3924207844"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.845549"; cov "0.022289"; +chr1 Cufflinks exon 3660633 3661579 1000 - . gene_id "Xkr4"; transcript_id "Xkr4"; exon_number "3"; FPKM "0.3924207844"; frac "1.000000"; conf_lo "0.000000"; conf_hi "0.845549"; cov "0.022289"; +chr1 Cufflinks exon 4481009 4482749 1000 - . gene_id "Sox17"; transcript_id "Sox17"; exon_number "1"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262"; +chr1 Cufflinks exon 4483181 4483547 1000 - . gene_id "Sox17"; transcript_id "Sox17"; exon_number "2"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262"; +chr1 Cufflinks exon 4483853 4483944 1000 - . gene_id "Sox17"; transcript_id "Sox17"; exon_number "3"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262"; +chr1 Cufflinks exon 4485217 4486023 1000 - . gene_id "Sox17"; transcript_id "Sox17"; exon_number "4"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262"; +chr1 Cufflinks exon 4486372 4486494 1000 - . gene_id "Sox17"; transcript_id "Sox17"; exon_number "5"; FPKM "1.5186976896"; frac "1.000000"; conf_lo "0.558189"; conf_hi "2.479206"; cov "0.086262"; +chr1 Cufflinks exon 4763279 4766882 1000 - . gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "1"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266"; +chr1 Cufflinks exon 4767606 4767729 1000 - . 
gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "2"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266"; +chr1 Cufflinks exon 4772649 4772814 1000 - . gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "3"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266"; +chr1 Cufflinks exon 4774032 4774186 1000 - . gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "4"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266"; +chr1 Cufflinks exon 4775654 4775807 1000 - . gene_id "Mrpl15"; transcript_id "Mrpl15"; exon_number "5"; FPKM "9.1596238357"; frac "1.000000"; conf_lo "7.124003"; conf_hi "11.195245"; cov "0.520266";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gff_filter_by_feature_count_out1.gff Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,42 @@ +chr13 Cufflinks transcript 3565855 3566203 1000 - . gene_id "CUFF.50195"; transcript_id "CUFF.50195.1"; FPKM "29.8710998584"; frac "1.000000"; conf_lo "7.290671"; conf_hi "52.451529"; cov "1.909091"; +chr13 Cufflinks exon 3565855 3565913 1000 - . gene_id "CUFF.50195"; transcript_id "CUFF.50195.1"; exon_number "1"; FPKM "29.8710998584"; frac "1.000000"; conf_lo "7.290671"; conf_hi "52.451529"; cov "1.909091"; +chr13 Cufflinks exon 3566164 3566203 1000 - . gene_id "CUFF.50195"; transcript_id "CUFF.50195.1"; exon_number "2"; FPKM "29.8710998584"; frac "1.000000"; conf_lo "7.290671"; conf_hi "52.451529"; cov "1.909091"; +chr13 Cufflinks transcript 3606116 3613028 1000 - . gene_id "CUFF.50207"; transcript_id "CUFF.50207.1"; FPKM "19.6171377865"; frac "1.000000"; conf_lo "0.936995"; conf_hi "38.297281"; cov "1.253750"; +chr13 Cufflinks exon 3606116 3606146 1000 - . gene_id "CUFF.50207"; transcript_id "CUFF.50207.1"; exon_number "1"; FPKM "19.6171377865"; frac "1.000000"; conf_lo "0.936995"; conf_hi "38.297281"; cov "1.253750"; +chr13 Cufflinks exon 3612965 3613028 1000 - . gene_id "CUFF.50207"; transcript_id "CUFF.50207.1"; exon_number "2"; FPKM "19.6171377865"; frac "1.000000"; conf_lo "0.936995"; conf_hi "38.297281"; cov "1.253750"; +chr13 Cufflinks transcript 4594319 4594938 1000 - . gene_id "CUFF.50261"; transcript_id "CUFF.50261.1"; FPKM "29.3887094260"; frac "1.000000"; conf_lo "8.607754"; conf_hi "50.169665"; cov "1.878261"; +chr13 Cufflinks exon 4594319 4594400 1000 - . gene_id "CUFF.50261"; transcript_id "CUFF.50261.1"; exon_number "1"; FPKM "29.3887094260"; frac "1.000000"; conf_lo "8.607754"; conf_hi "50.169665"; cov "1.878261"; +chr13 Cufflinks exon 4594906 4594938 1000 - . 
gene_id "CUFF.50261"; transcript_id "CUFF.50261.1"; exon_number "2"; FPKM "29.3887094260"; frac "1.000000"; conf_lo "8.607754"; conf_hi "50.169665"; cov "1.878261"; +chr13 Cufflinks transcript 4596799 4598059 1000 - . gene_id "CUFF.50263"; transcript_id "CUFF.50263.1"; FPKM "22.8358215134"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.671643"; cov "1.459459"; +chr13 Cufflinks exon 4596799 4596828 1000 - . gene_id "CUFF.50263"; transcript_id "CUFF.50263.1"; exon_number "1"; FPKM "22.8358215134"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.671643"; cov "1.459459"; +chr13 Cufflinks exon 4598016 4598059 1000 - . gene_id "CUFF.50263"; transcript_id "CUFF.50263.1"; exon_number "2"; FPKM "22.8358215134"; frac "1.000000"; conf_lo "0.000000"; conf_hi "45.671643"; cov "1.459459"; +chr13 Cufflinks transcript 5861035 5872268 1000 - . gene_id "CUFF.50289"; transcript_id "CUFF.50289.1"; FPKM "7.5439767500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "18.212771"; cov "0.482143"; +chr13 Cufflinks exon 5861035 5861117 1000 - . gene_id "CUFF.50289"; transcript_id "CUFF.50289.1"; exon_number "1"; FPKM "7.5439767500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "18.212771"; cov "0.482143"; +chr13 Cufflinks exon 5872240 5872268 1000 - . gene_id "CUFF.50289"; transcript_id "CUFF.50289.1"; exon_number "2"; FPKM "7.5439767500"; frac "1.000000"; conf_lo "0.000000"; conf_hi "18.212771"; cov "0.482143"; +chr13 Cufflinks transcript 5865442 5866941 1000 + . gene_id "CUFF.50297"; transcript_id "CUFF.50297.1"; FPKM "13.2019593124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.446269"; cov "0.843750"; +chr13 Cufflinks exon 5865442 5865510 1000 + . gene_id "CUFF.50297"; transcript_id "CUFF.50297.1"; exon_number "1"; FPKM "13.2019593124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.446269"; cov "0.843750"; +chr13 Cufflinks exon 5866915 5866941 1000 + . 
gene_id "CUFF.50297"; transcript_id "CUFF.50297.1"; exon_number "2"; FPKM "13.2019593124"; frac "1.000000"; conf_lo "0.000000"; conf_hi "28.446269"; cov "0.843750"; +chr13 Cufflinks transcript 6583845 6585843 1000 - . gene_id "CUFF.50339"; transcript_id "CUFF.50339.1"; FPKM "163.2242242265"; frac "1.000000"; conf_lo "127.815919"; conf_hi "198.632530"; cov "10.431818"; +chr13 Cufflinks exon 6583845 6583946 1000 - . gene_id "CUFF.50339"; transcript_id "CUFF.50339.1"; exon_number "1"; FPKM "163.2242242265"; frac "1.000000"; conf_lo "127.815919"; conf_hi "198.632530"; cov "10.431818"; +chr13 Cufflinks exon 6585726 6585843 1000 - . gene_id "CUFF.50339"; transcript_id "CUFF.50339.1"; exon_number "2"; FPKM "163.2242242265"; frac "1.000000"; conf_lo "127.815919"; conf_hi "198.632530"; cov "10.431818"; +chr13 Cufflinks transcript 6586295 6587966 1000 - . gene_id "CUFF.50341"; transcript_id "CUFF.50341.1"; FPKM "82.5011329424"; frac "1.000000"; conf_lo "60.835274"; conf_hi "104.166992"; cov "5.272727"; +chr13 Cufflinks exon 6586295 6586359 1000 - . gene_id "CUFF.50341"; transcript_id "CUFF.50341.1"; exon_number "1"; FPKM "82.5011329424"; frac "1.000000"; conf_lo "60.835274"; conf_hi "104.166992"; cov "5.272727"; +chr13 Cufflinks exon 6587735 6587966 1000 - . gene_id "CUFF.50341"; transcript_id "CUFF.50341.1"; exon_number "2"; FPKM "82.5011329424"; frac "1.000000"; conf_lo "60.835274"; conf_hi "104.166992"; cov "5.272727"; +chr13 Cufflinks transcript 6580385 6581757 1000 - . gene_id "CUFF.50365"; transcript_id "CUFF.50365.1"; FPKM "324.9135847836"; frac "1.000000"; conf_lo "293.684884"; conf_hi "356.142286"; cov "20.765542"; +chr13 Cufflinks exon 6580385 6580838 1000 - . gene_id "CUFF.50365"; transcript_id "CUFF.50365.1"; exon_number "1"; FPKM "324.9135847836"; frac "1.000000"; conf_lo "293.684884"; conf_hi "356.142286"; cov "20.765542"; +chr13 Cufflinks exon 6581649 6581757 1000 - . 
gene_id "CUFF.50365"; transcript_id "CUFF.50365.1"; exon_number "2"; FPKM "324.9135847836"; frac "1.000000"; conf_lo "293.684884"; conf_hi "356.142286"; cov "20.765542"; +chr13 Cufflinks transcript 8803760 8819743 1000 + . gene_id "CUFF.50481"; transcript_id "CUFF.50481.1"; FPKM "15.1783005269"; frac "1.000000"; conf_lo "2.785270"; conf_hi "27.571331"; cov "0.970060"; +chr13 Cufflinks exon 8803760 8803879 1000 + . gene_id "CUFF.50481"; transcript_id "CUFF.50481.1"; exon_number "1"; FPKM "15.1783005269"; frac "1.000000"; conf_lo "2.785270"; conf_hi "27.571331"; cov "0.970060"; +chr13 Cufflinks exon 8819697 8819743 1000 + . gene_id "CUFF.50481"; transcript_id "CUFF.50481.1"; exon_number "2"; FPKM "15.1783005269"; frac "1.000000"; conf_lo "2.785270"; conf_hi "27.571331"; cov "0.970060"; +chr13 Cufflinks transcript 8855128 8864773 1000 - . gene_id "CUFF.50497"; transcript_id "CUFF.50497.1"; FPKM "6.4009499697"; frac "1.000000"; conf_lo "0.000000"; conf_hi "19.202850"; cov "0.409091"; +chr13 Cufflinks exon 8855128 8855158 1000 - . gene_id "CUFF.50497"; transcript_id "CUFF.50497.1"; exon_number "1"; FPKM "6.4009499697"; frac "1.000000"; conf_lo "0.000000"; conf_hi "19.202850"; cov "0.409091"; +chr13 Cufflinks exon 8864739 8864773 1000 - . gene_id "CUFF.50497"; transcript_id "CUFF.50497.1"; exon_number "2"; FPKM "6.4009499697"; frac "1.000000"; conf_lo "0.000000"; conf_hi "19.202850"; cov "0.409091"; +chr13 Cufflinks transcript 9169898 9172437 1000 + . gene_id "CUFF.50509"; transcript_id "CUFF.50509.1"; FPKM "41.4918721248"; frac "1.000000"; conf_lo "16.471332"; conf_hi "66.512412"; cov "2.651786"; +chr13 Cufflinks exon 9169898 9169928 1000 + . gene_id "CUFF.50509"; transcript_id "CUFF.50509.1"; exon_number "1"; FPKM "41.4918721248"; frac "1.000000"; conf_lo "16.471332"; conf_hi "66.512412"; cov "2.651786"; +chr13 Cufflinks exon 9172357 9172437 1000 + . 
gene_id "CUFF.50509"; transcript_id "CUFF.50509.1"; exon_number "2"; FPKM "41.4918721248"; frac "1.000000"; conf_lo "16.471332"; conf_hi "66.512412"; cov "2.651786"; +chr13 Cufflinks transcript 9353602 9373527 1000 - . gene_id "CUFF.50527"; transcript_id "CUFF.50527.1"; FPKM "16.2485653076"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.010792"; cov "1.038462"; +chr13 Cufflinks exon 9353602 9353648 1000 - . gene_id "CUFF.50527"; transcript_id "CUFF.50527.1"; exon_number "1"; FPKM "16.2485653076"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.010792"; cov "1.038462"; +chr13 Cufflinks exon 9373497 9373527 1000 - . gene_id "CUFF.50527"; transcript_id "CUFF.50527.1"; exon_number "2"; FPKM "16.2485653076"; frac "1.000000"; conf_lo "0.000000"; conf_hi "35.010792"; cov "1.038462"; +chr13 Cufflinks transcript 9586173 9593034 1000 - . gene_id "CUFF.50563"; transcript_id "CUFF.50563.1"; FPKM "10.3039682439"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.875980"; cov "0.658537"; +chr13 Cufflinks exon 9586173 9586218 1000 - . gene_id "CUFF.50563"; transcript_id "CUFF.50563.1"; exon_number "1"; FPKM "10.3039682439"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.875980"; cov "0.658537"; +chr13 Cufflinks exon 9592999 9593034 1000 - . gene_id "CUFF.50563"; transcript_id "CUFF.50563.1"; exon_number "2"; FPKM "10.3039682439"; frac "1.000000"; conf_lo "0.000000"; conf_hi "24.875980"; cov "0.658537";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jul 28 11:56:10 2014 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="bx-python" version="0.7.1"> + <repository changeset_revision="2d0c08728bca" name="package_bx_python_0_7" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>