view fml_gff_converter_programs/scripts/gff3_to_gtf_converter.pl @ 0:ed53dca1c6ff

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author vipints
date Tue, 07 Jun 2011 17:26:20 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env perl
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2010 Vipin T Sreedharan, Friedrich Miescher Laboratory of the Max Planck Society
# Copyright (C) 2010 Max Planck Society
#
# Description : Convert a GFF3 format file to GTF format.
 
use strict;
use warnings;

use lib '/home/galaxy/perl5/share/perl/5.8.8/';
use Bio::FeatureIO;

# Usage text shown when the script is invoked with the wrong number of arguments.
my $usage = q(
gff3_to_gtf_converter.pl  - Program to convert a valid GFF3 format file to GTF format.
USAGE: gff3_to_gtf_converter.pl <GFF3 file name> <output file name>
);

# requirement check: exactly two arguments (input GFF3 file, output file) are
# needed. A bad invocation is an error, so the usage goes to STDERR and the
# exit status is non-zero, letting callers detect that nothing was converted.
if (scalar(@ARGV) != 2) {
    print STDERR $usage;
    exit 1;
}

# Positional arguments: the GFF3 file to read, then the file to write.
my ($inFile, $outFile) = @ARGV;

# Reader for the input file, parsed as GFF version 3.
my $inGFF = Bio::FeatureIO->new(
    -file    => $inFile,
    -format  => 'GFF',
    -version => 3,
);

# Writer for the output file, emitted as GFF version 2.5 (the GTF output this
# script is described as producing). The ">" prefix is part of the file
# argument handed to Bio::FeatureIO.
my $outGTF = Bio::FeatureIO->new(
    -file    => '>' . $outFile,
    -format  => 'GFF',
    -version => 2.5,
);

# Feature types treated as transcript-level records. Each one starts a new
# transcript: it supplies the gene identifier and resets the exon/CDS counters.
my %is_transcript_type = map { $_ => 1 } qw(
    mRNA miRNA ncRNA rRNA snoRNA snRNA tRNA misc_RNA
    processed_transcript transcript scRNA
);

# Feature types that receive the extra GTF attributes before being written.
my %is_child_type = map { $_ => 1 } qw(exon CDS stop_codon start_codon);

# State carried from the most recently seen transcript record to the child
# features that follow it in the input:
#   $gene          - value of the transcript's Parent attribute (used as gene_id)
#   $feature_type  - the transcript's type name (used as source_id)
#   $exon_exon_cnt - number to assign to the next exon
#   $cds_exon_cnt  - number to assign to the next CDS
my ($gene, $feature_type, $exon_exon_cnt, $cds_exon_cnt) = ('', '', 0, 0);

# Main conversion loop: read every feature, decorate exon/CDS/start_codon/
# stop_codon records with source_id, transcript_id, gene_id and exon_number
# (plus protein_id for CDS), and write every feature to the output.
while (my $feature = $inGFF->next_feature()) {
    my $type_name = $feature->type->name;

    # from a valid GFF3 file try to get transcript information.
    if ($is_transcript_type{$type_name}) {
        my $parent = ($feature->get_Annotations('Parent'))[0];
        # No trailing newline on the message, so Perl appends the script
        # location (and the input line, when available) for diagnosis.
        die "No Parent attribute found on $type_name feature; cannot determine its gene_id"
            unless defined $parent;

        $gene          = $parent->value;
        $cds_exon_cnt  = 1;
        $exon_exon_cnt = 1;
        $feature_type  = $type_name;
    }

    if ($is_child_type{$type_name}) {
        my $parent = ($feature->get_Annotations('Parent'))[0];
        die "No Parent attribute found on $type_name feature; cannot determine its transcript_id"
            unless defined $parent;
        my $transcript = $parent->value;

        # Pick the exon_number for this record. Exons and CDS segments are
        # counted independently; a start_codon takes the number of the next
        # CDS, and a stop_codon the number of the most recently counted CDS.
        my $exon_number;
        my $protein_id;
        if ($type_name eq 'exon') {
            $exon_number = $exon_exon_cnt++;
        } elsif ($type_name eq 'CDS') {
            $exon_number = $cds_exon_cnt++;
            # protein_id is the transcript identifier with the first
            # occurrence of "Transcript" replaced by "Protein".
            (my $pid = $transcript) =~ s/Transcript/Protein/;
            $protein_id = Bio::Annotation::SimpleValue->new( '-value' => $pid, '-tagname' => 'protein_id');
        } elsif ($type_name eq 'start_codon') {
            $exon_number = $cds_exon_cnt;
        } else {
            $exon_number = $cds_exon_cnt - 1;
        }

        my $source_identifier = Bio::Annotation::SimpleValue->new( '-value' => $feature_type, '-tagname' => 'source_id');
        my $transcript_id = Bio::Annotation::SimpleValue->new( '-value' => $transcript, '-tagname' => 'transcript_id');
        my $gene_id = Bio::Annotation::SimpleValue->new( '-value' => $gene, '-tagname' => 'gene_id');
        my $col_exon_number = Bio::Annotation::SimpleValue->new( '-value' => $exon_number, '-tagname' => 'exon_number');

        $feature->add_Annotation($source_identifier);
        $feature->add_Annotation($transcript_id);
        $feature->add_Annotation($gene_id);
        $feature->add_Annotation($col_exon_number);
        $feature->add_Annotation($protein_id) if defined $protein_id;
    }

    # Every feature is written, including types that were not decorated above.
    $outGTF->write_feature($feature);
}
exit;