Mercurial > repos > cpt > cpt_psm_plotter
diff lib/CPT/FiletypeDetector.pm @ 1:8691c1c61a8e draft default tip
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:48:47 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/CPT/FiletypeDetector.pm Mon Jun 05 02:48:47 2023 +0000 @@ -0,0 +1,117 @@ +package CPT::FiletypeDetector; +use Moose; +use strict; +use warnings; +use Data::Dumper; +use autodie; + +# ABSTRACT: an incredibly basic filetype detection library for genomic data + + +sub head { + my ($self, $filename) = @_; + # We're only going to focus on detecting a few types + open( my $file, '<', $filename ); + my @lines; + my $c = 0; + while (<$file>) { + # Read ten lines + if ( $c++ < 10 ) { + chomp $_; + push( @lines, $_ ); + } + # Then exit + else { + last; + } + } + close($file); + return @lines; +} + +sub detect { + my ( $self, $filename ) = @_; + + my @lines = $self->head($filename); + + use CPT::Filetype::gff3; + use CPT::Filetype::gbk; + use CPT::Filetype::embl; + use CPT::Filetype::fasta; + + my @scorers = ( + CPT::Filetype::gff3->new(lines => \@lines, file => $filename), + CPT::Filetype::gbk->new(lines => \@lines, file => $filename), + CPT::Filetype::embl->new(lines => \@lines, file => $filename), + CPT::Filetype::fasta->new(lines => \@lines, file => $filename), + ); + + my $best_score = 0; + my $best_name = ""; + foreach(@scorers){ + my $score = $_->score(); + # "1 indicating ... to the exclusion [of others] + if($score == 1){ + return $_->name(); + } + + # Otherwise check if better + if($score > $best_score){ + $best_name = $_->name(); + } + } + + return $best_name; + + # if(defined $string){ + # return 'fasta' if( $string =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i); + # return 'genbank' if( $string =~ /\.(gb|gbank|genbank|gbk)$/i); + # return 'scf' if( $string =~ /\.scf$/i); + # return 'pir' if( $string =~ /\.pir$/i); + # return 'embl' if( $string =~ /\.(embl|ebl|emb|dat)$/i); + # return 'raw' if( $string =~ /\.(txt)$/i); + # return 'gcg' if( $string =~ /\.gcg$/i); + # return 'ace' if( $string =~ /\.ace$/i); + # return 'bsml' if( $string =~ /\.(bsm|bsml)$/i); + # return 'swiss' if( $string =~ /\.(swiss|sp)$/i); + # return 'phd' if( $string =~ /\.(phd|phred)$/i); + # return 'gff' if( $string =~ /\.(gff|gff3)$/i); + # return 'blastxml' if( $string =~ /\.(xml)$/i); + # die "File type detection failure"; + # } + # else{ + # die "File type detection failure"; + # } + +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::FiletypeDetector - an incredibly basic filetype detection library for genomic data + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche <rasche.eric@yandex.ru> + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut