comparison cpt_psm_comparison_table/lib/CPT/FiletypeDetector.pm @ 0:b8b8b52904a5 draft

Uploaded
author cpt
date Tue, 05 Jul 2022 05:42:59 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b8b8b52904a5
1 package CPT::FiletypeDetector;
2 use Moose;
3 use strict;
4 use warnings;
5 use Data::Dumper;
6 use autodie;
7
8 # ABSTRACT: an incredibly basic filetype detection library for genomic data
9
10
# head($filename)
#
# Return up to the first ten lines of $filename, each with its trailing
# newline removed. Files shorter than ten lines yield all of their lines; an
# empty file yields an empty list.
#
# Failures from open/close are not checked here explicitly: the file-level
# `use autodie` turns them into exceptions.
sub head {
    my ( $self, $filename ) = @_;

    # We only need a small sample of the file to guess its type.
    my $max_lines = 10;

    open( my $file, '<', $filename );

    my @lines;
    while ( @lines < $max_lines ) {

        # Read into a lexical instead of the global $_. A bare
        # `while (<$file>)` assigns to $_ without localizing it, which
        # overwrites the caller's data when this method is called from inside
        # a `for (@list)` loop (where $_ aliases the list element).
        my $line = <$file>;
        last unless defined $line;    # end of file before ten lines

        chomp $line;
        push @lines, $line;
    }

    close($file);
    return @lines;
}
31
# detect($filename)
#
# Guess the format of $filename by handing its first lines (see head()) to
# each CPT::Filetype::* scorer and comparing the scores they report.
#
# Returns the name() of the first scorer whose score is exactly 1 (treated as
# a definitive match); otherwise the name() of the scorer with the highest
# score above 0, the earliest scorer winning ties. Returns the empty string
# when no scorer reports a score above 0.
sub detect {
    my ( $self, $filename ) = @_;

    my @lines = $self->head($filename);

    # `use` is processed at compile time, so these load when the module is
    # compiled even though they are written inside the sub.
    use CPT::Filetype::gff3;
    use CPT::Filetype::gbk;
    use CPT::Filetype::embl;
    use CPT::Filetype::fasta;

    my @scorers = (
        CPT::Filetype::gff3->new( lines => \@lines, file => $filename ),
        CPT::Filetype::gbk->new( lines => \@lines, file => $filename ),
        CPT::Filetype::embl->new( lines => \@lines, file => $filename ),
        CPT::Filetype::fasta->new( lines => \@lines, file => $filename ),
    );

    my $best_score = 0;
    my $best_name  = "";
    foreach my $scorer (@scorers) {
        my $score = $scorer->score();

        # A score of 1 indicates this filetype to the exclusion of the
        # others, so stop looking immediately.
        if ( $score == 1 ) {
            return $scorer->name();
        }

        # Otherwise remember the strongest candidate seen so far. The score
        # has to be recorded along with the name; without that, every later
        # scorer with any positive score would replace a better earlier one.
        if ( $score > $best_score ) {
            $best_score = $score;
            $best_name  = $scorer->name();
        }
    }

    return $best_name;

    # Legacy extension-based detection, kept for reference only. It is
    # commented out and sits after the return above, so it never runs.
    #
    # if(defined $string){
    # return 'fasta' if( $string =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i);
    # return 'genbank' if( $string =~ /\.(gb|gbank|genbank|gbk)$/i);
    # return 'scf' if( $string =~ /\.scf$/i);
    # return 'pir' if( $string =~ /\.pir$/i);
    # return 'embl' if( $string =~ /\.(embl|ebl|emb|dat)$/i);
    # return 'raw' if( $string =~ /\.(txt)$/i);
    # return 'gcg' if( $string =~ /\.gcg$/i);
    # return 'ace' if( $string =~ /\.ace$/i);
    # return 'bsml' if( $string =~ /\.(bsm|bsml)$/i);
    # return 'swiss' if( $string =~ /\.(swiss|sp)$/i);
    # return 'phd' if( $string =~ /\.(phd|phred)$/i);
    # return 'gff' if( $string =~ /\.(gff|gff3)$/i);
    # return 'blastxml' if( $string =~ /\.(xml)$/i);
    # die "File type detection failure";
    # }
    # else{
    # die "File type detection failure";
    # }

}
87
88 no Moose;
89 1;
90
91 __END__
92
93 =pod
94
95 =encoding UTF-8
96
97 =head1 NAME
98
99 CPT::FiletypeDetector - an incredibly basic filetype detection library for genomic data
100
101 =head1 VERSION
102
103 version 1.99.4
104
105 =head1 AUTHOR
106
107 Eric Rasche <rasche.eric@yandex.ru>
108
109 =head1 COPYRIGHT AND LICENSE
110
111 This software is Copyright (c) 2014 by Eric Rasche.
112
113 This is free software, licensed under:
114
115 The GNU General Public License, Version 3, June 2007
116
117 =cut