Mercurial > repos > cpt > cpt_psm_recombine
comparison lib/CPT/FiletypeDetector.pm @ 1:97ef96676b48 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:51:26 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:b18e8268bf4e | 1:97ef96676b48 |
---|---|
1 package CPT::FiletypeDetector; | |
2 use Moose; | |
3 use strict; | |
4 use warnings; | |
5 use Data::Dumper; | |
6 use autodie; | |
7 | |
8 # ABSTRACT: an incredibly basic filetype detection library for genomic data | |
9 | |
10 | |
# head($filename)
#
# Return up to the first ten lines of $filename, with trailing newlines
# removed.  Files shorter than ten lines yield however many lines exist;
# an empty file yields an empty list.
#
# Failures from open/close are raised as exceptions by the file-level
# `use autodie`.
sub head {
    my ( $self, $filename ) = @_;

    my $max_lines = 10;
    my @lines;

    open( my $fh, '<', $filename );

    # Read into a lexical rather than $_: a bare `while (<$fh>)` assigns to
    # the global $_ without localising it, which overwrites the caller's
    # value when the caller is itself iterating with an aliased $_
    # (e.g. `for (@files) { $detector->detect($_) }`).
    while ( my $line = <$fh> ) {
        chomp $line;
        push @lines, $line;

        # Stop as soon as we have enough; no need to read one line further.
        last if @lines >= $max_lines;
    }
    close($fh);

    return @lines;
}
31 | |
# detect($filename)
#
# Guess the format of $filename by handing its first lines (see head())
# to each CPT::Filetype::* scorer in turn.
#
# Returns the name() of the first scorer whose score() is exactly 1
# (treated as a definitive match), otherwise the name() of the scorer
# with the highest score above zero.  Returns the empty string when no
# scorer reports a positive score.
sub detect {
    my ( $self, $filename ) = @_;

    my @lines = $self->head($filename);

    use CPT::Filetype::gff3;
    use CPT::Filetype::gbk;
    use CPT::Filetype::embl;
    use CPT::Filetype::fasta;

    my @scorers = (
        CPT::Filetype::gff3->new( lines => \@lines, file => $filename ),
        CPT::Filetype::gbk->new( lines => \@lines, file => $filename ),
        CPT::Filetype::embl->new( lines => \@lines, file => $filename ),
        CPT::Filetype::fasta->new( lines => \@lines, file => $filename ),
    );

    my $best_score = 0;
    my $best_name  = "";
    foreach my $scorer (@scorers) {
        my $score = $scorer->score();

        # "1 indicating ... to the exclusion [of others]": a score of 1 is
        # conclusive, so stop looking.
        if ( $score == 1 ) {
            return $scorer->name();
        }

        # Otherwise keep whichever candidate scores highest.  The score
        # must be recorded along with the name; without that, every later
        # scorer with any positive score would replace a better earlier one.
        if ( $score > $best_score ) {
            $best_score = $score;
            $best_name  = $scorer->name();
        }
    }

    return $best_name;
}
87 | |
88 no Moose; | |
89 1; | |
90 | |
91 __END__ | |
92 | |
93 =pod | |
94 | |
95 =encoding UTF-8 | |
96 | |
97 =head1 NAME | |
98 | |
99 CPT::FiletypeDetector - an incredibly basic filetype detection library for genomic data | |
100 | |
101 =head1 VERSION | |
102 | |
103 version 1.99.4 | |
104 | |
105 =head1 AUTHOR | |
106 | |
107 Eric Rasche <rasche.eric@yandex.ru> | |
108 | |
109 =head1 COPYRIGHT AND LICENSE | |
110 | |
111 This software is Copyright (c) 2014 by Eric Rasche. | |
112 | |
113 This is free software, licensed under: | |
114 | |
115 The GNU General Public License, Version 3, June 2007 | |
116 | |
117 =cut |