Mercurial > repos > nml > pseudogenome
comparison pseudogenome.pl @ 0:47b586ab4729 draft default tip
planemo upload commit 4fee4519135f7677cf50f721cf1ad7a7335ad66d-dirty
author | nml |
---|---|
date | Fri, 06 Apr 2018 14:29:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:47b586ab4729 |
---|---|
1 #!/usr/bin/env perl | |
2 use strict; | |
3 use warnings; | |
4 use autodie qw(:all); | |
5 use Bio::SeqIO; | |
6 use Readonly; | |
7 use File::Basename; | |
8 use Getopt::Long; | |
9 use Pod::Usage; | |
10 Getopt::Long::Configure('bundling'); | |
11 | |
12 =head1 NAME | |
13 | |
14 nml_pseudogenome.pl - To create a single pseudo genome out of multiple contigs provided in a single fasta file. Contig are combined in order of appearances in file | |
15 | |
16 =head1 SYNOPSIS | |
17 | |
18 nml_pseudogenome.pl -i F<file_name.fna> -n 100 -c X -o F<filename.fna> | |
19 | |
20 =head1 OPTIONS | |
21 | |
22 =over | |
23 | |
24 =item B<-i>, B<--input> | |
25 | |
26 Multiple fasta file | |
27 | |
28 =item B<-n>, B<--number> | |
29 | |
30 Number of filler base pairs to be added, default : 10 | |
31 | |
32 =item B<-c>, B<--chars> | |
33 | |
34 Character to be used as the 'glue' between contigs, default : 'N' | |
35 | |
36 =item B<--id> | |
37 | |
38 Name of fasta file to be used default: pseudogenome | |
39 | |
40 =item B<-o>, B<--output> | |
41 | |
42 Output file name, default : Same as input | |
43 | |
44 =item B<-s>, B<--stitch> | |
45 | |
46 Add the stitch pattern between contigs only | |
47 | |
48 =item B<-h>, B<--help> | |
49 | |
50 Print this help | |
51 | |
52 =item EXAMPLE | |
53 | |
54 nml_pseudogenome.pl -i multiple_fasta.fna -n 100 -c X -o pseudo.fna | |
55 | |
56 nml_pseudogenome.pl -i another_multiple.fna | |
57 | |
58 =back | |
59 | |
60 =head1 DESCRIPTION | |
61 | |
62 To create a single pseudo genome out of multiple contigs provided in a single fasta file. Contig are combined in order of appearances in file. | |
63 | |
64 =cut | |
65 | |
66 # Nonsub perlcode | |
67 | |
68 Readonly my $DEFAULT_NUM_CHAR => 10; | |
69 Readonly my $stitch_pattern => 'NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN'; | |
70 Readonly my $DEFAULT_CHAR => 'N'; | |
71 my ( $input,$id, $number, $char, $output,$stitch, $help ); | |
72 | |
73 GetOptions( | |
74 'i|input=s' => \$input, | |
75 'n|number=s' => \$number, | |
76 'c|char=s' => \$char, | |
77 'o|output=s' => \$output, | |
78 'h|help' => \$help, | |
79 's|stitch' => \$stitch, | |
80 'id=s' => \$id | |
81 ); | |
82 ($input,$id,$number,$char,$output) =check_inputs( $input, $number, $char,$output, $help,$stitch ); | |
83 | |
84 | |
85 | |
86 my $in = Bio::SeqIO->new(-file=>$input,-format=>'fasta'); | |
87 | |
88 my $sequence; | |
89 | |
90 #go thru every sequence and append to main sequence | |
91 while (my $seq = $in->next_seq()) { | |
92 if ($stitch) { | |
93 $sequence .= $seq->seq . $stitch_pattern; | |
94 } | |
95 else { | |
96 $sequence .= $seq->seq . ($char x $number ); | |
97 } | |
98 | |
99 } | |
100 | |
101 my $main = Bio::Seq->new(-display_id=>$id,-seq=>$sequence); | |
102 | |
103 my $out = Bio::SeqIO->new(-file => ">$output" ,-format=>'fasta'); | |
104 $out->write_seq($main); | |
105 | |
106 exit; | |
107 | |
108 =begin HTML | |
109 | |
110 =head2 check_inputs | |
111 | |
112 Title : check_inputs | |
113 Usage : check_inputs($fasta,$num,$filler,$out_to,$usage); | |
114 Function: check arguments provided by the user to see if they are usable and more or less correct | |
115 Returns : Return 1 if all is correct,otherwise 0 | |
116 Args : $query: Query that we are looking for in the database. Could be accession number or locus_tag | |
117 $db: Name of database we are looking for using the query provided | |
118 $format: Ensure that format was given by user and is valid format | |
119 $usage: If true, return usage description | |
120 Throws : none | |
121 | |
122 =cut | |
123 | |
124 sub check_inputs { | |
125 my ( $fasta, $num, $filler, $out_to, $usage,$use_stitch ) = @_; | |
126 | |
127 if ( $help || !( $fasta || $num || $filler || $out_to ) ) { | |
128 pod2usage(); | |
129 exit; | |
130 } | |
131 | |
132 if ( !($fasta) || !( -e $fasta ) ) { | |
133 print STDERR "Error: Input file not given or does not exist\n"; | |
134 pod2usage(); | |
135 exit; | |
136 } | |
137 | |
138 if ($use_stitch) { | |
139 print "Using stitch pattern\n"; | |
140 | |
141 } | |
142 else { | |
143 if ( !$num ) { | |
144 $num = $DEFAULT_NUM_CHAR; | |
145 print STDERR "Number of character not given, using $num\n"; | |
146 } | |
147 elsif ( !( $num =~ /^\d+$/xms ) ) { | |
148 print STDERR "Error: Number of character was not a number\n"; | |
149 pod2usage(); | |
150 exit; | |
151 } | |
152 | |
153 if ( !$filler ) { | |
154 $filler = $DEFAULT_CHAR; | |
155 print STDERR "No filler character given, using 'N'\n"; | |
156 } | |
157 | |
158 } | |
159 | |
160 if ( !($out_to) ) { | |
161 $out_to = fileparse($fasta) . ".pseudogenome"; | |
162 print | |
163 "Output file was not given. Result will be written to '$out_to'\n"; | |
164 } | |
165 if ( ! $id) { | |
166 $id = 'pseudogenome'; | |
167 } | |
168 | |
169 return ( $fasta,$id, $num, $filler, $out_to ); | |
170 } | |
171 | |
172 =end HTML | |
173 | |
174 =head1 SEE ALSO | |
175 | |
176 No related files. | |
177 | |
178 =head1 AUTHOR | |
179 | |
180 Philip Mabon, <philip.mabon@canada.ca> | |
181 | |
182 =head1 BUGS | |
183 | |
184 None reported. | |
185 | |
186 =head1 COPYRIGHT & LICENSE | |
187 | |
188 Copyright (C) 2018 by Public Health Agency of Canada | |
189 | |
190 This program is free software; you can redistribute it and/or modify | |
191 it under the same terms as Perl itself, either Perl version 5.8.2 or, | |
192 at your option, any later version of Perl 5 you may have available. | |
193 | |
194 =head1 DEVELOPER PAGE | |
195 | |
196 No developer documentation. | |
197 | |
198 =cut |