comparison smart-domain.pl @ 0:a3b26189fee3 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/smart_domains commit 266d7c45a443e893f15eab4b1485ca7c1c406a14
author earlhaminst
date Thu, 15 Jun 2017 07:52:09 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a3b26189fee3
1 #!/usr/bin/env perl
2 use strict;
3 use HTTP::Request::Common;
4 use LWP::UserAgent;
5 use Pod::Usage;
6 use Getopt::Long;
7 use Bio::SeqIO;
8 use JSON;
9
10 my $json = JSON->new->allow_nonref;
11 #run this script with --help to see the options
12
13 =pod
14
15 =head1 NAME
16
17 SMART_batch - submit sequences from a FASTA file to SMART
18
19 =head1 SYNOPSIS
20
21 B<SMART_batch.pl> I<options>
22
23 =head1 DESCRIPTION
24
25 Use B<SMART_batch.pl> to submit multiple protein sequences from a FASTA file into the SMART analysis queue. Results are saved into plain text files.
26
27 =head1 GENERAL OPTIONS
28
29
30 =over 4
31
32 =item B<--help>
33
34 display this message
35
36 =item B<--inputFile>
37
38 FASTA file with sequences to submit
39
40 =item B<--outputDirectory>
41
42 Directory which will be used to store the results. Will be created if it doesn't exist. Defaults to 'SMART_results'.
43
44 =item B<--outputFormat>
45
46 Choose preferred output format from txt, json or tabular.
47 Default is txt.
48
49 =back
50
51 =head1 ANALYSIS OPTIONS
52
53 =over 4
54
55 =item B<--includePfam>
56
57 Include Pfam domains in the search. (http://pfam.sanger.ac.uk/)
58
59 =item B<--includeSignalP>
60
61 Include signal peptide predictions. (http://www.cbs.dtu.dk/services/SignalP/)
62
63 =item B<--includeRepeats>
64
65 Include internal repeat predictions. (http://www.well.ox.ac.uk/rmott/ARIADNE/)
66
67 =item B<--includeDISEMBL>
68
69 Include predictions of internal protein disorder. (http://dis.embl.de/)
70
71 =item B<--includeSchnipsel>
72
73 Include predictions of outlier homologues and homologues of known structures. (http://smart.embl.de/help/smart_glossary.shtml#outlier)
74
75 =back
76
77
78 =head1 SEE ALSO
79
80 SMART Home page : http://smart.embl.de
81 SMART FAQ : http://smart.embl.de/help/FAQ.shtml
82
83 =head1 AUTHORS
84
85 Written by Ivica Letunic <ivica@letunic.com>
86
87 Modified by Anil Thanki <Anil.Thanki@earlham.ac.uk> to parse output in JSON and tabular format to adapt in Galaxy
88
89 =cut
90
# Endpoints of the SMART web service: sequence submission and job polling.
my $submit_url = "http://smart.embl.de/smart/show_motifs.pl";
my $job_status_url = "http://smart.embl.de/smart/job_status.pl";

# Command-line options. Only the output format has a default at this point;
# the output directory default is applied further down.
my $output_format = "txt";
my ($show_help, $input_file, $output_directory, $do_pfam, $do_signalp, $do_rep, $do_disembl, $do_schnipsel);
my $op_r = GetOptions(
    "help"              => \$show_help,
    "inputFile=s"       => \$input_file,
    "outputDirectory=s" => \$output_directory,
    "includePfam"       => \$do_pfam,
    "includeSignalP"    => \$do_signalp,
    "includeRepeats"    => \$do_rep,
    "includeDISEMBL"    => \$do_disembl,
    "includeSchnipsel"  => \$do_schnipsel,
    "outputFormat=s"    => \$output_format,
);

# GetOptions returns false when it met unknown or malformed options; stop
# with a usage error instead of silently ignoring them.
pod2usage(-verbose => 1, -exitval => 2) unless $op_r;

# Help that was explicitly asked for is not a failure.
pod2usage(-verbose => 2, -exitval => 0) if $show_help;

# The input file is mandatory: show the manual and exit with a failure status.
unless (defined $input_file && length $input_file) {
    pod2usage(
        -message => "Missing required option --inputFile.",
        -verbose => 2,
        -exitval => 1,
    );
}

my $ua = LWP::UserAgent->new();
# Kept at file scope because subroutines defined later in the file may refer to it.
my $result = "";
$ua->agent("SMARTbatch1.0");


print "\nSMART batch analysis\n======================\n";

# Validate the input before touching the file system, and report the failure
# through a non-zero exit status so calling tools can detect it.
unless (-e $input_file) {
    print STDERR "Input file does not exist.\n";
    exit 1;
}

$output_directory = 'SMART_results' unless defined $output_directory;
unless (-d $output_directory) {
    mkdir($output_directory)
        or die "Cannot create output directory '$output_directory': $!\n";
}

# FASTA reader that yields the sequences to submit.
my $io = Bio::SeqIO->new(-format => 'fasta', -file => $input_file);
123
#process sequences one by one. ALWAYS wait for the results before submitting the next sequence.

# Refuse an unsupported output format before anything is sent to SMART;
# otherwise no output file name can be built and the first submission is wasted.
unless ($output_format =~ /\A(?:txt|tabular|json)\z/) {
    die "Unsupported output format '$output_format'. Choose txt, json or tabular.\n";
}

# True when a response body carries SMART results, i.e. when its first or
# second line starts with the "-- SMART RESULT" marker.
my $is_result_page = sub {
    my ($content) = @_;
    my @head = split /\n/, $content, 3;
    for my $index (0, 1) {
        my $line = $head[$index];
        return 1 if defined $line && $line =~ /^--\ SMART\ RESULT/;
    }
    return 0;
};

while (my $seq = $io->next_seq) {
    my $seq_id = $seq->display_id;
    # The file extension is the same word as the (validated) output format.
    my $output_file = "$output_directory/${seq_id}_SMART_results.$output_format";

    if (-e $output_file) {
        if (-z $output_file) {
            # An empty file is treated as a failed earlier run: redo the sequence.
            print "Removing empty results file $output_file.\n";
            unlink $output_file;
        }
        else {
            print "Skipping sequence $seq_id because the results file already exists.\n";
            next;
        }
    }

    print "Submitting sequence $seq_id...\n";

    #prepare the basic POST data
    my %post_content = (
        # scalar(): keep the hash constructor balanced whatever seq() returns in list context
        'SEQUENCE' => scalar($seq->seq),
        'TEXTONLY' => 1,
    );
    if ($do_pfam)      { $post_content{'DO_PFAM'}         = 'DO_PFAM'; }
    if ($do_signalp)   { $post_content{'INCLUDE_SIGNALP'} = 'INCLUDE_SIGNALP'; }
    if ($do_rep)       { $post_content{'DO_PROSPERO'}     = 'DO_PROSPERO'; }
    if ($do_disembl)   { $post_content{'DO_DISEMBL'}      = 'DO_DISEMBL'; }
    if ($do_schnipsel) { $post_content{'INCLUDE_BLAST'}   = 'INCLUDE_BLAST'; }

    my $req = POST($submit_url, Content_Type => 'form-data', Content => [ %post_content ]);
    my $response = $ua->request($req);

    unless ($response->is_success()) {
        print "SMART returned a web server error. Full message follows:\n\n";
        print $response->as_string;
        die "Aborting: the SMART submission request failed.\n";
    }

    if ($is_result_page->($response->content)) {
        #we got the results directly (precomputed results)
        response_parser($output_file, $response, $output_format);
    }
    else {
        #we're in the queue, or there was an error
        my ($job_id) = $response->content =~ /job_status\.pl\?jobid=(\d+.+?)'/;

        unless (defined $job_id && length $job_id) {
            #there is no job ID, so an error occurred
            my $error_file = "$output_directory/${seq_id}_SMART_error.html";
            open(my $err, '>', $error_file)
                or die "Cannot write to $error_file: $!";
            print {$err} $response->content;
            close($err)
                or die "Cannot finish writing $error_file: $!";
            print "SMART returned an error page, which was saved into '$error_file'.\nPlease check the file for details. Aborting further submissions.\n";
            # Non-zero status: the batch did not complete.
            exit 1;
        }

        #we have a jobID, check every 10 seconds until we get the results
        print "Job entered the queue with ID $job_id. Waiting for results.\n";
        my $job_status_req = GET("$job_status_url?jobid=$job_id");
        sleep 5;
        while (1) {
            my $job_status_response = $ua->request($job_status_req);
            unless ($job_status_response->is_success) {
                print "SMART returned a web server error. Full message follows:\n\n";
                # Report the response that actually failed, not the submission response.
                print $job_status_response->as_string;
                die "Aborting: the SMART job status request failed.\n";
            }
            if ($is_result_page->($job_status_response->content)) {
                response_parser($output_file, $job_status_response, $output_format);
                last;
            }
            #still in queue
            sleep 10;
        }
    }

    #be nice to other users
    sleep 5;
}
218
# Convert SMART plain-text output into a list of JSON-encoded feature records.
#
# The text is read line by line. "KEY=VALUE" lines are collected into the
# current record; an empty line (or the end of the text) closes the record.
# Only records that contain a DOMAIN key are kept, everything else (result
# header, markers without '=') is discarded.
#
# Parameters:
#   $text - response body as a single string.
# Returns:
#   A list of JSON object strings, one per DOMAIN record, in input order.
sub toJSON {
    my ($text) = @_;

    my @encoded_records;
    my %record;

    # Keep the record collected so far if it describes a domain, then start over.
    my $flush_record = sub {
        push @encoded_records, encode_json(\%record) if exists $record{"DOMAIN"};
        %record = ();
    };

    for my $line (split /\n/, $text) {
        if (index($line, "=") > 0) {
            # Limit of 2: only the first '=' separates key and value, so a
            # value that itself contains '=' is kept whole.
            my ($key, $value) = split /=/, $line, 2;
            # An empty value stays undef (JSON null), as before.
            $record{$key} = length($value) ? $value : undef;
        }
        elsif (length($line) == 0) {
            $flush_record->();
        }
    }

    # split drops trailing empty lines, so a record at the very end of the
    # text has not been closed by a blank line yet.
    $flush_record->();

    return @encoded_records;
}
246
# Write the body of a SMART response to a results file in the requested format.
#
# Parameters:
#   $output_file         - path of the file to (over)write.
#   $job_status_response - response object; only its content() method is used.
#   $output_format       - "txt" (body written verbatim), "tabular"
#                          (tab-separated table with a header line) or "json"
#                          (JSON array of the DOMAIN records).
# Dies when the format is unsupported or the file cannot be written.
# Prints a confirmation line on STDOUT once the file is complete.
sub response_parser {
    my ($output_file, $job_status_response, $output_format) = @_;

    # Check the arguments before creating anything on disk. The file name
    # check matters: three-argument open() with an undefined name would
    # silently open an anonymous temporary file instead of failing.
    unless (defined $output_format && $output_format =~ /\A(?:txt|tabular|json)\z/) {
        die "Unsupported output format '" . (defined $output_format ? $output_format : '') . "'\n";
    }
    unless (defined $output_file && length $output_file) {
        die "Cannot write to results file: no output file name given\n";
    }

    my $content = $job_status_response->content;

    open(my $out, '>', $output_file)
        or die "Cannot write to $output_file: $!";

    if ($output_format eq "txt") {
        print {$out} $content;
    }
    elsif ($output_format eq "tabular") {
        my @records = map { decode_json($_) } toJSON($content);

        # Without any DOMAIN record there is no header to derive, so the file
        # is left empty instead of dying on the missing first record.
        if (@records) {
            # Columns come from the first record only; a key that appears
            # only in later records is not written.
            my @columns = sort keys %{ $records[0] };
            print {$out} join("\t", @columns), "\n";

            for my $record (@records) {
                my @cells = map { defined $record->{$_} ? $record->{$_} : '' } @columns;
                print {$out} join("\t", @cells), "\n";
            }
        }
    }
    else {
        # "json": the records are already JSON objects, wrap them in an array.
        print {$out} "[", join(",", toJSON($content)), "]";
    }

    # Buffered write errors only surface when the handle is closed.
    close($out)
        or die "Cannot finish writing $output_file: $!";
    print "Results saved to '$output_file'\n";
}