Mercurial > repos > cpt > cpt_psm_plotter
view cpt_dnaplotter.pl @ 1:8691c1c61a8e draft default tip
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:48:47 +0000 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl # # Code written by Eric Rasche # mailto:rasche.eric@yandex.ru # tel: 404.692.2048 # http://eric.rasche.co.uk # for # Center for Phage Technology # use strict; use warnings; use CPT::GalaxyGetOpt; use Data::Dumper; use CPT::Bio; use CPT::Circos::Conf; my $bio = CPT::Bio->new(); my @argv_copy; foreach(@ARGV){push(@argv_copy, "$_");} my $ggo = CPT::GalaxyGetOpt->new(); my $options = $ggo->getOptions( 'options' => [ [ 'file|f', 'Input file', { validate => 'File/Input', file_format => ['genbank', 'embl', 'txt'], } ], [], ['Track Configuration'], ['track_key', 'Key to select from genbank data', { validate => 'Genomic/Tag', required => 1, multiple => 1 } ], ['track_feature_filter_invert', 'Should the qualifier search be inverted?', { validate => 'Option', options => { 'yes', 'Yes', 'no', 'No' } , multiple => 1 } ], ['track_feature_filter_hastag', 'Select a tag which should be present in that qualifier (e.g., signal/tmhelix/pseudo)', { validate => 'String' , multiple => 1 } ], ['track_feature_filter_textquery', 'Specify text which MUST be in that tag', { validate => 'String' , multiple => 1 } ], ['track_feature_filter_strand', 'Which strand should the feature appear on?', { validate => 'Option', options => { 'f', 'Forward', 'r', 'Reverse', 'a', 'Any' } , multiple => 1 } ], [], ['enable_gc_skew_plot', 'Enable/Disable calculation of GC Skew Plot', { validate => 'Flag' } ], ['gc_skew_plot_window_size', 'Window size for calculation of GC Skew', { validate => 'Int', min => 1000, default => 10000} ], ['gc_skew_plot_step_size', 'Step size for calculation of GC Skew', { validate => 'Int', min => 200, default => 200 } ], ], 'outputs' => [ [ 'output_circos_confs', 'Circos Configuration Files', { validate => 'File/Output', required => 1, default => 'out', data_format => 'archive', default_format => 'zip', } ], ], 'defaults' => [ 'appid' => 'CircularDNAPlotter', 'appname' => 'Circos based DNAPlotter', 'appdesc' => 'plots genomes similar to Artemis\'s DNAPlotter', 'appvers' => '1.94.1', ], ); #perl cpt_dnaplotter.pl \ #-f ../t/test-files/moon.gbk \ #--track_key CDS --track_feature_filter_invert yes --track_feature_filter_hastag pseudo --track_feature_filter_strand f \ #--track_key CDS --track_feature_filter_invert yes --track_feature_filter_hastag pseudo --track_feature_filter_strand r \ #--track_key CDS --track_feature_filter_hastag pseudo --track_feature_filter_strand a \ #--track_key tRNA --track_feature_filter_strand a \ #--track_key CDS --track_feature_filter_hastag signal --track_feature_filter_strand a \ #--track_key CDS --track_feature_filter_hastag tmhelix --track_feature_filter_strand a my @reorg_args = (); my $cum_gc_ske_mean = 0; my %latest = (); for(my $i = 0; $i < scalar(@argv_copy); $i++){ my $c = $argv_copy[$i]; # We have entered a new one block if($c eq '--track_key'){ # If we have loaded data if(scalar(keys(%latest)) > 0){ my %copy; foreach(keys(%latest)){ $copy{$_} = "" . $latest{$_}; } push(@reorg_args, \%copy); } # Clean out latest to prep for new data foreach(keys(%latest)){ delete $latest{$_}; } } if($c =~ /^--track_(.*)/){ $latest{$1} = $argv_copy[$i+1]; # Artificially bump so we don't bother looking at the answer to # this question. We can "get away" with this because none of # the options are flags. However, I've disabled it in the event # that flags ARE introduced #$i++; } } push(@reorg_args, \%latest); #$VAR1 = [ #{ #'feature_filter_invert' => 'yes', #'feature_plot_color' => '005500', #'feature_filter_strand' => 'f', #'feature_filter_hastag' => 'pseudo', #'key' => 'CDS' #}, #{ #'feature_filter_strand' => 'a', #'key' => 'RBS' #} #]; my @files = (); my $number_of_tracks = 0; sub register_track { #my $r0 = ( 90 - (10 * $number_of_tracks - 1)/2) / 100; #my $r1 = ( 90 - (10 * $number_of_tracks - 9)/2) / 100; my $r0 = ( 90 - (10 * $number_of_tracks - 1)/1) / 100; my $r1 = ( 90 - (10 * $number_of_tracks - 9)/1) / 100; $number_of_tracks++; return ($r0, $r1); } sub circosconf { my $cc = CPT::Circos::Conf->new(); $cc->include('etc/colors_fonts_patterns.conf'); # Features to plot along the genome $cc->include('ideogram.conf'); # markings indicating position along genome $cc->include('ticks.conf'); # Genome data $cc->set('karyotype', 'karyotype.conf'); # Default image params are fine $cc->start_block('image'); $cc->include('etc/image.conf'); $cc->end_block(); # ??? $cc->set('chromosome_units', '1000'); $cc->set('chromosome_display_default', 'yes'); #$cc->include('highlights.conf'); $cc->include('plots.conf'); $cc->include('etc/housekeeping.conf'); my $result = $cc->finalize(); $cc = CPT::Circos::Conf->new(); return $result; } sub ideogramconf{ my $cc = CPT::Circos::Conf->new(); $cc->start_block('ideogram'); $cc->start_block('spacing'); $cc->set('default','0u'); $cc->set('break','0u'); $cc->end_block(); $cc->set('thickness', '20p'); $cc->set('stroke_thickness', '2'); $cc->set('stroke_color', 'black'); $cc->set('fill','no'); $cc->set('fill_color','black'); $cc->set('radius','0.85r'); $cc->set('show_label','yes'); $cc->set('label_font','default'); $cc->set('label_radius','dims(ideogram,radius) + 0.05'); $cc->set('label_size','36'); $cc->set('label_parallel','yes'); $cc->set('label_case','upper'); $cc->set('band_stroke_thickness','2'); $cc->set('show_bands','yes'); $cc->set('fill_bands','yes'); $cc->end_block(); return $cc->finalize(); } sub generate_feature_table { my ($filename, %filter) = @_; print "Filtering on features\n"; print Dumper \%filter; my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); # Only handing first sequence. my $seq_object = $seqio_object->next_seq; my $parent = $seq_object->display_id(); # Feature data my @features; foreach my $feat($seq_object->get_SeqFeatures()){ if($feat->primary_tag() eq $filter{key}){ # If they've said "hastag" AND we do indeed have that tag AND we haven't inverted this filter. if( ($filter{feature_filter_hastag} && $feat->has_tag($filter{feature_filter_hastag}) && !$filter{feature_filter_invert}) || ($filter{feature_filter_hastag} && $filter{feature_filter_invert} && !$feat->has_tag($filter{feature_filter_hastag})) || (! $filter{feature_filter_hastag}) ){ if( !$filter{feature_filter_strand} || ($feat->strand() == 1 && ( $filter{feature_filter_strand} eq 'f' || $filter{feature_filter_strand} eq 'a' )) || ($feat->strand() == -1 && ( $filter{feature_filter_strand} eq 'r' || $filter{feature_filter_strand} eq 'a' )) || ($feat->strand() == 0 && ( $filter{feature_filter_strand} eq 'a' )) ){ push(@features, join(' ', $parent, $feat->start, $feat->end)); } } } } print "Found " . scalar @features . " features \n"; push(@files, [ 'data/' . $filename, join("\n", @features) ] ); } sub plotsconf{ my $cc = CPT::Circos::Conf->new(); $cc->start_block('plots'); my $idx = 0; foreach(@reorg_args){ my %filter = %{$_}; #{ #'feature_filter_invert' => 'yes', #'feature_plot_color' => '005500', #'feature_filter_strand' => 'f', #'feature_filter_hastag' => 'pseudo', #'key' => 'CDS' #}, $idx++; my $filename = sprintf('org.features.%s.txt', $idx); generate_feature_table($filename, %filter); my ($r0,$r1) = register_track(); $cc->start_block('plot'); $cc->set('type','tile'); $cc->set('file',$filename); $cc->set('orientation', 'center'); $cc->set('thickness', '30'); $cc->set('r1', $r1 . 'r');# '0.78r'); $cc->set('r0', $r0 . 'r');# '0.72r'); $cc->set('layers','3'); $cc->set('layers_overflow','collapse'); $cc->set('color','paired-6-qual-' . $idx); $cc->end_block(); } if($options->{enable_gc_skew_plot}){ my ($r0,$r1) = register_track(); $cc->start_block('plot'); $cc->set('type','histogram'); $cc->set('file','gc.txt'); $cc->set('r1',$r1 . 'r'); $cc->set('r0',$r0 . 'r'); $cc->set('fill_color','purple'); $cc->set('orientation','in'); $cc->start_block('rules'); $cc->start_block('rule'); $cc->set('condition','var(value) < 0'); $cc->set('fill_color', 'green'); $cc->end_block(); $cc->end_block(); $cc->end_block(); } #$cc->start_block('plot'); #$cc->set('type','histogram'); #$cc->set('file','gc_cumulative.txt'); #$cc->set('r1','0.6r'); #$cc->set('r0','0.55r'); #$cc->set('fill_color','purple'); #$cc->set('orientation','out'); #$cc->start_block('rules'); #$cc->start_block('rule'); #$cc->set('condition','var(value) < ' . $cum_gc_ske_mean); #$cc->set('fill_color', 'green'); #$cc->end_block(); #$cc->end_block(); #$cc->end_block(); $cc->end_block(); return $cc->finalize(); } sub ticksconf{ my $cc = CPT::Circos::Conf->new(); $cc->set('show_ticks','yes'); $cc->set('show_tick_labels','yes'); $cc->start_block('ticks'); $cc->set('radius','1r'); $cc->set('color','black'); $cc->set('thickness','2p'); $cc->set('multiplier','1e-3'); $cc->set('format','%d'); $cc->start_block('tick'); $cc->set('spacing','1000u'); $cc->set('size','10p'); $cc->end_block(); $cc->start_block('tick'); $cc->set('spacing','10000u'); $cc->set('size','15p'); $cc->set('show_label','yes'); $cc->set('label_size','20p'); $cc->set('label_offset','10p'); $cc->set('format','%d'); $cc->end_block(); $cc->end_block(); return $cc->finalize(); } sub karyotype { my @karyotype_data = (); my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); # Only handing first sequence. my $seq_object = $seqio_object->next_seq; # Main 'chromosome' data push(@karyotype_data, join(' ', 'chr', '-',$seq_object->display_id(),$seq_object->display_id(),0, $seq_object->length(),'black')); return join("\n", @karyotype_data); } sub gcgraph_cumulative { my @gcdata = (); my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); # Only handing first sequence. my $seq_object = $seqio_object->next_seq; my $parent = $seq_object->display_id(); my $seq = $seq_object->seq(); my $sep = int($options->{gc_skew_plot_window_size}/2); my $stepsep = int($options->{gc_skew_plot_step_size}/2); my $cumulative_gc_skew = 0; my @cumgc_vals; my $count = 0; foreach(my $i = $sep; $i < $seq_object->length() - $sep - $options->{gc_skew_plot_step_size}; $i += $options->{gc_skew_plot_step_size}){ $count++; $cumulative_gc_skew += _calculate_gc_skew_for_seq(substr($seq,$i-$sep,$options->{gc_skew_plot_window_size})), push(@cumgc_vals, $cumulative_gc_skew); push(@gcdata, join(" ", $parent, $i - $stepsep, $i + $stepsep, $cumulative_gc_skew )); } my $sum = 0; foreach(@cumgc_vals){$sum += $_;} $cum_gc_ske_mean = $sum / $count; return join("\n", @gcdata); # Main 'chromosome' data } sub gcgraph { my @gcdata = (); my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); # Only handing first sequence. my $seq_object = $seqio_object->next_seq; my $parent = $seq_object->display_id(); my $seq = $seq_object->seq(); my $sep = int($options->{gc_skew_plot_window_size}/2); my $stepsep = int($options->{gc_skew_plot_step_size}/2); foreach(my $i = $sep; $i < $seq_object->length() - $sep - $options->{gc_skew_plot_step_size}; $i += $options->{gc_skew_plot_step_size}){ push(@gcdata, join(" ", $parent, $i - $stepsep, $i + $stepsep, _calculate_gc_skew_for_seq(substr($seq,$i-$sep,$options->{gc_skew_plot_window_size})), )); } return join("\n", @gcdata); # Main 'chromosome' data } sub _calculate_gc_skew_for_seq { my ($seq) = @_; $seq = uc($seq); my %counts; foreach(split //,$seq){ $counts{$_}++; } if($counts{G} + $counts{C} > 0){ return ($counts{G} - $counts{C}) / ($counts{G} + $counts{C}); } return 0; } push(@files, [ 'etc/karyotype.conf', karyotype() ]); push(@files, [ 'etc/circos.conf', circosconf() ]); push(@files, [ 'etc/ideogram.conf', ideogramconf() ]); if($options->{enable_gc_skew_plot}){ push(@files, [ 'data/gc.txt', gcgraph() ]); push(@files, [ 'data/gc_cumulative.txt', gcgraph_cumulative() ]); } push(@files, [ 'etc/plots.conf', plotsconf() ]); push(@files, [ 'etc/ticks.conf', ticksconf() ]); use Archive::Any::Create; my $archive = Archive::Any::Create->new(); $archive->container('conf'); foreach(@files){ my ($location, $content) = @{$_}; $archive->add_file($location, $content); } use CPT::OutputFiles; my $crr_output = CPT::OutputFiles->new( name => 'output_circos_confs', GGO => $ggo, ); $crr_output->CRR(data => $archive); =head1 NAME DNAPlotter =head1 DESCRIPTION Much like artemis's DNAPlotter, this tool plots genomes in a ciruclar fashion, and can plot gc-deviation tracks as well. The options are somewhat reduced compared to artemis, so if you need something that isn't available in this version please file a bug report. =head1 USE Each track has several parameters: =over 4 =item track_key This selects a set of features from a genbank file, e.g., CDSs or tRNAs =item track_feature_filter_invert This parameter will invert (negate) the results of whatever query parameters you specify after it. =item track_feature_filter_hastag Require that a feature has a specific tag.... =item track_feature_filter_textquery ...with this specific text in it =item track_feature_filter_strand Which strand should the feature be on (not inverted) =back Additionally, users are able to enable/disable GC skew plots. it's suggested that these are generally left alone, as they can quickly increase runtime. =cut