# HG changeset patch
# User cpt
# Date 1685933486 0
# Node ID 97ef96676b48368d8fc05b17c634d1d8f3fdf9fc
# Parent b18e8268bf4e30a61b21129f02143771dc553b56
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
diff -r b18e8268bf4e -r 97ef96676b48 PSM_Recombine.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Recombine.py Mon Jun 05 02:51:26 2023 +0000
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+import argparse
+import logging
+from Bio import SeqIO
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
+ parser.add_argument("psmTable", type=argparse.FileType("r"))
+ parser.add_argument("gbkList", type=argparse.FileType("r"), nargs="+")
+ args = parser.parse_args()
+
+ gbkRecs = []
+ recIDs = []
+ recFlatten = [] # Can only seek argparse file once
+
+ for f in args.gbkList:
+ tempRecs = SeqIO.parse(f, "genbank")
+ for rec in tempRecs:
+ recFlatten.append(rec)
+
+ for line in args.psmTable:
+ lineElems = line.split("\t")
+ numGenes = 0
+ accession = ""
+ lineOut = ""
+ if recIDs == []:
+ for i in lineElems:
+ recIDs.append(i.strip())
+ lineOut += i.strip() + "\t"
+ for rec in recFlatten:
+ if i.strip() in rec.id or rec.id in i.strip():
+ gbkRecs.append(rec)
+ lineOut += "No. of phages in which gene is present\tBest Database Match"
+ print(lineOut)
+ continue
+
+ for i in range(0, len(lineElems)):
+ checkFeat = lineElems[i].strip()
+ if checkFeat == "-":
+ lineOut += "(-)\t"
+ continue
+ else:
+ lineOut += checkFeat + "\t"
+ numGenes += 1
+ if accession == "":
+ for feat in gbkRecs[i].features:
+ if (
+ "locus_tag" in feat.qualifiers.keys()
+ and feat.qualifiers["locus_tag"][0] == checkFeat
+ ):
+ if "protein_id" in feat.qualifiers.keys():
+ accession = feat.qualifiers["protein_id"][0]
+ break # Comment out if we need to get more info
+ lineOut += str(numGenes) + "\t" + accession
+ print(lineOut)
diff -r b18e8268bf4e -r 97ef96676b48 PSM_Recombine.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PSM_Recombine.xml Mon Jun 05 02:51:26 2023 +0000
@@ -0,0 +1,124 @@
+
+ : generates a raw PSM file for plotting and a tabular summary
+
+ macros.xml
+ cpt-macros.xml
+
+
+ perl
+ perl-bioperl
+ perl-moose
+ perl-ipc-run
+ perl-getopt-long-descriptive
+ python
+ biopython
+
+ "${tableOut}"
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r b18e8268bf4e -r 97ef96676b48 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:51:26 2023 +0000
@@ -0,0 +1,114 @@
+
+
+
+ python
+ biopython
+ requests
+
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_0_prep.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_psm_0_prep.pl Mon Jun 05 02:51:26 2023 +0000
@@ -0,0 +1,194 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Storable;
+use Bio::SearchIO;
+use Bio::SeqIO;
+use Bio::Tools::CodonTable;
+use Data::Dumper;
+use File::Temp qw/tempfile tempdir/;
+
+
+use CPT::GalaxyGetOpt;
+my $ggo = CPT::GalaxyGetOpt->new();
+my $options = $ggo->getOptions(
+ 'options' => [
+ [ 'file', 'Input file', { validate => 'File/Input', multiple => 1, required => 1,
+ file_format => ['genbank', 'txt'],
+ } ],
+ ],
+ 'outputs' => [
+ [
+ 'cpt_psm_object',
+ 'Output PSM Object',
+ {
+ validate => 'File/Output',
+ required => 1,
+ default => 'cpt_psm',
+ data_format => 'text/plain',
+ default_format => 'TXT'
+ }
+ ],
+ ],
+ 'defaults' => [
+ 'appid' => 'PSM.Prep',
+ 'appname' => 'PSM Prep',
+ 'appdesc' => 'prepares data for the PSM Plotter',
+ 'appvers' => '1.94.2',
+ ],
+ 'tests' => [],
+);
+
+
+
+use CPT::Bio;
+my $bio = CPT::Bio->new();
+
+my @genbank_files = @{$options->{file}};
+
+my %data = (
+ file_list => [],
+);
+
+my $GLOBAL_UNLINK_VAR = 1;
+my $tempdir = tempdir('cpt.psm2.XXXXXXX',CLEANUP => $GLOBAL_UNLINK_VAR);
+use CPT::Util::CRC64;
+my $crc = CPT::Util::CRC64->new();
+
+foreach my $file(@genbank_files){
+ my $seqio_object = Bio::SeqIO->new(-file => $file,-format=>'genbank');
+ while(my $seqobj = $seqio_object->next_seq){
+ my ( $fh, $path ) = tempfile('cds_export.XXXXXXXXX', UNLINK => $GLOBAL_UNLINK_VAR, DIR => $tempdir, SUFFIX => '.fa');
+
+ my @gi_array;
+ foreach my $feat ( $seqobj->get_SeqFeatures ) {
+ if($feat->primary_tag eq 'CDS'){
+ my $header = $bio->_getIdentifier($feat);
+ # This ensures proteins have a file-specific ID appeneded to them.
+ my $seq = $bio->translate(
+ $bio->intelligent_get_seq($feat));
+
+ # Proteins come with translated stop codon
+ $seq =~ s/\*//g;
+ $seq =~ s/\+//g;
+ $seq =~ s/#//g;
+ $header .= "_" . $crc->crc64($seq);
+ push @gi_array, $header;
+ print $fh ">$header\n$seq\n";
+ }
+ }
+ $data{gbk}{$seqobj->display_id()}{'gi'} = \@gi_array;
+ $data{gbk}{$seqobj->display_id()}{'fasta_location'} = $path;
+ $data{gbk}{$seqobj->display_id()}{'gbk_location'} = $file;
+ push(@{$data{file_list}}, $seqobj->display_id());
+ close $fh;
+ }
+}
+
+
+
+use IPC::Run3;
+
+# Concatenate Fasta Files
+my @fasta_files;
+foreach(@{$data{file_list}}){
+ push(@fasta_files, $data{gbk}{$_}{fasta_location});
+}
+my @command = ('cat', @fasta_files);
+my ($merged_fa_fh, $merged_fa_path) = tempfile('merged.XXXXXXXXX', UNLINK => 1, DIR => $tempdir, SUFFIX => '.fa');
+my ($in, $out, $err);
+run3 \@command, \$in, \$out, \$err;
+if($err){
+ print STDERR $err;
+}
+print $merged_fa_fh $out;
+close($merged_fa_fh);
+
+
+# Create Blast Database
+my ($blastdb_fh, $blastdb_path) = tempfile('blastdb.XXXXXXXXX', UNLINK => 1, DIR => $tempdir);
+@command = ('makeblastdb',
+ '-dbtype', 'prot',
+ '-in', $merged_fa_path,
+ '-out', $blastdb_path,
+);
+my ($makeblast_in,$makeblast_out,$makeblast_err);
+run3 \@command, \$makeblast_in, \$makeblast_out, \$makeblast_err;
+
+# Blast files
+foreach(@{$data{file_list}}){
+ #push(@fasta_files, $data{gbk}{$_}{fasta_location});
+ my @blast_cmd = ('blastp',
+ '-query', $data{gbk}{$_}{fasta_location},
+ '-out', $data{gbk}{$_}{fasta_location} . ".xml",
+ '-outfmt', '5',
+ '-db', $blastdb_path,
+ );
+ my ($blast_in,$blast_out,$blast_err);
+ run3 \@blast_cmd, \$blast_in, \$blast_out, \$blast_err;
+}
+
+my $value;
+my @data_tsv;
+
+foreach(@{$data{file_list}}){
+ #push(@fasta_files, $data{gbk}{$_}{fasta_location});
+ my $file = $data{gbk}{$_}{fasta_location};
+
+ my $in = new Bio::SearchIO(
+ -format => 'blastxml',
+ -tempfile => 1,
+ -file => "$file.xml",
+ );
+ while( my $result = $in->next_result ) {
+ while( my $hit = $result->next_hit ) {
+ while( my $hsp = $hit->next_hsp ) {
+ my $Identity = $hsp->percent_identity/100 * $hsp->length('query');
+ my $IterationQueryLength = $result->query_length();
+ my $HitLength = $hit->length();
+ my $dice = (200*$Identity)/($HitLength + $IterationQueryLength);
+
+ my $c = $result->query_description();#genome_a_header
+ my $d = $hit->name();#genome_b_header
+ # Skip self-self links
+ next if($c eq $d);
+
+ push (@data_tsv,
+ [
+ $c,
+ $d,
+ $hsp->evalue(),
+ $dice,
+ ]
+ );
+
+ }#close hsp
+ }#close hit
+ }#close result
+}#close for genomes
+
+$data{hit_table} = \@data_tsv;
+
+use CPT::OutputFiles;
+my $psmout = CPT::OutputFiles->new(
+ name => 'cpt_psm_object',
+ GGO => $ggo,
+);
+my @val = $psmout->CRR(data => "none", extension => 'psm');
+store \%data,$val[0];
+
+
+
+
+
+
+
+=head1 NAME
+
+PSM Prep
+
+=head1 DESCRIPTION
+
+This tool takes in 2 or more GenBank files, blasts, and prepares data structures for use in the companion tool: PSM Plotter. Select as many (multi)-gbk files as you I want to plot. Once this tool is done, you can select any subset of those to plot then.
+
+=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_2_gentable.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_psm_2_gentable.pl Mon Jun 05 02:51:26 2023 +0000
@@ -0,0 +1,168 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Storable;
+use CPT::GalaxyGetOpt;
+use CPT::Bio::NW_MSA;
+use Data::Dumper;
+use CPT::Circos::Conf;
+use POSIX;
+
+
+my $ggo = CPT::GalaxyGetOpt->new();
+my $options = $ggo->getOptions(
+ 'options' => [
+ [ 'file', 'PSM2 Data File', { validate => 'File/Input', required => 1 } ],
+ [],
+ ['Cutoffs'],
+ ['evalue' , 'Evalue cutoff' , { validate => 'Float' , default => 1e-4 } ] ,
+ ['dice' , 'Dice cutoff' , { validate => 'Float' , default => 50 } ] ,
+ [],
+ ['Alignment Options'],
+ ['mismatch' , 'Mismatch Score' , { validate => 'Float' , default => -1} ] ,
+ ['gap_penalty' , 'Gap Penalty' , { validate => 'Float' , default => '0.0' } ] ,
+ ['match' , 'Match Score' , { validate => 'Float' , default => 5 } ] ,
+ ],
+ 'outputs' => [
+ [
+ 'diff_table',
+ 'Output Comparison Table',
+ {
+ validate => 'File/Output',
+ required => 1,
+ default => 'genome_comp',
+ data_format => 'text/tabular',
+ default_format => 'TSV_U',
+ },
+ ],
+ [
+ 'blastclust',
+ 'Output Blastclust Table',
+ {
+ validate => 'File/Output',
+ required => 1,
+ default => 'blastclust',
+ data_format => 'text/tabular',
+ default_format => 'TSV_U',
+ }
+ ],
+ ],
+ 'defaults' => [
+ 'appid' => 'PSM.Comp',
+ 'appname' => 'PSM Comparison Table',
+ 'appdesc' => 'aligns and lists data from PSM Prep',
+ 'appvers' => '1.94',
+ ],
+ 'tests' => [
+ ],
+);
+
+
+my %data_file = %{retrieve($options->{file})};
+
+print STDERR "Aliging genomes\n";
+my $msa = CPT::Bio::NW_MSA->new(
+ gap_penalty => $options->{'gap_penalty'},
+ match_score => $options->{'match'},
+ mismatch_score => $options->{'mismatch'},
+ bidi => 1,
+);
+
+my @hits = @{$data_file{hit_table}};
+my @clusters;
+
+foreach my $hit(@hits){
+ my ($from, $to, $evalue, $dice) = @{$hit};
+ if($evalue < $options->{evalue} && $dice > $options->{dice}){
+ if($options->{verbose}){
+ print "$from $to\n";
+ }
+
+ my $foundmatch = 0;
+ foreach my $cluster(@clusters){
+ if($from ~~ @{$cluster} || $to ~~ @{$cluster}){
+ $foundmatch = 1;
+ if(!($from ~~ @{$cluster})){
+ push(@{$cluster}, $from);
+ }
+ if(!($to ~~ @{$cluster})){
+ push(@{$cluster}, $to);
+ }
+ }
+ }
+ if($foundmatch == 0){
+ push(@clusters, ["".($#clusters+2), $from, $to]);
+ }
+ $msa->add_relationship($from, $to);
+ }
+}
+my @fixed_clusters;
+foreach my $cluster (@clusters) {
+ my ($idx, @values) = @{$cluster};
+ push(@fixed_clusters, [$idx, join(',', @values)]);
+}
+
+my @user_ordering = keys($data_file{gbk});
+
+foreach my $genome(@user_ordering){
+ print STDERR "\tAligning $genome\n";
+ my $gi_list_ref = $data_file{gbk}{$genome}{gi};#"GI" list
+ if(! defined $gi_list_ref){
+ warn "Could not find $genome genome in the data file. Please be sure you have correctly specified the name of a genome from a genbank file. (See the LOCUS line for the name).";
+ }else{
+ $msa->align_list($gi_list_ref);
+ }
+}
+
+my @aligned_results = $msa->merged_array();
+# Remove CRC64 hashes from sequences
+foreach my $row(@aligned_results){
+ $row = [map { s/_[A-F0-9]{16}$//; $_ } @{$row}];
+ #my $key = ${$row}[0];
+ #foreach my $cluster(@clusters){
+
+ #}
+}
+
+my %table = (
+ 'Sheet1' => {
+ header => \@user_ordering,
+ data => \@aligned_results,
+ }
+);
+
+
+use CPT::OutputFiles;
+my $crr_output = CPT::OutputFiles->new(
+ name => 'diff_table',
+ GGO => $ggo,
+);
+$crr_output->CRR(data => \%table);
+
+my %table2 = (
+ 'Sheet1' => {
+ header => ['Cluster ID', 'Contents'],
+ data => \@fixed_clusters,
+ }
+);
+my $crr_output2 = CPT::OutputFiles->new(
+ name => 'blastclust',
+ GGO => $ggo,
+);
+$crr_output2->CRR(data => \%table2);
+
+=head1 DESCRIPTION
+
+Following the execution of the PSM Prep tool, this tool simply aligns the genomes and generates a table comparison the positions of all proteins. It can be very useful to figure out which genes are missing in which genomes.
+
+=head2 IMPORTANT PARAMETERS
+
+=over 4
+
+=item C, C, C
+
+These parameters control the Needleman-Wunsch Multiple Sequence Alignment library's scoring scheme. Mismatch scores are generally negative and discourage unrelated proteins from being plotted in a line together. Match scores encourage related proteins to line up. Gap penalty is set at zero as we generally prefer gaps to mismatches in this tool; phage genomes are small and gaps are "cheap" to use, whereas mismatches can sometimes give an incorrect impression of relatedness. That said, how your plots look is completely up to you and we encourage experimentation!
+
+=back
+
+=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/PSM_Recombine.py
--- a/cpt_psm_recombine/PSM_Recombine.py Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-import argparse
-import logging
-from Bio import SeqIO
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger(__name__)
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
- parser.add_argument("psmTable", type=argparse.FileType("r"))
- parser.add_argument('gbkList', type=argparse.FileType("r"), nargs="+")
- args = parser.parse_args()
-
- gbkRecs = []
- recIDs = []
- recFlatten = [] # Can only seek argparse file once
-
- for f in args.gbkList:
- tempRecs = SeqIO.parse(f, "genbank")
- for rec in tempRecs:
- recFlatten.append(rec)
-
- for line in args.psmTable:
- lineElems = line.split("\t")
- numGenes = 0
- accession = ""
- lineOut = ""
- if recIDs == []:
- for i in lineElems:
- recIDs.append(i.strip())
- lineOut += i.strip() + "\t"
- for rec in recFlatten:
- if i.strip() in rec.id or rec.id in i.strip():
- gbkRecs.append(rec)
- lineOut += "No. of phages in which gene is present\tBest Database Match"
- print(lineOut)
- continue
-
- for i in range(0, len(lineElems)):
- checkFeat = lineElems[i].strip()
- if checkFeat == "-":
- lineOut += "(-)\t"
- continue
- else:
- lineOut += checkFeat + "\t"
- numGenes += 1
- if accession == "":
- for feat in gbkRecs[i].features:
- if "locus_tag" in feat.qualifiers.keys() and feat.qualifiers["locus_tag"][0] == checkFeat:
- if "protein_id" in feat.qualifiers.keys():
- accession = feat.qualifiers["protein_id"][0]
- break # Comment out if we need to get more info
- lineOut += str(numGenes) + "\t" + accession
- print(lineOut)
-
-
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/PSM_Recombine.xml
--- a/cpt_psm_recombine/PSM_Recombine.xml Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-
-
- : generates a raw PSM file for plotting and a tabular summary
-
- macros.xml
- cpt-macros.xml
-
-
- perl
- perl-bioperl
- perl-moose
- perl-ipc-run
- perl-getopt-long-descriptive
- python
- biopython
-
- "${tableOut}"
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/cpt-macros.xml
--- a/cpt_psm_recombine/cpt-macros.xml Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-
-
-
-
- python
- biopython
- requests
-
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/cpt_psm_0_prep.pl
--- a/cpt_psm_recombine/cpt_psm_0_prep.pl Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,194 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-use Storable;
-use Bio::SearchIO;
-use Bio::SeqIO;
-use Bio::Tools::CodonTable;
-use Data::Dumper;
-use File::Temp qw/tempfile tempdir/;
-
-
-use CPT::GalaxyGetOpt;
-my $ggo = CPT::GalaxyGetOpt->new();
-my $options = $ggo->getOptions(
- 'options' => [
- [ 'file', 'Input file', { validate => 'File/Input', multiple => 1, required => 1,
- file_format => ['genbank', 'txt'],
- } ],
- ],
- 'outputs' => [
- [
- 'cpt_psm_object',
- 'Output PSM Object',
- {
- validate => 'File/Output',
- required => 1,
- default => 'cpt_psm',
- data_format => 'text/plain',
- default_format => 'TXT'
- }
- ],
- ],
- 'defaults' => [
- 'appid' => 'PSM.Prep',
- 'appname' => 'PSM Prep',
- 'appdesc' => 'prepares data for the PSM Plotter',
- 'appvers' => '1.94.2',
- ],
- 'tests' => [],
-);
-
-
-
-use CPT::Bio;
-my $bio = CPT::Bio->new();
-
-my @genbank_files = @{$options->{file}};
-
-my %data = (
- file_list => [],
-);
-
-my $GLOBAL_UNLINK_VAR = 1;
-my $tempdir = tempdir('cpt.psm2.XXXXXXX',CLEANUP => $GLOBAL_UNLINK_VAR);
-use CPT::Util::CRC64;
-my $crc = CPT::Util::CRC64->new();
-
-foreach my $file(@genbank_files){
- my $seqio_object = Bio::SeqIO->new(-file => $file,-format=>'genbank');
- while(my $seqobj = $seqio_object->next_seq){
- my ( $fh, $path ) = tempfile('cds_export.XXXXXXXXX', UNLINK => $GLOBAL_UNLINK_VAR, DIR => $tempdir, SUFFIX => '.fa');
-
- my @gi_array;
- foreach my $feat ( $seqobj->get_SeqFeatures ) {
- if($feat->primary_tag eq 'CDS'){
- my $header = $bio->_getIdentifier($feat);
- # This ensures proteins have a file-specific ID appeneded to them.
- my $seq = $bio->translate(
- $bio->intelligent_get_seq($feat));
-
- # Proteins come with translated stop codon
- $seq =~ s/\*//g;
- $seq =~ s/\+//g;
- $seq =~ s/#//g;
- $header .= "_" . $crc->crc64($seq);
- push @gi_array, $header;
- print $fh ">$header\n$seq\n";
- }
- }
- $data{gbk}{$seqobj->display_id()}{'gi'} = \@gi_array;
- $data{gbk}{$seqobj->display_id()}{'fasta_location'} = $path;
- $data{gbk}{$seqobj->display_id()}{'gbk_location'} = $file;
- push(@{$data{file_list}}, $seqobj->display_id());
- close $fh;
- }
-}
-
-
-
-use IPC::Run3;
-
-# Concatenate Fasta Files
-my @fasta_files;
-foreach(@{$data{file_list}}){
- push(@fasta_files, $data{gbk}{$_}{fasta_location});
-}
-my @command = ('cat', @fasta_files);
-my ($merged_fa_fh, $merged_fa_path) = tempfile('merged.XXXXXXXXX', UNLINK => 1, DIR => $tempdir, SUFFIX => '.fa');
-my ($in, $out, $err);
-run3 \@command, \$in, \$out, \$err;
-if($err){
- print STDERR $err;
-}
-print $merged_fa_fh $out;
-close($merged_fa_fh);
-
-
-# Create Blast Database
-my ($blastdb_fh, $blastdb_path) = tempfile('blastdb.XXXXXXXXX', UNLINK => 1, DIR => $tempdir);
-@command = ('makeblastdb',
- '-dbtype', 'prot',
- '-in', $merged_fa_path,
- '-out', $blastdb_path,
-);
-my ($makeblast_in,$makeblast_out,$makeblast_err);
-run3 \@command, \$makeblast_in, \$makeblast_out, \$makeblast_err;
-
-# Blast files
-foreach(@{$data{file_list}}){
- #push(@fasta_files, $data{gbk}{$_}{fasta_location});
- my @blast_cmd = ('blastp',
- '-query', $data{gbk}{$_}{fasta_location},
- '-out', $data{gbk}{$_}{fasta_location} . ".xml",
- '-outfmt', '5',
- '-db', $blastdb_path,
- );
- my ($blast_in,$blast_out,$blast_err);
- run3 \@blast_cmd, \$blast_in, \$blast_out, \$blast_err;
-}
-
-my $value;
-my @data_tsv;
-
-foreach(@{$data{file_list}}){
- #push(@fasta_files, $data{gbk}{$_}{fasta_location});
- my $file = $data{gbk}{$_}{fasta_location};
-
- my $in = new Bio::SearchIO(
- -format => 'blastxml',
- -tempfile => 1,
- -file => "$file.xml",
- );
- while( my $result = $in->next_result ) {
- while( my $hit = $result->next_hit ) {
- while( my $hsp = $hit->next_hsp ) {
- my $Identity = $hsp->percent_identity/100 * $hsp->length('query');
- my $IterationQueryLength = $result->query_length();
- my $HitLength = $hit->length();
- my $dice = (200*$Identity)/($HitLength + $IterationQueryLength);
-
- my $c = $result->query_description();#genome_a_header
- my $d = $hit->name();#genome_b_header
- # Skip self-self links
- next if($c eq $d);
-
- push (@data_tsv,
- [
- $c,
- $d,
- $hsp->evalue(),
- $dice,
- ]
- );
-
- }#close hsp
- }#close hit
- }#close result
-}#close for genomes
-
-$data{hit_table} = \@data_tsv;
-
-use CPT::OutputFiles;
-my $psmout = CPT::OutputFiles->new(
- name => 'cpt_psm_object',
- GGO => $ggo,
-);
-my @val = $psmout->CRR(data => "none", extension => 'psm');
-store \%data,$val[0];
-
-
-
-
-
-
-
-=head1 NAME
-
-PSM Prep
-
-=head1 DESCRIPTION
-
-This tool takes in 2 or more GenBank files, blasts, and prepares data structures for use in the companion tool: PSM Plotter. Select as many (multi)-gbk files as you I want to plot. Once this tool is done, you can select any subset of those to plot then.
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/cpt_psm_2_gentable.pl
--- a/cpt_psm_recombine/cpt_psm_2_gentable.pl Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,168 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-use Storable;
-use CPT::GalaxyGetOpt;
-use CPT::Bio::NW_MSA;
-use Data::Dumper;
-use CPT::Circos::Conf;
-use POSIX;
-
-
-my $ggo = CPT::GalaxyGetOpt->new();
-my $options = $ggo->getOptions(
- 'options' => [
- [ 'file', 'PSM2 Data File', { validate => 'File/Input', required => 1 } ],
- [],
- ['Cutoffs'],
- ['evalue' , 'Evalue cutoff' , { validate => 'Float' , default => 1e-4 } ] ,
- ['dice' , 'Dice cutoff' , { validate => 'Float' , default => 50 } ] ,
- [],
- ['Alignment Options'],
- ['mismatch' , 'Mismatch Score' , { validate => 'Float' , default => -1} ] ,
- ['gap_penalty' , 'Gap Penalty' , { validate => 'Float' , default => '0.0' } ] ,
- ['match' , 'Match Score' , { validate => 'Float' , default => 5 } ] ,
- ],
- 'outputs' => [
- [
- 'diff_table',
- 'Output Comparison Table',
- {
- validate => 'File/Output',
- required => 1,
- default => 'genome_comp',
- data_format => 'text/tabular',
- default_format => 'TSV_U',
- },
- ],
- [
- 'blastclust',
- 'Output Blastclust Table',
- {
- validate => 'File/Output',
- required => 1,
- default => 'blastclust',
- data_format => 'text/tabular',
- default_format => 'TSV_U',
- }
- ],
- ],
- 'defaults' => [
- 'appid' => 'PSM.Comp',
- 'appname' => 'PSM Comparison Table',
- 'appdesc' => 'aligns and lists data from PSM Prep',
- 'appvers' => '1.94',
- ],
- 'tests' => [
- ],
-);
-
-
-my %data_file = %{retrieve($options->{file})};
-
-print STDERR "Aliging genomes\n";
-my $msa = CPT::Bio::NW_MSA->new(
- gap_penalty => $options->{'gap_penalty'},
- match_score => $options->{'match'},
- mismatch_score => $options->{'mismatch'},
- bidi => 1,
-);
-
-my @hits = @{$data_file{hit_table}};
-my @clusters;
-
-foreach my $hit(@hits){
- my ($from, $to, $evalue, $dice) = @{$hit};
- if($evalue < $options->{evalue} && $dice > $options->{dice}){
- if($options->{verbose}){
- print "$from $to\n";
- }
-
- my $foundmatch = 0;
- foreach my $cluster(@clusters){
- if($from ~~ @{$cluster} || $to ~~ @{$cluster}){
- $foundmatch = 1;
- if(!($from ~~ @{$cluster})){
- push(@{$cluster}, $from);
- }
- if(!($to ~~ @{$cluster})){
- push(@{$cluster}, $to);
- }
- }
- }
- if($foundmatch == 0){
- push(@clusters, ["".($#clusters+2), $from, $to]);
- }
- $msa->add_relationship($from, $to);
- }
-}
-my @fixed_clusters;
-foreach my $cluster (@clusters) {
- my ($idx, @values) = @{$cluster};
- push(@fixed_clusters, [$idx, join(',', @values)]);
-}
-
-my @user_ordering = keys($data_file{gbk});
-
-foreach my $genome(@user_ordering){
- print STDERR "\tAligning $genome\n";
- my $gi_list_ref = $data_file{gbk}{$genome}{gi};#"GI" list
- if(! defined $gi_list_ref){
- warn "Could not find $genome genome in the data file. Please be sure you have correctly specified the name of a genome from a genbank file. (See the LOCUS line for the name).";
- }else{
- $msa->align_list($gi_list_ref);
- }
-}
-
-my @aligned_results = $msa->merged_array();
-# Remove CRC64 hashes from sequences
-foreach my $row(@aligned_results){
- $row = [map { s/_[A-F0-9]{16}$//; $_ } @{$row}];
- #my $key = ${$row}[0];
- #foreach my $cluster(@clusters){
-
- #}
-}
-
-my %table = (
- 'Sheet1' => {
- header => \@user_ordering,
- data => \@aligned_results,
- }
-);
-
-
-use CPT::OutputFiles;
-my $crr_output = CPT::OutputFiles->new(
- name => 'diff_table',
- GGO => $ggo,
-);
-$crr_output->CRR(data => \%table);
-
-my %table2 = (
- 'Sheet1' => {
- header => ['Cluster ID', 'Contents'],
- data => \@fixed_clusters,
- }
-);
-my $crr_output2 = CPT::OutputFiles->new(
- name => 'blastclust',
- GGO => $ggo,
-);
-$crr_output2->CRR(data => \%table2);
-
-=head1 DESCRIPTION
-
-Following the execution of the PSM Prep tool, this tool simply aligns the genomes and generates a table comparison the positions of all proteins. It can be very useful to figure out which genes are missing in which genomes.
-
-=head2 IMPORTANT PARAMETERS
-
-=over 4
-
-=item C, C, C
-
-These parameters control the Needleman-Wunsch Multiple Sequence Alignment library's scoring scheme. Mismatch scores are generally negative and discourage unrelated proteins from being plotted in a line together. Match scores encourage related proteins to line up. Gap penalty is set at zero as we generally prefer gaps to mismatches in this tool; phage genomes are small and gaps are "cheap" to use, whereas mismatches can sometimes give an incorrect impression of relatedness. That said, how your plots look is completely up to you and we encourage experimentation!
-
-=back
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT.pm
--- a/cpt_psm_recombine/lib/CPT.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-package CPT;
-use strict;
-use warnings;
-use Moose;
-
-# ABSTRACT: main library wrapping most actions needed by CPT's portal scripts.
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT - main library wrapping most actions needed by CPT's portal scripts.
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Analysis/PAUSE.pm
--- a/cpt_psm_recombine/lib/CPT/Analysis/PAUSE.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,197 +0,0 @@
-package CPT::Analysis::PAUSE;
-
-# ABSTRACT: Library for use in PAUSE analysis
-use strict;
-use warnings;
-use Moose;
-use List::Util qw(sum);
-use Statistics::Descriptive;
-
-sub max ($$) { shift; $_[ $_[0] < $_[1] ] }
-sub min ($$) { shift; $_[ $_[0] > $_[1] ] }
-
-sub derivative {
- my ( $self, $data_ref ) = @_;
- my @data = @{$data_ref};
- my @new_data;
- foreach ( my $i = 0 ; $i < scalar(@data) - 1 ; $i++ ) {
- $new_data[ $i + 1 ] = $data[ $i + 1 ] - $data[$i];
- }
- return \@new_data;
-}
-
-sub find_peaks {
- my ( $self, %data ) = @_;
- use IPC::Run3;
- use File::Temp qw/tempfile/;
-
- # Store to CSV File
- my @starts = @{ $data{data} };
- my ( $fh0, $filename0 ) = tempfile('galaxy.pause.XXXXXXX');
- printf $fh0 ( "%s,%s\n", 'position', 'count' );
- for ( my $i = 0 ; $i < scalar(@starts) ; $i++ ) {
- printf $fh0 "%d,%d\n", $i,
- ( defined $starts[$i] ? $starts[$i] : 0 );
- }
- close($fh0);
-
- my ( $fh, $filename ) = tempfile('galaxy.pause.XXXXXXX');
- my @cmd = (
- 'Rscript', $data{location_of_rscript_file},
- $filename0, $filename, $data{snr}
- );
- my ( $in, $out, $err );
- run3 \@cmd, \$in, \$out, \$err;
-
- # Read in R data
- my @values;
- while (<$fh>) {
- chomp;
- push( @values, $_ );
- }
- close($fh);
-
- unlink($filename0);
- unlink($filename);
-
- return @values;
-}
-
-sub smooth {
- my ( $self, $data_ref ) = @_;
- my @data = @{$data_ref};
- my @new_data;
- my $length = scalar @data;
- foreach ( my $i = 0 ; $i < $length ; $i++ ) {
- my $avg =
- sum( @data[ $i - 20 .. $i - 1, $i + 1 .. $i + 20 ] ) / 40;
- $new_data[$i] = $avg;
- }
- return \@new_data;
-}
-
-sub histogram {
- my ( $self, %data ) = @_;
-
- my @coverage = @{ $data{data} };
- my @return_coverage;
- for ( my $i = 0 ; $i < scalar(@coverage) ; $i++ ) {
- my $size = $coverage[$i];
- unless ($size) { $size = 0 }
- $return_coverage[$i] = [ $i, $size, "*" x $size ];
- }
- my %results = (
- 'Sheet1' => {
- headers => [qw(Base Count Plot)],
- data => \@return_coverage,
- }
- );
- return %results;
-}
-
-sub getCoverageDensity {
- my ( $self, %data ) = @_;
-
- # Load the sam file
- my $sam = Bio::DB::Sam->new(
- -bam => $data{bam},
- -fasta => $data{genome},
- -autoindex => 1,
- );
-
- # Get all alignments to our indicated FASTA file
- my @alignments = $sam->get_features_by_location(
- -seq_id => $data{fasta_id},
- -start => 1,
- -end => $data{fasta_length}
- );
-
- # Set up some variables
- my $coverage_density_max_value = 0;
- my ( @coverage_density, @read_starts, @read_ends );
-
- # including some for statistics
- my $stat_start = Statistics::Descriptive::Sparse->new();
- my $stat_end = Statistics::Descriptive::Sparse->new();
-
- # Looping over alignments
- for my $a (@alignments) {
- my $start = $a->start;
- my $end = $a->end;
-
- # Increment the number of reads starting there
- $read_starts[$start]++;
- $read_ends[$end]++;
-
- # And increment the coverage density
- foreach ( $start .. $end ) {
- $coverage_density[$_]++;
- if ( $coverage_density[$_] >
- $coverage_density_max_value )
- {
- $coverage_density_max_value =
- $coverage_density[$_];
- }
- }
- }
- my @start_data_for_stats;
- my @end_data_for_stats;
- for ( my $i = 0 ; $i < $data{fasta_length} ; $i++ ) {
- if ( $read_starts[$i] ) {
- push( @start_data_for_stats, $read_starts[$i] );
- }
- if ( $read_ends[$i] ) {
- push( @end_data_for_stats, $read_ends[$i] );
- }
- }
- $stat_start->add_data(@start_data_for_stats);
- $stat_end->add_data(@end_data_for_stats);
-
- # Lots of data to return
- use CPT::Analysis::PAUSE::ParsedSam;
- my $psam = CPT::Analysis::PAUSE::ParsedSam->new(
- coverage_density => \@coverage_density,
- read_starts => \@read_starts,
- read_ends => \@read_ends,
- max => $coverage_density_max_value,
- stats_start_max => $stat_start->max(),
- stats_end_max => $stat_end->max(),
- stats_start_mean => $stat_start->mean(),
- stats_end_mean => $stat_end->mean(),
- stats_start_standard_deviation =>
- $stat_start->standard_deviation(),
- stats_end_standard_deviation => $stat_end->standard_deviation(),
- );
- return $psam;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Analysis::PAUSE - Library for use in PAUSE analysis
-
-=head1 VERSION
-
-version 1.96
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Analysis/PAUSE/ParsedSam.pm
--- a/cpt_psm_recombine/lib/CPT/Analysis/PAUSE/ParsedSam.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-package CPT::Analysis::PAUSE::ParsedSam;
-
-# ABSTRACT: Library for use in PAUSE analysis
-use strict;
-use warnings;
-use Moose;
-use SVG;
-
-has 'coverage_density' => ( is => 'rw', isa => 'ArrayRef' );
-has 'read_starts' => ( is => 'rw', isa => 'ArrayRef' );
-has 'read_ends' => ( is => 'rw', isa => 'ArrayRef' );
-has 'max' => ( is => 'rw', isa => 'Int' );
-has 'stats_start_max' => ( is => 'rw', isa => 'Num' );
-has 'stats_end_max' => ( is => 'rw', isa => 'Num' );
-has 'stats_start_mean' => ( is => 'rw', isa => 'Num' );
-has 'stats_end_mean' => ( is => 'rw', isa => 'Num' );
-has 'stats_start_standard_deviation' => ( is => 'rw', isa => 'Num' );
-has 'stats_end_standard_deviation' => ( is => 'rw', isa => 'Num' );
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Analysis::PAUSE::ParsedSam - Library for use in PAUSE analysis
-
-=head1 VERSION
-
-version 1.96
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Analysis/PAUSE/SVG.pm
--- a/cpt_psm_recombine/lib/CPT/Analysis/PAUSE/SVG.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,445 +0,0 @@
-package CPT::Analysis::PAUSE::SVG;
-
-# ABSTRACT: Library for use in PAUSE analysis
-use strict;
-use warnings;
-use Moose;
-use Data::Dumper;
-use List::MoreUtils qw(each_array);
-use SVG;
-
-has 'svg' => ( is => 'rw' );
-has 'width' => ( is => 'rw', isa => 'Int' );
-has 'height' => ( is => 'rw', isa => 'Int' );
-has 'vertical_offset' => ( is => 'rw', isa => 'Int' );
-has 'start_end_max_num' => ( is => 'rw', isa => 'Int' );
-has 'num_rows' => ( is => 'rw', isa => 'Int' );
-has 'row_size' => ( is => 'rw', isa => 'Int' );
-has 'row_width' => ( is => 'rw', isa => 'Int' );
-has 'x_border' => ( is => 'rw', isa => 'Int' );
-has 'y_border' => ( is => 'rw', isa => 'Int' );
-has 'line_height' => ( is => 'rw', isa => 'Int' );
-has 'inter_line_spacing' => ( is => 'rw', isa => 'Int' );
-has 'max' => ( is => 'rw', isa => 'Int' );
-has 'fasta_id' => ( is => 'rw', isa => 'Any' );
-
-sub setup {
- my ($self) = @_;
- $self->svg(
- SVG->new(
- width => $self->width(),
- height => $self->height(),
- )
- );
-}
-
-sub add_header {
- my ( $self, @refs ) = @_;
-
- $self->plot_title( 'Plot of ' . $self->fasta_id() );
-
- my $i = 0;
- foreach (@refs) {
- my @subrefs = @{$_};
- foreach (@subrefs) {
- $i++;
- my %d = %{$_};
- $self->plot_key( $d{name}, $d{line}, $d{fill}, $i );
- }
- }
- $self->vertical_offset( $self->vertical_offset() - ( $i - 1 ) * 20 );
-}
-
-my $global_pline_idx = 0;
-
-sub plot_track {
- my ( $self, $points_ref, $stroke, $fill, $id ) = @_;
-
- $global_pline_idx++;
- $self->svg()->polyline(
- %{$points_ref},
- id => 'pline_' . $id . '-' . $global_pline_idx,
- style => {
- 'fill-opacity' => .5,
- 'stroke' => $stroke,
- 'fill' => $fill,
- }
- )
-
-}
-
-sub make_scale {
- my ( $self, $i, $start, $stop ) = @_;
-
- # Left axis label, must be rotated
- my $tmp_x = $self->fix_x_value(-30); #$self->x_border()-30;
- my $tmp_y = $self->fix_y_value($i) + 50;
- $self->svg()->text(
- id => 'left_side_label_row_' . $i,
- x => $tmp_x - 20,
- y => $tmp_y - 20,
- 'font-family' => 'Helvetica, sans-serif',
- 'transform' => sprintf( 'rotate(-90 %s %s)', $tmp_x, $tmp_y ),
- )->cdata('Start/End Hit Count Scale');
-
- # Right axis label, must be rotated
- $tmp_x = $self->fix_x_value( $self->row_width() + 60 );
- $tmp_y = $self->fix_y_value($i);
- $self->svg()->text(
- id => 'right_side_label_row_' . $i,
- x => $tmp_x - 20,
- y => $tmp_y,
- 'font-family' => 'Helvetica, sans-serif',
- 'transform' => sprintf( 'rotate(-90 %s %s)', $tmp_x, $tmp_y ),
- )->cdata('Coverage Density');
-
- # Horizontal increments
- for ( my $k = -4 ; $k <= 4 ; $k++ ) {
-
- # Left side label
- my $y_position = $k / 4 * $self->line_height();
- $self->svg()->text(
- id => sprintf( 'label_left_side_row_%s_%s', $i, $k ),
- x => $self->fix_x_value(-30),
- y => $self->fix_y_value( $i, $y_position ),
- 'font-family' => 'Helvetica, sans-serif',
- )->cdata( int( $self->start_end_max_num() * abs( $k / 4 ) ) );
-
- # Right side label
- $self->svg()->text(
- id => sprintf( 'label_right_side_row_%s_%s', $i, $k ),
- x => $self->fix_x_value( $self->row_width() + 10 ),
- y => $self->fix_y_value( $i, $y_position ),
- 'font-family' => 'Helvetica, sans-serif',
- )->cdata( int( $self->max() * abs( $k / 4 ) ) );
-
- # Vertical lines
- $self->svg()->line(
- x1 => $self->fix_x_value( $self->row_width() ),
- x2 => $self->fix_x_value(0),
- y1 => $self->fix_y_value( $i, $y_position ),
- y2 => $self->fix_y_value( $i, $y_position ),
- id => sprintf( 'vertical_increment_row_%s_%s', $i, $k ),
- opacity => .25,
- stroke => 'rgb(0,0,0)',
- 'stroke-width' => '2',
- );
- }
-
- # Vertical Increments
- my $number_of_increments = 10;
- for (
- my $k = 0 ;
- $k <= $self->row_width() ;
- $k += ( $self->row_width() / $number_of_increments )
- )
- {
-# We get % of way across (k/num_inc) and we multiply by the width value, to get % of width which we adjust with start to get correct value
- my $b =
- ( $k / $self->row_width() ) * ( $stop - $start ) + $start;
- my $kb = $b / 1000;
- $self->svg()->text(
- id =>
- sprintf( 'vertical_line_label_row_%s_%s', $i, $k ),
- x => $self->fix_x_value($k),
- y =>
- $self->fix_y_value( $i, $self->line_height() + 20 ),
- 'font-family' => 'Helvetica, sans-serif',
- )->cdata( ($kb) . ' kb' );
-
- $self->svg()->line(
- x1 => $self->fix_x_value($k),
- x2 => $self->fix_x_value($k),
- y1 => $self->fix_y_value( $i, -$self->line_height() ),
- y2 => $self->fix_y_value( $i, $self->line_height() ),
- id => sprintf( 'vertical_line_row_%s_%s', $i, $k ),
- opacity => .5,
- stroke => 'rgb(0,0,0)',
- 'stroke-width' => '1',
- );
- }
-}
-
-sub plot_title {
- my ( $self, $string ) = @_;
- $self->svg()->text(
- id => 'label_plot_title',
- x => $self->x_border(),
- y => 50 + $self->vertical_offset(),
- 'font-family' => 'Helvetica, sans-serif',
- 'font-size' => '150%',
- )->cdata($string);
- $self->vertical_offset( $self->vertical_offset() + 25 );
-}
-
-sub plot_key {
- my ( $self, $text, $stroke, $colour, $i ) = @_;
-
- $self->svg()->rectangle(
- x => $self->x_border(),
- y => 50 + $self->vertical_offset() - 15,
- width => 15,
- height => 15,
- id => 'label_key_example' . $i,
- 'fill-opacity' => .5,
- 'stroke' => $stroke,
- 'fill' => $colour,
- );
- $self->svg()->text(
- id => 'label_key_string' . $i,
- x => $self->x_border() + 20,
- y => 50 + $self->vertical_offset(),
- 'font-family' => 'Helvetica, sans-serif',
- )->cdata($text);
- $self->vertical_offset( $self->vertical_offset + 20 );
-}
-
-sub xmlify {
- my ($self) = @_;
- return $self->svg()->xmlify();
-}
-
-sub x_values_for_range_scaled {
- my ( $self, $start, $end, $pieces ) = @_;
- my @vals = ($start);
- my $by = ( $end - $start ) / $pieces;
- ## For all values from the x_border to xborder+row_width, add a value of row_width split/row_size (i.e., how far for EACH INDIVIDUAL value)
- for ( my $i = $start ; $i < $end ; $i += $by ) {
- push( @vals, $i );
- }
- push( @vals, $end );
- return @vals;
-}
-
-sub fix_x_values {
- my ( $self, @values ) = @_;
- return map { $self->fix_x_value($_) } @values;
-}
-
-sub fix_x_value {
- my ( $self, $val ) = @_;
- return $val + $self->x_border();
-}
-
-sub fix_y_value {
- my ( $self, $i, $val ) = @_;
- return (
- $self->vertical_offset() + $val - $self->line_height() + (
- ( ( 2 + $i ) * $self->line_height() ) +
- ( $i * $self->inter_line_spacing() ) +
- $self->y_border()
- )
- );
-}
-
-sub fix_all_y_values {
- my ( $self, $i, @arrays_to_fix, ) = @_;
- for ( my $j = 0 ; $j < scalar @arrays_to_fix ; $j++ ) {
-
- # For each array in postive_y (AoA)
- #
- # we cast to array, then we map this, then we have this
- # in an anonymous array which means we can just
- # replace. This is probably not as efficient as looking
- # at every value directly and doing "in place"
- # replacement, but I don't know how that would be
- # written here...
- $arrays_to_fix[$j] =
- [ map { $self->fix_y_value( $i, $_ ) }
- @{ $arrays_to_fix[$j] } ];
- }
- return @arrays_to_fix;
-}
-
-sub copy_data {
- my ( $self, $start, $stop, $data_to_ref, $data_from_ref, $max ) = @_;
-
- # Copy data from the original array to the new one, transforming out
- # the subset of interest. This is done across an AoA
- my @data_to = @{$data_to_ref};
- my @data_from = @{$data_from_ref};
- for ( my $k = 0 ; $k < scalar @data_from ; $k++ ) {
- foreach ( my $j = $start ; $j < $stop ; $j++ )
- { #1 to 10_000 in the genome
- if ( defined ${ $data_from[$k] }[$j] ) {
- push(
- @{ $data_to[$k] },
- -(
- $self->line_height() *
- ${ $data_from[$k] }[$j] /
- $max
- )
- );
- }
- else {
- push( @{ $data_to[$k] }, 0 );
- }
- }
- }
- return @data_to;
-}
-
-sub plot_individual_row {
- my ( $self, $start, $stop, $i, $regular_ref, $rescale_ref ) = @_;
-
- my @regular = map { ${$_}{data} } @{$regular_ref};
- my @rescale = map { ${$_}{data} } @{$rescale_ref};
- my @regular_y;
- my @rescale_y;
- ## Ensure we duplicate the number of arrays.
- foreach (@regular) {
- push( @regular_y, [] );
- }
- foreach (@rescale) {
- push( @rescale_y, [] );
- }
-
- # Determine bounds of row
- $self->push_all( \@regular_y, 0 );
- $self->push_all( \@rescale_y, 0 );
-
- @regular_y =
- $self->copy_data( $start, $stop, \@regular_y, \@regular,
- $self->start_end_max_num() );
- @rescale_y =
- $self->copy_data( $start, $stop, \@rescale_y, \@rescale,
- $self->max() );
-
- #print @rescale_y;
-
- # Set up our X values
- my @x_values = $self->fix_x_values(
- $self->x_values_for_range_scaled(
- 0, $self->row_width(), ( $stop - $start )
- )
- );
-
-#my @x_values_rescale = $self->fix_x_values( $self->x_values_for_range_scaled(0, $self->row_width(), ( $stop - $start )));
-
- $self->push_all( \@regular_y, 0 );
- $self->push_all( \@rescale_y, 0 );
-
- # Fix the ys
- @regular_y = $self->fix_all_y_values( $i, @regular_y );
- @rescale_y = $self->fix_all_y_values( $i, @rescale_y );
-
- # Prepare our styling
- my @regular_line = map { ${$_}{line} } @{$regular_ref};
- my @rescale_line = map { ${$_}{line} } @{$rescale_ref};
- my @regular_fill = map { ${$_}{fill} } @{$regular_ref};
- my @rescale_fill = map { ${$_}{fill} } @{$rescale_ref};
-
- # Add data to plot
- $self->svg_add_track(
- \@x_values, \@regular_y, \@regular_line,
- \@regular_fill, "$i-$start-$stop"
- );
- $self->svg_add_track(
- \@x_values, \@rescale_y, \@rescale_line,
- \@rescale_fill, "$i-$start-$stop"
- );
-
- # scale
- $self->make_scale( $i, $start, $stop );
-}
-
-sub debug {
- my ( $self, $title, @arrs ) = @_;
- print "=" x 16 . "\n";
- foreach (@arrs) {
- my @arr = @{$_};
- printf "Array %s : %s\n", $title, scalar @arr;
- print "\t"
- . join( ',',
- map { sprintf( '%-10d', int($_) ) } @arr[ 0 .. 10 ] )
- . "\n";
- my $a = scalar(@arr) - 11;
- my $b = scalar(@arr) - 1;
- print "\t"
- . join( ',',
- map { sprintf( '%-10d', int($_) ) } @arr[ $a .. $b ] )
- . "\n";
- }
-}
-
-sub svg_add_track {
- my (
- $self, $x_values_ref, $data_ref,
- $line_ref, $fill_ref, $base_track_id
- ) = @_;
- my @x_values = @{$x_values_ref};
- my @data = @{$data_ref};
- my @lines = @{$line_ref};
- my @fills = @{$fill_ref};
-
- my $it = each_array( @data, @lines, @fills );
- while ( my ( $pry, $prl, $prf ) = $it->() ) {
- my $plot_data = $self->svg()->get_path(
- x => \@x_values,
- y => $pry,
- -type => 'polyline',
- -closed => 'false' #specify that the polyline is closed.
- );
- $self->plot_track( $plot_data, $prl, $prf, "$base_track_id" );
- }
-}
-
-sub plot_data {
- my ( $self, %d ) = @_;
- $self->add_header( $d{regular}, $d{rescale} );
-
- ##loop through rows
- foreach ( my $i = 0 ; $i < $self->num_rows() ; $i++ ) {
- my ( $start, $stop ) =
- ( $i * $self->row_size(), ( $i + 1 ) * $self->row_size() );
- $self->plot_individual_row( $start, $stop, $i, $d{regular},
- $d{rescale}, );
- }
-}
-
-sub plot_data_subset {
- my ( $self, %d ) = @_;
-
- $self->add_header( $d{regular}, $d{rescale} );
- ##loop through rows
- $self->plot_individual_row( $d{from}, $d{to}, 0, $d{regular},
- $d{rescale}, );
-}
-
-sub push_all {
- my ( $self, $array_ref, @values ) = @_;
- foreach ( @{$array_ref} ) {
- push( @{$_}, @values );
- }
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Analysis::PAUSE::SVG - Library for use in PAUSE analysis
-
-=head1 VERSION
-
-version 1.96
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Analysis/TerL.pm
--- a/cpt_psm_recombine/lib/CPT/Analysis/TerL.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,517 +0,0 @@
-package CPT::Analysis::TerL;
-
-# ABSTRACT: Guess phage packaging strategy based on homology to terminases (TerL) of phages with known packaging strategies
-
-use strict;
-use warnings;
-use Data::Dumper;
-use autodie;
-use Moose;
-use Bio::SearchIO;
-use File::ShareDir;
-use File::Spec;
-
-has 'hmmer_evalue_cutoff' => ( is => 'rw', isa => 'Int' );
-has 'blast_evalue_cutoff' => ( is => 'rw', isa => 'Int' );
-has 'blast_dice_cutoff' => ( is => 'rw', isa => 'Int' );
-
-has 'search_hmmer' => ( is => 'rw', isa => 'Bool' );
-has 'search_blast' => ( is => 'rw', isa => 'Bool' );
-
-has 'data_dir' => ( is => 'rw', isa => 'Any' );
-
-has 'awk_string' => (
- is => 'ro',
- isa => 'Str',
- default =>
-'BEGIN{print "#row,query id,subject id,evalue,dice" } {qid=$1;sid=$2;percent_identical=$3;query_length=$4;subject_length=$5;evalue=$6;dice=(2*percent_identical*subject_length/ ( subject_length + query_length ) );printf("%d,%s,%s,%s,%s\n",FNR, qid, sid, dice, evalue);}'
-);
-
-has 'input_file' => ( is => 'rw', isa => 'Str' );
-
-my @column_max = ( 20, 10, 20, 10, 20, 60 );
-my %hits;
-
-sub run {
- my ( $self, %data ) = @_;
-
- #$self->prepare(%data);
- my $dir = '/galaxy/tool-data/' # File::ShareDir::dist_dir('CPT-Analysis-TerL');
- $self->data_dir($dir);
- $self->input_file( $data{input_file} );
-
- if ( $self->search_hmmer() ) {
- $self->hmmer();
- }
- if ( $self->search_blast() ) {
- $self->blast($self);
- }
-
- return $self->guess();
-}
-
-sub hmmer {
- my ($self) = @_;
-
- use Term::ProgressBar 2.00;
- my $progress = Term::ProgressBar->new(
- {
- name => 'HMMER',
- count => 100,
- ETA => 'linear',
- }
- );
- $progress->max_update_rate(1);
-
- my $hmmer_db_dir =
- File::Spec->catdir( $self->data_dir(), 'db', 'hmmer' );
- my @hmmer_dbs = glob( File::Spec->catfile( $hmmer_db_dir, "*.hmm" ) );
-
- my $i = 0;
- foreach (@hmmer_dbs) {
- $progress->update( 100 * ( $i++ ) / scalar @hmmer_dbs );
- my $db_name = substr( $_, rindex( $_, '/' ) + 1, -4 );
- my $output_filename = sprintf(
- '%s.%s.out',
- substr(
- $self->input_file(), 0,
- rindex( $self->input_file(), '.' )
- ),
- $db_name
- );
- my $query = sprintf( 'hmmsearch %s %s > %s',
- $_, $self->input_file(), $output_filename );
- system($query);
-
- my $in = Bio::SearchIO->new(
- -format => 'hmmer',
- -file => $output_filename
- );
- while ( my $result = $in->next_result ) {
- while ( my $hit = $result->next_hit ) {
- while ( my $hsp = $hit->next_hsp ) {
- my ( $from, $to ) =
- ( $result->query_name, $hit->name );
- unless ( $hits{$to}{$from} ) {
- $hits{$to}{$from} = [];
- }
- push(
- $hits{$to}{$from},
- {
- 'type' => 'hmmer',
- 'data' => {
- 'evalue' => (
- $hsp
- ->evalue
- eq '0'
- ? '0.0'
- : $hsp
- ->evalue
- ),
- }
- }
- );
- }
- }
- }
- unlink($output_filename);
- }
- $progress->update(100);
-}
-
-sub blast {
- my ($self) = @_;
-
- use Term::ProgressBar 2.00;
- my $progress = Term::ProgressBar->new(
- {
- name => 'Blast',
- count => 100,
- ETA => 'linear',
- }
- );
- $progress->max_update_rate(1);
-
- my $blast_db_dir =
- File::Spec->catdir( $self->data_dir(), 'db', 'blast' );
- my @blast_dbs =
- map { substr( $_, 0, -4 ) }
- glob( File::Spec->catfile( $blast_db_dir, "*.phr" ) );
-
- my $i = 0;
-
- #my %hits;
-
- foreach my $blast_db (@blast_dbs) {
- $progress->update( 100 * ( $i++ ) / scalar(@blast_dbs) );
- my $output_str =
- substr( $blast_db, rindex( $blast_db, '/' ) + 1 ) . '.csv';
- my $query = sprintf(
-'psiblast -query %s -db %s -evalue %s -outfmt "6 qseqid sseqid pident qlen slen evalue" | awk \'%s\' > %s',
- $self->input_file(), $blast_db, '1e-5',
- $self->awk_string(), $output_str );
- system($query);
- open( my $tmpfh, '<', $output_str );
- while (<$tmpfh>) {
- chomp $_;
- if ( $_ !~ /^#/ ) {
- my @line = split( /,/, $_ );
- unless ( $hits{ $line[1] }{ $line[2] } ) {
- $hits{ $line[1] }{ $line[2] } = [];
- }
- push(
- $hits{ $line[1] }{ $line[2] },
- {
- 'type' => 'psiblast',
- 'data' => {
- 'evalue' => $line[4],
- 'dice' => $line[3],
- }
- }
- );
- }
- }
- close($tmpfh);
- unlink($output_str);
-
- }
- $progress->update(100);
-}
-
-sub guess {
- my ($self) = @_;
-
- open( my $groupings_fh,
- '<',
- File::Spec->catfile( $self->data_dir(), 'groupings.tsv' ) );
-
- # Load groupings.tsv into memory
- my %data;
- while (<$groupings_fh>) {
- if ( $_ !~ /^#/ ) {
- chomp $_;
- my ( $major, $minor, $hit ) = split( /\t/, $_ );
- unless ( $data{$major}{$minor} ) {
- $data{$major}{$minor} = [];
- }
- push( $data{$major}{$minor}, $hit );
- }
- }
-
- # Create a reverse lookup table
- my %rdata;
- foreach my $i ( keys %data ) {
- foreach my $j ( keys %{ $data{$i} } ) {
- foreach my $k ( @{ $data{$i}{$j} } ) {
- if ( defined($k) ) {
- $rdata{$k} = [ $i, $j ];
- }
- else {
- print "$i $j\n";
- }
- }
- }
- }
-
- # Table printing stuff
- my @header = (
- 'Major Category',
- 'Major hits',
- 'Minor Category',
- 'Minor hits',
- 'Analysis',
- 'Evidence Type',
- 'Evidence'
- );
- my %output;
-
- # Loop across the input keys
- foreach my $input_key ( keys %hits ) {
- my %guesses;
- my %guess_evidence;
-
- # And across all of the hits that the query hit to
- foreach my $against ( keys %{ $hits{$input_key} } ) {
-
- # We look at the evidence
- my @evidence = @{ $hits{$input_key}{$against} };
-
- my ( $type_major, $type_minor );
- if ( $rdata{$against} ) {
- ( $type_major, $type_minor ) =
- @{ $rdata{$against} };
- }
- else {
- ( $type_major, $type_minor ) = (
- substr(
- $against, 0,
- rindex( $against, '_' )
- ),
- substr(
- $against,
- rindex( $against, '_' ) + 1
- )
- );
- }
-
- # Prepare hashes.
- unless ( $guesses{$type_major} ) {
- $guesses{$type_major} = ();
- }
- unless ( $guesses{$type_major}{$type_minor} ) {
- $guesses{$type_major}{$type_minor} = ();
- }
-
- # Loop across the evidence
- foreach my $piece_of_evidence (@evidence) {
-
- # Here is an example piece of evidence
- # 'GK_Gilmour_Gene43' => {
- # 'SP18' => [
- # {
- # 'data' => {
- # 'evalue' => '2e-08',
- # 'dice' => '23.8627'
- # },
- # 'type' => 'psiblast'
- # }
- # ],
- #
- if ( $type_major !~ /subject i/ ) {
- my %piece = %{$piece_of_evidence};
- if (
- $self->validate_evidence(
- $piece_of_evidence) > 0
- )
- {
- $guess_evidence{$type_major}++;
- $guess_evidence{
- "$type_major$type_minor"
- }++;
-
- # If it's not defined, set up sub arrays.
- unless ( $guesses{$type_major}
- {$type_minor}
- { $piece{'type'} } )
- {
- if ( $piece{'type'}
- eq 'psiblast' )
- {
- $guesses{
- $type_major
- }{$type_minor}
- {
- $piece{
-'type'
- }
- }
- = {
- 'evalue'
- => [],
- 'dice'
- => []
- };
- }
- else {
- $guesses{
- $type_major
- }{$type_minor}
- {
- $piece{
-'type'
- }
- }
- = { 'evalue'
- => []
- };
- }
- }
-
- # If it's zero, correct to zero.
- if ( $piece{'data'}{'evalue'}
- eq '0.0' )
- {
- $piece{'data'}{'evalue'}
- = '0.0';
- }
-
- # Add our evalue
- push(
- $guesses{$type_major}
- {$type_minor}
- { $piece{'type'} }
- {'evalue'},
- $piece{'data'}{'evalue'}
- );
-
- # And if psiblast, add dice
- if ( $piece{'type'} eq
- 'psiblast' )
- {
- push(
- $guesses{
- $type_major
- }{$type_minor}
- {
- $piece{
-'type'
- }
- }{'dice'},
- $piece{'data'}
- {'dice'}
- );
- }
- }
- }
- }
- }
-
- my @output_sheet;
-
- foreach my $major ( keys %guesses ) {
- if ( $guess_evidence{$major} ) {
- foreach my $minor ( keys %{ $guesses{$major} } )
- {
- if ( $guess_evidence{"$major$minor"} ) {
- foreach my $evidence_category (
- keys %{
- $guesses{$major}
- {$minor}
- }
- )
- {
- if ( $evidence_category
- ne 'evidence' )
- {
- # things like evalue, dice
- my %hits = %{
- $guesses{
- $major
- }{
- $minor
- }{
- $evidence_category
- }
- };
- foreach
- my $subtype (
- keys
- %hits )
- { # should be evalue, dice
- push(
- @output_sheet,
- [
- $major,
- $guess_evidence{
- $major
- }
- ,
- $minor,
- $guess_evidence{
-"$major$minor"
- }
- ,
- $evidence_category,
- $subtype,
- join
- (
-',',
- @{
- $hits{
- $subtype
- }
- }
- )
- ]
- );
- }
- }
- }
- }
- }
- }
- }
-
- if ( !scalar @output_sheet ) {
- @output_sheet = ( ['No evidence above threshold'], );
- }
- $output{$input_key} = {
- header => \@header,
- data => \@output_sheet,
- };
- }
- return \%output;
-}
-
-sub validate_evidence {
- my ( $self, $piece_of_evidence ) = @_;
- my %piece = %{$piece_of_evidence};
-
- #my ($self, $type, $subtype, $value) = @_;
- # {
- # 'data' => {
- # 'evalue' => '2e-08',
- # 'dice' => '23.8627'
- # },
- # 'type' => 'psiblast'
- # }
- if ( $piece{type} eq 'hmmer' ) {
- my $value = $piece{data}{evalue};
- if ( $value eq '0.0' || $value eq '0' ) {
- return 1;
- }
- elsif ( !defined($value) || $value eq '' ) {
- return 0;
- }
- else {
- return ( log($value) < $self->hmmer_evalue_cutoff() )
- ; # -64
- }
- }
- elsif ( $piece{type} eq 'psiblast' ) {
- my $evalue = $piece{data}{evalue};
- my $dice = $piece{data}{dice};
-
- my $evalue_return = 0;
- if ( $evalue eq '0.0' || $evalue eq '0' ) {
- $evalue_return = 1;
- }
- else {
- $evalue_return =
- ( log($evalue) < $self->blast_evalue_cutoff() ); #-140
- }
-
- return $evalue_return
- && ( $dice > $self->blast_dice_cutoff() ); # 30
- }
-}
-
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Analysis::TerL - Guess phage packaging strategy based on homology to terminases (TerL) of phages with known packaging strategies
-
-=head1 VERSION
-
-version 1.96
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Auth.pm
--- a/cpt_psm_recombine/lib/CPT/Auth.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,84 +0,0 @@
-package CPT::Auth;
-use strict;
-use warnings;
-use autodie;
-use Moose;
-
-has DN => (
- is => 'rw',
- isa => 'Str',
-);
-
-
-sub check_credentials {
- my ( $self, %params ) = @_;
- use Net::LDAPS;
- print STDERR "Connecting to LDAP\n";
- my $base = 'dc=tamu,dc=edu';
- my $ldap = Net::LDAPS->new('00-ldap-biobio.tamu.edu') or die "$@";
- my $mesg = $ldap->bind; # an anonymous bind
-
- my $username = $params{'username'};
- my $password = $params{'password'};
-
- $mesg = $ldap->search( # perform a search
- base => $base,
- filter => "uid=$username",
- );
- my $max = $mesg->count;
-
- # Should we exit early?
- for ( my $i = 0 ; $i < $max ; $i++ ) {
- my $entry = $mesg->entry($i);
- $self->DN() = $entry->dn();
- }
- $mesg = $ldap->bind( $self->DN(), password => $password );
- if ( $mesg->error() eq 'Success' ) {
- return 1;
-
- #print "Succesfully logged you in";
- }
- else {
- return 0;
-
- #print "Error: ";
- #print $mesg->error();
- }
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Auth
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 check_credentials
-
- $cptauth->check_credentials(username=>'J.doe',password=>$password);
-
-return 1 or 0, based on success or failure, respectively.
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio.pm
--- a/cpt_psm_recombine/lib/CPT/Bio.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,337 +0,0 @@
-package CPT::Bio;
-use Moose;
-use strict;
-use warnings;
-use autodie;
-use CPT::FiletypeDetector;
-use CPT::BioData;
-my $bd = CPT::BioData->new();
-
-my $filetype = CPT::FiletypeDetector->new();
-
-has 'var_translate' => ( is => 'rw', isa => 'Bool');
-has 'var_header' => ( is => 'rw', isa => 'Bool');
-has codonTable => (
- is => 'rw',
- isa => 'Any',
- default => sub {
- $bd->getTranslationTable(11)
- },
-);
-
-sub set_codon_table {
- my ($self, $num) = @_;
- $self->codonTable($bd->getTranslationTable($num));
-}
-
-
-sub _getFeatureTag {
- my ( $self, $feat, $tag ) = @_;
- if(! defined($feat)){
- warn "Undefined feature";
- }
- return $feat->has_tag($tag)
- ? ( join( ',', $feat->get_tag_values($tag) ) )
- : '';
-}
-
-sub _getIdentifier {
- my ( $self, $feat ) = @_;
- my $line;
- if ( ref $feat eq 'Bio::Seq::RichSeq' || ref $feat eq 'Bio::Seq' ) {
- return $feat->display_id;
- }
- else {
- my $locus_tag = $self->_getFeatureTag( $feat, 'locus_tag' );
- if ($locus_tag) {
- return $locus_tag;
- }
- my $gene = $self->_getFeatureTag( $feat, 'gene' );
- if ($gene) {
- return $gene;
- }
- my $product = $self->_getFeatureTag( $feat, 'product' );
- if ($product) {
- return $product;
- }
- }
- return sprintf("%s_%s_%s", $feat->start(), $feat->end(), ($feat->strand() == 1 ? 'sense':'antisense'));
-}
-
-
-sub requestCopy {
- my ( $self, %data ) = @_;
- use Bio::SeqIO;
- if ($data{'file'} ) {
- my ($guessed_type) = $filetype->detect( $data{'file'} );
- my $seqio = Bio::SeqIO->new(
- -file => $data{'file'},
- -format => $guessed_type
- );
- my @results;
- while ( my $seqobj = $seqio->next_seq() ) {
- return \$seqobj;
- }
- }
- else {
- die "No file specified";
- }
-}
-
-
-sub getSeqIO {
- my ( $self, $file ) = @_;
- use Bio::SeqIO;
- if ($file ) {
- my ($guessed_type) = $filetype->detect( $file );
- my $seqio = Bio::SeqIO->new(
- -file => $file,
- -format => $guessed_type
- );
- return $seqio;
- }
- else {
- die "No file specified";
- }
-}
-
-
-sub parseFile {
- my ( $self, %data ) = @_;
- use Bio::SeqIO;
-
- my ($guessed_type) = $filetype->detect( $data{'file'} );
- my $seqio = Bio::SeqIO->new(
- -file => $data{'file'},
- -format => $guessed_type
- );
-
- # Are we to translate this
- $self->var_translate(defined($data{translate}) && $data{translate});
- $self->var_header(defined($data{header}) && $data{header});
-
- my @results;
- if ( not defined $data{'subset'} ) {
- $data{'subset'} = 'all';
- }
- while ( my $seqobj = $seqio->next_seq() ) {
- if (
- (ref $data{'subset'} ne 'ARRAY'
- && $data{'subset'} eq 'whole' ) # Want the whole thing for a richseq
- ||
- (ref $seqobj eq 'Bio::Seq' || ref $seqobj eq 'Bio::Seq::fasta')
- # or it's a fasta type sequence
- )
- {
- push( @results, $self->handle_seq($seqobj));
- }
- else #data subset eq sometag
- {
- my %wanted_tags;
- if ( ref $data{'subset'} eq 'ARRAY' ) {
- %wanted_tags =
- map { $_ => 1 } @{ $data{'subset'} };
- }
- else {
- $wanted_tags{ $data{'subset'} }++;
- }
- foreach my $feat ( $seqobj->get_SeqFeatures ) {
- if (
- $wanted_tags{ $feat->primary_tag }
- || ( $wanted_tags{'all'}
- && $feat->primary_tag ne
- "source" )
- )
- {
- push( @results, $self->handle_seq($feat));
- }
- }
- }
- }
- if ( $data{'callback'} ) {
- $data{'callback'}->( \@results );
- }
- else {
- return \@results;
- }
-}
-
-sub handle_seq {
- my ($self, $obj) = @_;
-
- my @line;
- if ( $self->var_header() ){
- $line[0] = '>' . $self->_getIdentifier($obj);
- }
-
- # Get our sequence
- $line[1] = $self->intelligent_get_seq($obj);
-
- if ( $self->var_translate() ) {
- $line[1] = $self->translate($line[1]);
- }
- return \@line;
-}
-
-sub intelligent_get_seq {
- my ($self, $obj, %extra) = @_;
- # Top level, e.g., fasta/gbk file, "extra" doesn't apply to these
- if ( ref $obj eq 'Bio::Seq::RichSeq' || ref $obj eq 'Bio::Seq' ) {
- return $obj->seq;
- }else{
- return $self->get_seq_from_feature($obj, %extra);
- }
-}
-sub get_seq_from_feature {
- my ($self, $feat, %extra) = @_;
- my $seq;
- my $l;
- if($extra{parent}){
- $l = $extra{parent}->length();
- }
-
- if($extra{upstream}){
- if($feat->strand < 0){
- my $y = $feat->end + 1;
- my $z = $feat->end + $extra{upstream};
- if($y < $l){
- if($z > $l){
- $z = $l;
- }
- $seq .= $extra{parent}->trunc($y, $z)->revcom->seq;
- }
- }else{
- my $y = $feat->start - $extra{upstream};
- my $z = $feat->start - 1;
- if($z > 0){
- if($y < 1){
- $y = 1;
- }
- $seq .= $extra{parent}->trunc($y, $z)->seq;
- }
- }
- }
- if(ref($feat->location) eq 'Bio::Location::Simple'){
- $seq .= $feat->seq->seq();
- }else{
- $seq .= $feat->spliced_seq->seq();
- }
- if($extra{downstream}){
- if($feat->strand < 0){
- my $y = $feat->start - $extra{downstream};
- my $z = $feat->start - 1;
- if($z > 0){
- if($y < 1){
- $y = 1;
- }
- $seq .= $extra{parent}->trunc($y, $z)->revcom->seq;
- }
- }else{
- my $y = $feat->end + 1;
- my $z = $feat->end + $extra{downstream};
- if($y < $l){
- if($z > $l){
- $z = $l;
- }
- $seq .= $extra{parent}->trunc($y, $z)->seq;
- }
- }
- }
-
- return $seq;
-}
-
-
-sub translate {
- my ($self, $seq) = @_;
- if($seq =~ /^[ACTGN]+$/){
- my %ct = %{$self->codonTable};
- $seq = join( '' , map { if($ct{$_}){ $ct{$_} }else{ () } } unpack("(A3)*", $seq));
- }
- return $seq;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 _getFeatureTag
-
- my $tag = $libCPT->_getFeatureTag($feature,'note');
-
-returns all values of the given tag, joined with ','.
-
-=head2 requestCopy
-
- my $seqobj = $libCPT->requestCopy('file'=>'test.gbk');
-
-requests a 'copy' of a given Bio::SeqIO file, which allows for addition of features before writing out to file.
-
-=head2 getSeqIO
-
- my $seqio = $libCPT->getSeqIO('file'=>'test.gbk');
-
-requests a 'copy' of a given Bio::SeqIO file, which allows for addition of features before writing out to file.
-
-=head2 parseFile
-
- $libCPT->parseFile(
- 'file' => $options{'file'},
- 'callback' => \&func,
- 'translate' => 1,
- 'header' => 1,
- 'subset' => ['CDS', $options{'tag'}],
- );
-
-Arguably the most important function in this library, wraps a lot of functionality in a clean wrapper, since most of the scripts we have are written around data munging.
-
-=over 4
-
-=item *
-
-file - the Bio::SeqIO file to process
-
-=item *
-
-callback - the function to send our data to. Done all at once, in an array
-
-=item *
-
-translate - should we translate the sequence to amino acids if it's not already.
-
-=item *
-
-subset - either "whole", a valid tag, or an array of valid tags
-
-=item *
-
-header - Do we want a header (FASTA) with our result set
-
-=back
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/DataSource.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/DataSource.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-package CPT::Bio::DataSource;
-use Moose::Role;
-use strict;
-use warnings;
-use autodie;
-
-requires 'getSeqIO';
-
-
-sub test {
-
-}
-
-no Moose::Role;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::DataSource
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 test
-
- $ds->test()
-
-empty method for now
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/DataSource/Chado.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/DataSource/Chado.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-package CPT::Bio::DataSource::Chado;
-no warnings;
-use Moose;
-with 'CPT::Bio::DataSource';
-
-has 'host' => ( is => 'rw', isa => 'Str' );
-has 'pass' => ( is => 'rw', isa => 'Str' );
-has 'user' => ( is => 'rw', isa => 'Str' );
-has 'name' => ( is => 'rw', isa => 'Str' );
-has 'port' => ( is => 'rw', isa => 'Str' );
-
-has 'landmark' => ( is => 'rw', isa => 'Str' );
-has 'organism' => ( is => 'rw', isa => 'Str' );
-
-
-sub getSeqIO {
- my ($self) = @_;
- require CPT::Chado::GMOD_Conf;
-
- my $db = Bio::DB::Das::Chado->new(
- -dsn => sprintf( 'dbi:Pg:dbname=%s;host=%s;port=%s', $self->name(), $self->host(), $self->port() ),
- -user => $self->user(),
- -pass => $self->pass(),
- -organism => $self->organism(),
- -inferCDS => 1,
-
- );
-
- # Get a list of "segments". Essentially (seqlen IS NOT NULL)
- my @segments = $db->segment( -name => $self->{'landmark'} );
-
- # TODO: Need to have a fallback method
- # Should only produce ONE since we specify landmark exactly
- foreach my $segment (@segments) {
- my $stream = $segment->get_feature_stream();
- use Bio::Seq;
- my $seq_obj = Bio::Seq->new(
- -seq => $segment->seq->seq(),
- -display_id => $segment->id()
- );
- use Bio::SeqFeature::Generic;
- while ( my $feat = $stream->next_seq ) {
-
- # In an IDEAL world we'd just do $seq_obj->add_SeqFeature($feat);
- #
- # HOWEVER.
- #
- # ------------- EXCEPTION: Bio::Root::NotImplemented -------------
- # MSG: Abstract method "Bio::DB::Das::Chado::Segment::Feature::attach_seq" is not implemented by package Bio::DB::Das::Chado::Segment::Feature.
- # This is not your fault - author of Bio::DB::Das::Chado::Segment::Feature should be blamed!
- # STACK: Error::throw
- # STACK: Bio::Root::Root::throw /usr/local/share/perl/5.14.2/Bio/Root/Root.pm:472
- # STACK: Bio::Root::RootI::throw_not_implemented /usr/local/share/perl/5.14.2/Bio/Root/RootI.pm:748
- # STACK: Bio::DB::Das::Chado::Segment::Feature::attach_seq /usr/local/share/perl/5.14.2/Bio/DB/Das/Chado/Segment/Feature.pm:374
- # STACK: Bio::Seq::add_SeqFeature /usr/local/share/perl/5.14.2/Bio/Seq.pm:1148
- # STACK: chado_export.pl:59
- # ----------------------------------------------------------------
-
- # BUT WE CAN'T. >_> rageface.tiff
-
- my %keys;
- foreach my $tag ( $feat->get_all_tags() ) {
- my @values = $feat->get_tag_values($tag);
- if ( $tag eq 'Note' ) {
- $tag = 'note';
- }
- if ( $tag eq 'Dbxref' ) {
- $tag = 'db_xref';
-
- #@values = map { if($_ ne 'GFF_source:Genbank'){ $_ } } @values;
- @values = grep !/GFF_source:Genbank/, @values;
- }
- $keys{$tag} = \@values;
- }
-
- #print $feat->gff_string(),"\n";
- my $new_feat = new Bio::SeqFeature::Generic(
- -start => $feat->start(),
- -end => $feat->end(),
- -strand => $feat->strand(),
- -primary_tag => $feat->primary_tag(),
- -tag => \%keys,
- );
- $seq_obj->add_SeqFeature($new_feat);
- }
-
- return $seq_obj;
- }
-
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::DataSource::Chado
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 getSeqIO
-
-supposed to get a seqIO object from a chado DB. not fully implemented
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/DataSource/GFF3.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/DataSource/GFF3.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-package CPT::Bio::DataSource::GFF3;
-no warnings;
-use Moose;
-with 'CPT::Bio::DataSource';
-
-
-sub getSeqIO {
- die 'unimplemented';
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::DataSource::GFF3
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 getSeqIO
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/DataSource/GenBank.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/DataSource/GenBank.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-package CPT::Bio::DataSource::GenBank;
-no warnings;
-use Moose;
-with 'CPT::Bio::DataSource';
-
-has 'file' => ( is => 'rw', isa => 'Str' );
-
-
-sub getSeqIO {
- my ($self) = @_;
- use Bio::SeqIO;
- my $seqio = Bio::SeqIO->new(
- -file => $self->file(),
- -format => 'GenBank',
- );
- my @results;
- return $seqio;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::DataSource::GenBank
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 getSeqIO
-
- $gbk_ds->getSeqIO();
-
-supposed to return a genabnk DS. Not tested.
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/Dbxref.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/Dbxref.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-package CPT::Bio::Dbxref;
-use Moose;
-use autodie;
-use YAML;
-use File::ShareDir;
-use File::Spec qw/catfile/;
-
-has 'regex_map' => ( is => 'rw', isa => 'HashRef');
-has 'initialized' => ( is => 'rw', isa => 'Int', default => 0);
-
-sub init {
- my ($self) = @_;
- # Locate file
- my $data_dir = File::ShareDir::dist_dir('libCPT');
- my $dbxref_data = File::Spec->catfile($data_dir, 'dbxref.yaml');
- # Parse
- $self->regex_map(YAML::LoadFile($dbxref_data));
- $self->initialized(1);
-}
-
-sub get_prefix {
- my ($self, $dbxref) = @_;
- if(!$self->initialized()){
- $self->init();
- }
-
- my @hits;
- my %map = %{$self->regex_map()};
- # Search through regex database
- foreach my $db(keys(%map)){
- if(defined($map{$db}{local_id_syntax})){
- my $ref = $map{$db}{local_id_syntax};
- if(ref($ref) eq 'ARRAY'){
- foreach my $regi(@{$ref}){
- if($dbxref =~ /$regi/){
- push(@hits, $map{$db}{abbreviation});
- }
- }
- }
- if($dbxref =~ /$ref/){
- push(@hits, $map{$db}{abbreviation});
- }
- }
- }
- return @hits;
-}
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::Dbxref
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/GFF_Parsing.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/GFF_Parsing.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-package CPT::Bio::GFF_Parsing;
-use Moose;
-use autodie;
-
-my $tags = 'allele anticodon artificial_location
-bio_material bound_moiety cell_line cell_type chromosome citation
-clone clone_lib codon_start collected_by collection_date compare
-country cultivar culture_collection db_xref dev_stage direction
-EC_number ecotype environmental_sample estimated_length exception
-experiment focus frequency function gap_type gene gene_synonym
-germline haplogroup haplotype host identified_by inference isolate
-isolation_source lab_host lat_lon linkage_evidence locus_tag
-macronuclear map mating_type mobile_element_type mod_base mol_type
-ncRNA_class note number old_locus_tag operon organelle organism
-partial PCR_conditions PCR_primers phenotype plasmid pop_variant
-product protein_id proviral pseudo rearranged replace
-ribosomal_slippage rpt_family rpt_type rpt_unit_range rpt_unit_seq
-satellite segment serotype serovar sex specimen_voucher
-standard_name strain sub_clone sub_species sub_strain tag_peptide
-tissue_lib tissue_type transgenic translation transl_except
-transl_table trans_splicing variety';
-my %valid_tags = map { $_ => 1 } split( /\s+/, $tags );
-
-my $keys = "-10_signal -35_signal 3'UTR 5'UTR
-CAAT_signal CDS C_region D-loop D_segment GC_signal J_segment LTR
-N_region RBS STS S_region TATA_signal V_region V_segment
-assembly_gap attenuator enhancer exon gap gene iDNA intron mRNA
-mat_peptide misc_RNA misc_binding misc_difference misc_feature
-misc_recomb misc_signal misc_structure mobile_element
-modified_base ncRNA old_sequence operon oriT polyA_signal
-polyA_site precursor_RNA prim_transcript primer_bind promoter
-protein_bind rRNA rep_origin repeat_region sig_peptide source
-stem_loop tRNA terminator tmRNA transit_peptide unsure variation";
-my %valid_keys = map { $_ => 1 } split( /\s+/, $keys );
-
-has 'tag_conv' => ( is => 'ro', isa => 'HashRef', default => sub { return {
- #tags
- 'dbxref' => 'db_xref',
- 'parent' => 'note',
- 'name' => 'label',
- 'Name' => 'label',
- 'old-name' => 'obsolete_name',
- 'id' => 'note',
- 'nat-host' => 'host',
- 'genome' => 'note',
- 'region' => 'source',
-}; });
-
-
-sub fix_gff_tag {
- my ($self, $tag) = @_;
- # Lowercase it
- if(defined $tag && defined(${$self->tag_conv()}{lc($tag)})){
- return ${$self->tag_conv()}{lc($tag)};
- }
- return $tag;
-}
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::GFF_Parsing
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/Lipo.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/Lipo.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-package CPT::Bio::Lipo;
-use Moose;
-use strict;
-use warnings;
-use Data::Dumper;
-use autodie;
-
-# ABSTRACT: Lipo finding functionality in a library.
-
-sub run_hash {
- my ( $self, $hash_ref ) = @_;
- my %hash = %{$hash_ref};
- my @return_keys;
- foreach ( sort { $a <=> $b } keys %hash ) {
- if ( $hash{$_}[4] !~ qr/^.{10,39}C/ ) {
- next;
- }
- else {
- $hash{$_}[5] = $self->run_seq( $hash{$_}[4] );
- }
- }
- return \%hash;
-}
-
-sub run_seq {
- my ( $self, $seq ) = @_;
- my @C; #A list of each C in the string, in str(10,40)
- for ( my $j = 10 ; $j < length($seq) && $j < 40 ; $j++ ) {
- if ( substr( $seq, $j, 1 ) =~ /[c]/i ) {
- push( @C, $j );
- }
- }
- my @results;
- for ( my $z = 0 ; $z < scalar(@C) ; $z++ ) {
- my $upC10 = "";
-
- #Make sure it's not ALL DEKRs 10 residues upstream. (Does that happen O.o)
- if ( substr( $seq, $C[$z] - 10, 10 ) !~ /[DEKR]/ ) {
- push(
- @results,
- [
- substr( $seq, 0, $C[$z] - 10 ),
- substr( $seq, $C[$z] - 10, 10 ),
- substr( $seq, $C[$z], 1 ),
- substr( $seq, $C[$z] + 1 ),
- ]
- );
- }
- }
- if ( scalar @results ) {
- return \@results;
- }
- return undef;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::Lipo - Lipo finding functionality in a library.
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/NW_MSA.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/NW_MSA.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,228 +0,0 @@
-package CPT::Bio::NW_MSA;
-use Moose;
-use strict;
-use warnings;
-use autodie;
-use List::Util qw(max);
-
-has 'sequences' => ( is => 'rw', isa => 'ArrayRef');
-# If true, relationships are assumed to be bidirectional
-has 'bidi' => (is => 'rw', isa => 'Bool');
-has 'relationships' => ( is => 'rw', isa => 'HashRef', default => sub { {} });
-#
-has 'verbose' => ( is => 'rw', isa => 'Num');
-#
-has 'gap_penalty' => ( is => 'rw', isa => 'Num');
-has 'match_score' => ( is => 'rw', isa => 'Num');
-has 'mismatch_score' => ( is => 'rw', isa => 'Num');
-
-# local stuff
-#has 'current_list' => ( is => 'rw', isa => 'ArrayRef');
-has 'merger' => (is => 'rw', isa => 'HashRef');
-has 'number_of_aligned_lists' => ( is => 'rw', isa => 'Num', default => sub { 0 });
-
-sub add_relationship {
- my ($self, $from, $to) = @_;
- ${$self->relationships()}{$from}{$to} = 1;
- if($self->bidi()){
- ${$self->relationships()}{$to}{$from} = 1;
- }
-}
-
-sub Sij {
- my($self, $merger_row, $query) = @_;
- # Comparing a query against a merger row
- my @check_against = @{$merger_row};
- #print "Checking " . join(",",@check_against) .":$query\t";
- foreach(@check_against){
- if(${$self->relationships()}{$query}{$_}
- || ${$self->relationships()}{$_}{$query}){
- #print $self->match_score() . "\n";
- return $self->match_score();
- }
- }
- #print $self->mismatch_score() . "\n";
- return $self->mismatch_score();
-}
-
-sub align_list {
- my ($self, $list_ref) = @_;
- # If we haven't aligned any lists, we do something special
- if($self->number_of_aligned_lists() == 0){
- # Pretend we've aligned ONE list already
- $self->number_of_aligned_lists(1);
- my @list = @{$list_ref};
- # Fake the merger
- my %merger;
- for(my $i = 0; $i < scalar @list; $i++){
- $merger{$i} = [$list[$i]];
- }
- $self->merger(\%merger);
- }else{
- $self->find_best_path($self->merger(), $list_ref);
- $self->number_of_aligned_lists($self->number_of_aligned_lists() + 1);
- }
-}
-
-sub find_best_path {
- my ($self, $merger_ref, $list_ref) = @_;
- my %merger = %{$merger_ref};
- my @list = @{$list_ref};
-
- my $max_i = scalar(keys(%merger));
- my $max_j = scalar(@list);
-
- my %score_mat;
- my %point_mat;
-
- # Initial zeros for matrices
- $point_mat{0}{0} = 'DONE';
- $score_mat{0}{0} = 0;
-
- for(my $a = 1; $a <= $max_i; $a++){
- $point_mat{$a}{0} = 'U';
- $score_mat{$a}{0} = $self->gap_penalty();
- }
- for(my $b = 1; $b <= $max_j; $b++){
- $point_mat{0}{$b} = 'L';
- $score_mat{0}{$b} = $self->gap_penalty();
- }
-
- # Score
- for(my $i = 1 ; $i <= $max_i; $i++){
- my $ci = $merger{$i-1};
- for(my $j = 1; $j <= $max_j; $j++){
- my $cj = $list[$j-1];
- # Scoring
- my $diag_score = $score_mat{$i-1}{$j-1} + $self->Sij($ci,$cj);
- my $up_score = $score_mat{$i-1}{$j} + $self->gap_penalty();
- my $left_score = $score_mat{$i}{$j-1} + $self->gap_penalty();
-
- if($diag_score >= $up_score){
- if($diag_score >= $left_score){
- $score_mat{$i}{$j} = $diag_score;
- $point_mat{$i}{$j} = 'D';
- }else{
- $score_mat{$i}{$j} = $left_score;
- $point_mat{$i}{$j} = 'L';
- }
- }else{
- if($up_score >= $left_score){
- $score_mat{$i}{$j} = $up_score;
- $point_mat{$i}{$j} = 'U';
- }else{
- $score_mat{$i}{$j} = $left_score;
- $point_mat{$i}{$j} = 'L';
- }
- }
- }
- }
-
- $self->print2DArray('score_mat', \%score_mat);
- $self->print2DArray('point_mat', \%point_mat);
-
-
- # Calculate merger
- my @new_row_set;
- my $i = $max_i + 0;
- my $j = $max_j + 0;
- while($i != 0 || $j != 0){
- my $dir = $point_mat{$i}{$j};
- my @new_row;
- if($dir eq 'D'){
- push(@new_row, @{$merger{$i-1}}, $list[$j-1]);
- $i--;
- $j--;
- }elsif($dir eq 'L'){
- push(@new_row, split(//, '-' x ($self->number_of_aligned_lists())));
- push(@new_row, $list[$j-1]);
- $j--;
- }elsif($dir eq 'U'){
- push(@new_row, @{$merger{$i-1}}, '-');
- $i--;
- }
- if($self->verbose()){
- print join("\t", $i, $j, $dir, @new_row),"\n";
- }
- push(@new_row_set,\@new_row);
- }
-
- my %new_merger;
- for(my $i = 0; $i < scalar(@new_row_set); $i++){
- $new_merger{$i} = $new_row_set[scalar(@new_row_set) - $i - 1];
- }
- $self->merger(\%new_merger);
-}
-
-sub merged_array{
- my ($self) = @_;
- my %m = %{$self->merger()};
- my @result;
- foreach(sort{$a<=>$b} keys(%m)){
- push(@result, $m{$_});
- }
- return @result;
-}
-
-
-sub print2DArray{
- my($self,$name, $ref) = @_;
- if($self->verbose && defined $ref){
- print '*' x 32,"\n";
- print $name,"\n";
- if(ref $ref eq 'ARRAY'){
- foreach(@{$ref}){
- print join("\t",@{$_}),"\n";
- }
- }elsif(ref $ref eq 'HASH'){
- my %h = %{$ref};
- foreach my $a(sort(keys(%h))){
- if(ref $h{$a} eq 'ARRAY'){
- print join("", map { sprintf "%-4s",$_ } @{$h{$a}});
- }else{
- foreach my $b(sort(keys($h{$a}))){
- if(defined($h{$a}{$b})){
- printf "%5s", $h{$a}{$b};
- }
- }
- }
- print "\n";
- }
- }else{
- die 'Unsupported';
- }
- print '*' x 32,"\n";
- }
-}
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::NW_MSA
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/ORF.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/ORF.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,205 +0,0 @@
-package CPT::Bio::ORF;
-use strict;
-use warnings;
-use autodie;
-use Moose;
-
-has min_gene_length => (
- is => 'rw',
- isa => 'Int',
- default => sub {
- 0
- },
-);
-has sc_atg => ( is => 'rw', isa => 'Bool', default => sub { 1 } );
-has sc_ttg => ( is => 'rw', isa => 'Bool', default => sub { 1 } );
-has sc_ctg => ( is => 'rw', isa => 'Bool', default => sub { 0 } );
-has sc_gtg => ( is => 'rw', isa => 'Bool', default => sub { 1 } );
-
-our %code = (
- "TTT" => "F", "TTC" => "F", "TTA" => "L", "TTG" => "L", "TCT" => "S",
- "TCC" => "S", "TCA" => "S", "TCG" => "S", "TAT" => "Y", "TAC" => "Y",
- "TAA" => "*", "TAG" => "*", "TGT" => "C", "TGC" => "C", "TGA" => "*",
- "TGG" => "W", "CTT" => "L", "CTC" => "L", "CTA" => "L", "CTG" => "L",
- "CCT" => "P", "CCC" => "P", "CCA" => "P", "CCG" => "P", "CAT" => "H",
- "CAC" => "H", "CAA" => "Q", "CAG" => "Q", "CGT" => "R", "CGC" => "R",
- "CGA" => "R", "CGG" => "R", "ATT" => "I", "ATC" => "I", "ATA" => "I",
- "ATG" => "M", "ACT" => "T", "ACC" => "T", "ACA" => "T", "ACG" => "T",
- "AAT" => "N", "AAC" => "N", "AAA" => "K", "AAG" => "K", "AGT" => "S",
- "AGC" => "S", "AGA" => "R", "AGG" => "R", "GTT" => "V", "GTC" => "V",
- "GTA" => "V", "GTG" => "V", "GCT" => "A", "GCC" => "A", "GCA" => "A",
- "GCG" => "A", "GAT" => "D", "GAC" => "D", "GAA" => "E", "GAG" => "E",
- "GGT" => "G", "GGC" => "G", "GGA" => "G", "GGG" => "G",
-);
-
-
-
-sub run {
- my ($self, $sequence) = @_;
- # Read through forward strand
- my @putative_starts;
-
- # 30 seconds with a bioperl object
- # 5 seconds with string munging. >:|
- my $dna = uc( $sequence );
- my $length = length($sequence);
-
- # Pre-create the regular expressions
- my ( $regex_forward, $regex_backwards );
- my $not_statement_f = '^';
- my $not_statement_r = '^';
- if ( !$self->sc_atg() ) {
- $not_statement_f .= 'A';
- $not_statement_r .= 'T';
- }
- if ( !$self->sc_ctg() ) {
- $not_statement_f .= 'C';
- $not_statement_r .= 'G';
- }
- if ( !$self->sc_ttg() ) {
- $not_statement_f .= 'T';
- $not_statement_r .= 'A';
- }
- if ( !$self->sc_gtg() ) {
- $not_statement_f .= 'G';
- $not_statement_r .= 'C';
- }
-
- # If any start is acceptable, we re-add them and remove our ^
- if($not_statement_r eq '^' && $not_statement_f eq '^'){
- $not_statement_f = 'ACTG';
- $not_statement_r = 'ACTG';
- }
- $regex_forward = qr/[${not_statement_f}]TG/;
- $regex_backwards = qr/CA[${not_statement_r}]/;
-
- # Collect putative starts
- for ( my $i = 1 ; $i < $length - 1 ; $i++ ) {
- my $tri_nt = substr( $dna, $i - 1, 3 ); #$seq_obj->subseq($i,$i+2);
- if ( $tri_nt =~ $regex_forward ) {
- push( @putative_starts, [ $i, '+' ] );
- }
- if ( $tri_nt =~ $regex_backwards ) {
- push( @putative_starts, [ $i + 2, '-' ] );
- }
- }
- my %ORFs;
-
- #Loop through all of the starts we have
- my $fc = 0;
- my $rc = 0;
- foreach (@putative_starts) {
- my @putative_start = @{$_};
-
- my $final_seq = "";
-
- my $add;
- my $tri_nt;
- if ( $putative_start[1] eq "+" ) {
- my $end;
- for ( my $k = $putative_start[0] ; $k < $length ; $k = $k + 3 )
- {
- my $tri_nt = substr( $dna, $k, 3 );
- my $aa = $code{$tri_nt};
- if ( $aa && $aa ne '*' ) {
- $end = $k + 3;
- $final_seq .= $tri_nt;
- }
- else {
- last;
- }
- }
- if ( length($final_seq)/3 > $self->min_gene_length() ) {
- $ORFs{ 'f_' + $fc++ } = [
- length($final_seq)/3,
- $putative_start[0],
- $end,
- 'F',
- $final_seq
- ];
- }
- } # - strand
- else {
- my $end;
- for ( my $k = $putative_start[0] ; $k >= 2 ; $k = $k - 3 ) {
- my $tmp = reverse( substr( $dna, $k - 3, 3 ) );
-
- $tmp =~ tr/ACTG/qzAC/;
- $tmp =~ tr/qz/TG/;
-
- my $aa = $code{$tmp};
- if ( defined $aa && $aa ne '*' ) {
- $end = $k - 1;
- $final_seq .= $tmp;
- }
- else {
- last;
- }
-
- }
- if ( length($final_seq)/3 > $self->min_gene_length() ) {
- $ORFs{ 'r_' + $rc++ } = [
- length($final_seq)/3,
- $end,
- $putative_start[0],
- 'R',
- $final_seq
- ];
- }
- }
- }
-
- my @orfs;
-
- for my $orf_key ( sort( keys(%ORFs) ) ) {
- my @tmp= @{ $ORFs{$orf_key} };
- my $seqobj = Bio::Seq->new(
- -display_id => sprintf(
- 'orf%05d_%s',
- ($orf_key + 1), $tmp[3],
- ),
- -desc => sprintf(
- '[%s-%s; %s aa long]'
- ,$tmp[1], $tmp[2], $tmp[0]
- ),
- -seq => $tmp[4]
- );
- push(@orfs, $seqobj);
- }
- return @orfs;
-}
-
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::ORF
-
-=head1 VERSION
-
-version 1.99.4
-
-=function run
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/RBS.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/RBS.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,59 +0,0 @@
-package CPT::Bio::RBS;
-use Moose;
-use autodie;
-
-has 'algo' => ( is => 'rw', isa => 'Str' );
-has 'predictor' => ( is => 'rw', isa => 'Any' );
-has 'only_best' => ( is => 'rw', isa => 'Bool', default => sub { 0 } );
-
-sub set_algorithm {
- my ( $self, $algorithm ) = @_;
- if ( $algorithm eq 'naive' ) {
- use CPT::Bio::RBS::Algo::Naive;
- my $a = CPT::Bio::RBS::Algo::Naive->new();
- $self->predictor($a);
- }
- else {
- die 'Algorithm not implemented';
- }
-}
-
-# Run the prediction on the sequence
-sub predict {
- my ( $self, $sequence ) = @_;
- return $self->predictor()->predict(
- sequence => lc($sequence),
- return_best => $self->only_best(),
- );
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::RBS
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/RBS/Algo.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/RBS/Algo.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-package CPT::Bio::RBS::Algo;
-use Moose::Role;
-use strict;
-use warnings;
-use autodie;
-
-# Function to predict RBSs in a given sequence
-requires 'predict';
-
-no Moose::Role;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::RBS::Algo
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/RBS/Algo/Naive.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/RBS/Algo/Naive.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,103 +0,0 @@
-package CPT::Bio::RBS::Algo::Naive;
-use Moose;
-with 'CPT::Bio::RBS::Algo';
-use CPT::Bio::RBS_Object;
-
-my @SDs = (
- 'aggaggt',
- 'ggaggt',
- 'aggagg',
- 'aggag',
- 'gaggt',
- 'ggagg',
- 'aggt',
- 'gggt',
- 'gagg',
- 'gggg',
- 'agga',
- 'ggag',
- 'gga',
- 'gag',
- 'agg',
- 'ggt',
-);
-
-sub predict {
- my ( $self, %params ) = @_;
- my $upstream = $params{sequence};
- my $only_best = $params{return_best};
-
- my $length = length($upstream);
-
- my @results = ();
- foreach my $rbs ( @SDs ){
- while ($upstream =~ /$rbs/g) {
- # Position of regex match
- my $loc = $-[0];
- # Seq before RBS
- my $before = substr($upstream,0, $loc);
- # Seq after RBS
- my $after = substr($upstream, $loc + length($rbs));
- my $rbs_o = CPT::Bio::RBS_Object->new(
- upstream => sprintf('%s %s %s', $before , uc($rbs) , $after),
- score => $self->score_match($rbs, length($after)),
- rbs_seq => uc($rbs),
- separation => length($after),
- );
- push( @results, $rbs_o );
- }
- }
- @results = sort { $b->score() <=> $a->score() } @results;
- if (@results) {
- if($only_best){
- return ($results[0]);
- }else{
- return @results;
- }
- }
- else {
- return (
- CPT::Bio::RBS_Object->new(
- upstream =>$upstream,
- score => '-1',
- rbs_seq => 'None',
- separation => -1,
- )
- );
- }
-}
-
-sub score_match {
- my ($self, $match, $dist) = @_;
- return length($match);
-}
-
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::RBS::Algo::Naive
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/RBS_Object.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/RBS_Object.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-package CPT::Bio::RBS_Object;
-use Moose;
-use autodie;
-
-has 'upstream' => ( is => 'rw', isa => 'Str' );
-has 'score' => (is => 'rw', isa => 'Int');
-has 'rbs_seq' => (is => 'rw', isa => 'Str');
-has 'separation' => (is => 'rw', isa => 'Int');
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::RBS_Object
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Bio/SAR.pm
--- a/cpt_psm_recombine/lib/CPT/Bio/SAR.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,76 +0,0 @@
-package CPT::Bio::SAR;
-use strict;
-use warnings;
-use autodie;
-use Moose;
-
-sub filter_sar {
- my ($self, @seqs) = @_;
- my @good;
- foreach(@seqs){
- if(has_sar_motif($_)){
- push(@good, $_);
- }
- }
- return @good;
-}
-
-sub has_sar_motif {
- my ( $self, $seq ) = @_;
-
- return 0 if(length $seq < 40);
-
- my $reg_a = qr/([^DEKR]{3}K[^DEKR]{8,}[^DER]{1}[^DEKR]{3})/;
- my $reg_b = qr/([KR]{1,}[^DEKR]{12,}[^DER]{1}[^DEKR]{3})/;
-
- my $first40 = substr( $seq, 0, 40 );
-
- # there is a transmembrane domain in the first 40 AAs
- # there is at least one positive charged AAs in front of the TMD
- if ( $first40 =~ $reg_a || $first40 =~ $reg_b ) {
- my $modi1st40 = $first40;
- # Cut out the match, and then add the whole thing to the end.
- my $t4homology =
- #substr($seq,0, $-[0] ), # Before the match
- #substr($seq, $-[0], ($+[0] - $-[0])), # the match
- substr($seq, $+[0]). # After the match
- $first40;
- $t4homology = substr($t4homology, 0 , 40);
-
- if ( $t4homology =~ qr/E[A-Z]{8}[DC][A-Z]{4,5}T/ ) {
- return 1;
- }
- }
- return 0;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Bio::SAR
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/BioData.pm
--- a/cpt_psm_recombine/lib/CPT/BioData.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,246 +0,0 @@
-package CPT::BioData;
-use Moose;
-use strict;
-use warnings;
-use autodie;
-
-has 'dummy_var' => (isa => 'Str', is => 'ro');
-
-my %genbank_feature_tags = (
- "locus_tag" => 1,
- "gene" => 1,
- "product" => 1,
- "allele" => 1,
- "anticodon" => 1,
- "artificial_location" => 1,
- "bio_material" => 1,
- "bound_moiety" => 1,
- "cell_line" => 1,
- "cell_type" => 1,
- "chromosome" => 1,
- "citation" => 1,
- "clone" => 1,
- "clone_lib" => 1,
- "codon_start" => 1,
- "collected_by" => 1,
- "collection_date" => 1,
- "compare" => 1,
- "country" => 1,
- "cultivar" => 1,
- "culture_collection" => 1,
- "db_xref" => 1,
- "dev_stage" => 1,
- "direction" => 1,
- "EC_number" => 1,
- "ecotype" => 1,
- "environmental_sample" => 1,
- "estimated_length" => 1,
- "exception" => 1,
- "experiment" => 1,
- "focus" => 1,
- "frequency" => 1,
- "function" => 1,
- "gap_type" => 1,
- "gene_synonym" => 1,
- "germline" => 1,
- "haplogroup" => 1,
- "haplotype" => 1,
- "host" => 1,
- "identified_by" => 1,
- "inference" => 1,
- "isolate" => 1,
- "isolation_source" => 1,
- "lab_host" => 1,
- "lat_lon" => 1,
- "linkage_evidence" => 1,
- "macronuclear" => 1,
- "map" => 1,
- "mating_type" => 1,
- "mobile_element_type" => 1,
- "mod_base" => 1,
- "mol_type" => 1,
- "ncRNA_class" => 1,
- "note" => 1,
- "number" => 1,
- "old_locus_tag" => 1,
- "operon" => 1,
- "organelle" => 1,
- "organism" => 1,
- "partial" => 1,
- "PCR_conditions" => 1,
- "PCR_primers" => 1,
- "phenotype" => 1,
- "plasmid" => 1,
- "pop_variant" => 1,
- "protein_id" => 1,
- "proviral" => 1,
- "pseudo" => 1,
- "rearranged" => 1,
- "replace" => 1,
- "ribosomal_slippage" => 1,
- "rpt_family" => 1,
- "rpt_type" => 1,
- "rpt_unit_range" => 1,
- "rpt_unit_seq" => 1,
- "satellite" => 1,
- "segment" => 1,
- "serotype" => 1,
- "serovar" => 1,
- "sex" => 1,
- "specimen_voucher" => 1,
- "standard_name" => 1,
- "strain" => 1,
- "sub_clone" => 1,
- "sub_species" => 1,
- "sub_strain" => 1,
- "tag_peptide" => 1,
- "tissue_lib" => 1,
- "tissue_type" => 1,
- "transgenic" => 1,
- "translation" => 1,
- "transl_except" => 1,
- "transl_table" => 1,
- "trans_splicing" => 1,
- "variety" => 1,
-);
-my %artemis_colours = (
- 0 => 'rgb(255,255,255)',
- 1 => 'rgb(100,100,100)',
- 2 => 'rgb(255,0,0)',
- 3 => 'rgb(0,255,0)',
- 4 => 'rgb(0,0,255)',
- 5 => 'rgb(0,255,255)',
- 6 => 'rgb(255,0,255)',
- 7 => 'rgb(255,255,0)',
- 8 => 'rgb(152,251,152)',
- 9 => 'rgb(135,206,250)',
- 10 => 'rgb(255,165,0)',
- 11 => 'rgb(200,150,100)',
- 12 => 'rgb(255,200,200)',
- 13 => 'rgb(170,170,170)',
- 14 => 'rgb(0,0,0)',
- 15 => 'rgb(255,63,63)',
- 16 => 'rgb(255,127,127)',
- 17 => 'rgb(255,191,191)',
-);
-
-sub artemis_colour_decode{
- my ($self, $idx) = @_;
- return $artemis_colours{$idx};
-}
-
-my %table321 = (
- 'Gly' => 'G', 'Pro' => 'P',
- 'Ala' => 'A',
- 'Val' => 'V',
- 'Leu' => 'L',
- 'Ile' => 'I',
- 'Met' => 'M',
- 'Cys' => 'C',
- 'Phe' => 'F',
- 'Tyr' => 'Y',
- 'Trp' => 'W',
- 'His' => 'H',
- 'Lys' => 'K',
- 'Arg' => 'R',
- 'Gln' => 'Q',
- 'Asn' => 'N',
- 'Glu' => 'E',
- 'Asp' => 'D',
- 'Ser' => 'S',
- 'Thr' => 'T',
- 'XXX' => 'X',
- 'End' => '*',
- 'Stop' => '*'
-);
-
-sub decode321{
- my ($self, $three) = @_;
- return $table321{$three};
-}
-
-
-sub get321Table {
- my ($self) = @_;
- return \%table321;
-}
-
-
-sub getTranslationTable {
- my ($self, $table_id) = @_;
- require Bio::Tools::CodonTable;
- my $table = Bio::Tools::CodonTable->new( -id => (defined $table_id? $table_id: 1) );
- my %result;
- my @codons = qw(A C T G);
- foreach my $i (@codons) {
- foreach my $j (@codons) {
- foreach my $k (@codons) {
- $result{"$i$j$k"} = $table->translate("$i$j$k");
- }
- }
- }
- if(defined($table_id) && $table_id == 11){
- $result{TGA} = '*';
- $result{TAA} = '#';
- $result{TAG} = '+';
- }
- return \%result;
-}
-
-
-
-sub isValidTag {
- my ( $self, $tag ) = @_;
- return $genbank_feature_tags{$tag};
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::BioData
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 get321Table
-
- $bio->get321Table();
-
-Convenience function which returns a codon translation table (3 letter ID to 1 letter code)
-
-=head2 getTranslationTable
-
- $bio->getTranslationTable();
-
-Convenience function which returns a hash translated according to Bio::Tools::CodonTable
-
-This is done for speed reasons. CodonTable is very slow and we require better performance
-
-=head2 isValidTag
-
- if($cptbio->isValidTag('locus_tag')) { ... }
-
-Will validate a GBK feature tag
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/CLI.pm
--- a/cpt_psm_recombine/lib/CPT/CLI.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-package CPT::CLI;
-use strict;
-use warnings;
-
-our $VERSION = '1.00';
-
-=head1 NAME
-
-CPT::CLI - a stub library to make Dist::Zilla happy
-
-=head1 SYNOPSIS
-
- use CPT::CLI;
-
-=head1 FUNCTIONAL INTERFACE
-
-=head2 new
-
- my $libCPT = CPT::CLI->new();
-
-=cut
-
-sub new {
- my ( $class, %options ) = @_;
- my $self = bless( {}, $class );
- return $self;
-}
-
-1;
-__END__
-
-=head1 AUTHOR
-
-Eric Rasche, rasche.eric@yandex.ru
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (C) 2013 by Eric Rasche
-
-This library is free software; you can redistribute it and/or modify
-it under the same terms as Perl itself, either Perl version 5.14.2 or,
-at your option, any later version of Perl 5 you may have available.
-
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Chado/GMOD_Conf.pm
--- a/cpt_psm_recombine/lib/CPT/Chado/GMOD_Conf.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-package CPT::Chado::GMOD_Conf;
-use Moose;
-
-has 'database_identifier' => ( is => 'rw', isa => 'Str' );
-has 'gmod_root' => ( is => 'rw', isa => 'Str', default => '/usr/share/gmod/' );
-has 'database' => ( is => 'rw', isa => 'Str' );
-has 'username' => ( is => 'rw', isa => 'Str' );
-has 'password' => ( is => 'rw', isa => 'Str' );
-has 'host' => ( is => 'rw', isa => 'Str', default => 'localhost' );
-has 'port' => ( is => 'rw', isa => 'Int', default => 5432 );
-
-sub load_config {
- my ( $self, $identifier ) = @_;
- open( my $db_info, '<',
- $self->gmod_root() . "conf/${identifier}.conf" );
- while (<$db_info>) {
- chomp $_;
- if ( $_ =~ /DBUSER=/ ) {
- $self->username( substr( $_, 7 ) );
- }
- elsif ( $_ =~ /DBPASS=/ ) {
- $self->password( substr( $_, 7 ) );
- }
- elsif ( $_ =~ /DBNAME=/ ) {
- $self->database( substr( $_, 7 ) );
- }
- }
- close($db_info);
-}
-
-sub get_connector {
- my ( $self, $identifier ) = @_;
- $self->load_config($identifier);
- use DBI;
- my $dbh = DBI->connect(
- sprintf(
- "dbi:Pg:dbname=%s;host=%s;port=%s;",
- $self->database(), $self->host(), $self->port()
- ),
- $self->username(),
- $self->password()
- );
- return $dbh;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Chado::GMOD_Conf
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Circos/Conf.pm
--- a/cpt_psm_recombine/lib/CPT/Circos/Conf.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,90 +0,0 @@
-package CPT::Circos::Conf;
-use strict;
-use warnings;
-use autodie;
-use Moose;
-
-# This parameter specifies that we should behave according to galaxy_override spec.
-has data => (
- is => 'rw',
- isa => 'ArrayRef',
- default => sub {
- []
- },
-);
-my @current_block = ();
-
-sub set {
- my ($self, $key, $value) = @_;
- $self->push_d(sprintf('%s = %s', $key, $value));
-}
-
-sub start_block {
- my ($self, $block) = @_;
- $self->push_d(sprintf('<%s>', $block));
- push(@current_block, $block);
-}
-
-sub end_block {
- my ($self) = @_;
- $self->push_d(sprintf('%s>', pop @current_block));
-}
-
-sub include {
- my ($self, $file) = @_;
- $self->push_d(sprintf('<>', $file));
-}
-
-sub push_d {
- my ($self, $string) = @_;
- push(@{$self->data()}, $self->spaces_for_indent() . $string);
-}
-
-sub spaces_for_indent {
- my ($self) = @_;
- if(scalar(@current_block) > 0){
- return ' ' x scalar(@current_block);
- }
- return '';
-}
-
-sub finalize {
- my ($self) = @_;
- if(scalar @current_block > 0){
- die 'Blocks [' . join(',', @current_block) . '] were not closed';
- }
- else{
- return join("\n", @{$self->data()});
- }
-}
-
-no Moose::Role;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Circos::Conf
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/External.pm
--- a/cpt_psm_recombine/lib/CPT/External.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-package CPT::External;
-use Moose::Role;
-use strict;
-use warnings;
-use autodie;
-
-requires 'analyze';
-requires 'cleanup';
-
-no Moose::Role;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::External
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/External/LipoP.pm
--- a/cpt_psm_recombine/lib/CPT/External/LipoP.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-package CPT::External::LipoP;
-no warnings;
-use Moose;
-use File::Temp qw(tempfile);
-use IPC::Run qw(run);
-
-has 'sequence' => ( is => 'rw', isa => 'Str' );
-has 'response' => ( is => 'rw', isa => 'Str' );
-has 'cleavage' => ( is => 'rw', isa => 'ArrayRef' );
-has 'tmpfile' => (is => 'rw', isa => 'Any');
-
-sub create_fasta_file {
- my ( $self, $seq ) = @_;
- my ( $fh, $filename ) = tempfile( "cpt.lipop.XXXXXXX", UNLINK => 1 );
-
- #printf $fh ">%s\n%s\n", 'seq', $seq;
- printf $fh "%s\n", $seq;
- close($fh);
- return $filename;
-}
-
-sub parse_text {
- my ($self) = @_;
-
- foreach my $line ( split( /\n/, $self->response() ) ) {
- if ( $line =~ /#.*cleavage=(\d+)-(\d+)/ ) {
- $self->cleavage( [ $1, $2 ] );
- }
- }
- # # seq SpII score=17.8897 margin=13.48964 cleavage=19-20 Pos+2=K
- # # Cut-off=-3
- # seq LipoP1.0:Best SpII 1 1 17.8897
- # seq LipoP1.0:Margin SpII 1 1 13.48964
- # seq LipoP1.0:Class SpI 1 1 4.40006
- # seq LipoP1.0:Class CYT 1 1 -0.200913
- # seq LipoP1.0:Signal CleavII 19 20 17.8897 # LVVSA|CKSPP Pos+2=K
- # seq LipoP1.0:Signal CleavI 28 29 4.03409 # PPVQS|QRPEP
- # seq LipoP1.0:Signal CleavI 21 22 2.04071 # VSACK|SPPPV
- # seq LipoP1.0:Signal CleavI 27 28 -2.07502 # PPPVQ|SQRPE
- # seq LipoP1.0:Signal CleavI 19 20 -2.6907 # LVVSA|CKSPP
- # seq LipoP1.0:Signal CleavI 22 23 -2.87659 # SACKS|PPPVQ
-}
-
-sub analyze {
- my ( $self, $seq ) = @_;
- $self->sequence($seq);
- $self->tmpfile($self->create_fasta_file($seq));
- my @cmd = ( 'LipoP', $self->tmpfile(), '-workdir', '/tmp/nonexistant/', '-wwwdir', '/tmp/nonexistant' );
- my ( $in, $out, $err );
- run \@cmd, \$in, \$out, \$err;
- if ($err) { print "Error: $err\n"; }
- $self->response($out);
- $self->parse_text();
- unlink($self->tmpfile());
-}
-
-sub cleanup {
- my ($self) = @_;
- return;
-}
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::External::LipoP
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/External/TMHMM.pm
--- a/cpt_psm_recombine/lib/CPT/External/TMHMM.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,164 +0,0 @@
-package CPT::External::TMHMM;
-no warnings;
-use Moose;
-use File::Temp qw(tempfile);
-use IPC::Run qw(run);
-use File::Temp qw(tempdir);
-use File::Copy qw(move);
-
-has 'sequence' => ( is => 'rw', isa => 'Str' );
-has 'response' => ( is => 'rw', isa => 'Str' );
-
-has 'num_predicted' => ( is => 'rw', isa => 'Int' );
-has 'predicted_locations' => ( is => 'rw', isa => 'ArrayRef' );
-has 'prob_n_in' => ( is => 'rw', isa => 'Str' );
-has 'hash' => ( is => 'rw', isa => 'Str' );
-has 'picture_location' => ( is => 'rw', isa => 'Str' );
-
-my ( $fh, $filename, $image );
-
-sub create_fasta_file {
- my ( $self, $seq ) = @_;
- ( $fh, $filename ) = tempfile( "cpt.tmhmm.XXXXXXX", UNLINK => 1 );
-
- #printf $fh ">%s\n%s\n", 'seq', $seq;
- printf $fh "%s\n", $seq;
- return $filename;
-}
-
-sub parse_text {
- my ($self) = @_;
- unless ( $self->predicted_locations() ) {
- $self->predicted_locations( [] );
- }
- unless ( $self->num_predicted() ) {
- $self->num_predicted(0);
- }
- foreach my $line ( split( /\n/, $self->response() ) ) {
- if ( $line =~ /Number of predicted TMHs:\s*(\d+)/ ) {
- $self->num_predicted($1);
- }
- elsif ( $line =~ /Total prob of N-in:\s*([0-9.]*)/ ) {
- $self->prob_n_in($1);
- }
- elsif ( $line =~ /TMHMM2.0\s*TMhelix\s*([0-9]+)\s*([0-9]+)/ ) {
-
- #$self->num_predicted($self->num_predicted()+1);
- push( @{ $self->predicted_locations() }, [ $1, $2 ] );
- }
- }
-
- # # seq Length: 145
- # # seq Number of predicted TMHs: 1
- # # seq Exp number of AAs in TMHs: 20.91302
- # # seq Exp number, first 60 AAs: 20.91265
- # # seq Total prob of N-in: 0.04659
- # # seq POSSIBLE N-term signal sequence
- # seq TMHMM2.0 outside 1 3
- # seq TMHMM2.0 TMhelix 4 23
- # seq TMHMM2.0 inside 24 145
-}
-
-sub analyze {
- my ( $self, $seq ) = @_;
-
- # Set our hash
- require Digest::MD5;
- $self->hash( Digest::MD5::md5_hex($seq) );
-
- # Set our sequence
- $self->sequence($seq);
-
- # Tmp dir to run in
- my $dir = tempdir( CLEANUP => 1 );
- my $tmpfile = $self->create_fasta_file($seq);
-
- # Plot and use specified workdir
- my @cmd = ( 'tmhmm.pl', '-plot', '-workdir', $dir, '<', $tmpfile );
-
- # Run the command
- my ( $in, $out, $err );
- run \@cmd, \$in, \$out, \$err;
-
- # If error,we error
- if ($err) {
-
- # print STDERR "Error: $err\n";
- # Kinda a crappy way to handle this...
- $self->response($err);
- return 0;
- }
- {
-
- # Move the created plot to a known location
- my @tmhmm_files = glob("$dir/*/*.png");
-
- # There's really only one png but this is just as easy to write
- for my $png (@tmhmm_files) {
- $image = sprintf( "/tmp/cpt.ext.tmhmm.%s.png", $self->hash() );
- move( $png, $image );
- }
-
- # Module to remove the temporary dir so we clean up after
- # ourselves quickly, since this programme seems to open a LARGE
- # number of file handles.
- require File::Path;
- File::Path::remove_tree($dir);
-
- # set response and parse it, then return OK.
- $self->response($out);
- $self->parse_text();
- return 1;
- }
-}
-
-sub cleanup {
- if ( defined($fh) ) {
- close($fh);
- unlink($filename);
- if ( -e $image ) {
- unlink($image);
- }
- }
-}
-
-END {
- if ( defined($fh) ) {
- close($fh);
- unlink($filename);
- if ( -e $image ) {
- unlink($image);
- }
- }
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::External::TMHMM
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Filetype.pm
--- a/cpt_psm_recombine/lib/CPT/Filetype.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-package CPT::Filetype;
-use Moose::Role;
-use strict;
-use warnings;
-use autodie;
-
-has lines => ( is => 'rw', isa => 'ArrayRef');
-# Also have file location if we need to open a filehandle on it to further
-# check.
-has file => ( is => 'rw', isa => 'Str');
-
-
-requires 'score';
-requires 'name';
-
-no Moose::Role;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Filetype
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 score
-
-Score should be a method that returns a number between 0 and 1 describing the
-probability that it is this format, with 1 indicating that it truly is this
-file to the exclusion of every other type.
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Filetype/embl.pm
--- a/cpt_psm_recombine/lib/CPT/Filetype/embl.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-package CPT::Filetype::embl;
-no warnings;
-use Moose;
-with 'CPT::Filetype';
-
-sub score {
- my ($self) = @_;
- my $first_line = ${$self->lines()}[0];
- my @embl_identifiers = (
- 'FT', 'FH', 'SQ', 'DE', 'AC', 'PA', 'SV', 'DT', 'KW',
- 'OS', 'OC', 'OX', 'R ', '' , 'DR', 'CC', 'CO', 'XX',
- );
- my %embl_id_map = map { $_ => 1 } @embl_identifiers;
- my $embl_score = 0;
- foreach(@{$self->lines()}){
- $embl_score++ if($embl_id_map{substr($_,0,2)});
- }
- return $embl_score/10;
-}
-
-sub name {
- return 'embl';
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Filetype::embl
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Filetype/fasta.pm
--- a/cpt_psm_recombine/lib/CPT/Filetype/fasta.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-package CPT::Filetype::fasta;
-no warnings;
-use Moose;
-with 'CPT::Filetype';
-
-sub score {
- my ($self) = @_;
- my @l = @{$self->lines()};
-
- for(my $i = 0; $i < scalar(@l) - 1; $i++){
- if(substr($l[$i],0,1) eq '>' && $l[$i+1] =~ /[A-Za-z]/){
- return 1;
- }
- }
- return 0;
-}
-
-sub name {
- return 'fasta';
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Filetype::fasta
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Filetype/gbk.pm
--- a/cpt_psm_recombine/lib/CPT/Filetype/gbk.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-package CPT::Filetype::gbk;
-no warnings;
-use Moose;
-with 'CPT::Filetype';
-
-sub score {
- my ($self) = @_;
- my $first_line = ${$self->lines()}[0];
- return $first_line =~ '^LOCUS';
-}
-
-sub name {
- return 'genbank';
-}
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Filetype::gbk
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Filetype/gff3.pm
--- a/cpt_psm_recombine/lib/CPT/Filetype/gff3.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-package CPT::Filetype::gff3;
-no warnings;
-use Moose;
-with 'CPT::Filetype';
-
-sub score {
- my ($self) = @_;
- my $first_line = ${$self->lines()}[0];
- return $first_line =~ '##gff-version 3';
-}
-
-sub name {
- return 'gff';
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Filetype::gff3
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/FiletypeDetector.pm
--- a/cpt_psm_recombine/lib/CPT/FiletypeDetector.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,117 +0,0 @@
-package CPT::FiletypeDetector;
-use Moose;
-use strict;
-use warnings;
-use Data::Dumper;
-use autodie;
-
-# ABSTRACT: an incredibly basic filetype detection library for genomic data
-
-
-sub head {
- my ($self, $filename) = @_;
- # We're only going to focus on detecting a few types
- open( my $file, '<', $filename );
- my @lines;
- my $c = 0;
- while (<$file>) {
- # Read ten lines
- if ( $c++ < 10 ) {
- chomp $_;
- push( @lines, $_ );
- }
- # Then exit
- else {
- last;
- }
- }
- close($file);
- return @lines;
-}
-
-sub detect {
- my ( $self, $filename ) = @_;
-
- my @lines = $self->head($filename);
-
- use CPT::Filetype::gff3;
- use CPT::Filetype::gbk;
- use CPT::Filetype::embl;
- use CPT::Filetype::fasta;
-
- my @scorers = (
- CPT::Filetype::gff3->new(lines => \@lines, file => $filename),
- CPT::Filetype::gbk->new(lines => \@lines, file => $filename),
- CPT::Filetype::embl->new(lines => \@lines, file => $filename),
- CPT::Filetype::fasta->new(lines => \@lines, file => $filename),
- );
-
- my $best_score = 0;
- my $best_name = "";
- foreach(@scorers){
- my $score = $_->score();
- # "1 indicating ... to the exclusion [of others]
- if($score == 1){
- return $_->name();
- }
-
- # Otherwise check if better
- if($score > $best_score){
- $best_name = $_->name();
- }
- }
-
- return $best_name;
-
- # if(defined $string){
- # return 'fasta' if( $string =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i);
- # return 'genbank' if( $string =~ /\.(gb|gbank|genbank|gbk)$/i);
- # return 'scf' if( $string =~ /\.scf$/i);
- # return 'pir' if( $string =~ /\.pir$/i);
- # return 'embl' if( $string =~ /\.(embl|ebl|emb|dat)$/i);
- # return 'raw' if( $string =~ /\.(txt)$/i);
- # return 'gcg' if( $string =~ /\.gcg$/i);
- # return 'ace' if( $string =~ /\.ace$/i);
- # return 'bsml' if( $string =~ /\.(bsm|bsml)$/i);
- # return 'swiss' if( $string =~ /\.(swiss|sp)$/i);
- # return 'phd' if( $string =~ /\.(phd|phred)$/i);
- # return 'gff' if( $string =~ /\.(gff|gff3)$/i);
- # return 'blastxml' if( $string =~ /\.(xml)$/i);
- # die "File type detection failure";
- # }
- # else{
- # die "File type detection failure";
- # }
-
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::FiletypeDetector - an incredibly basic filetype detection library for genomic data
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/GBK2GFF3.pm
--- a/cpt_psm_recombine/lib/CPT/GBK2GFF3.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,376 +0,0 @@
-package CPT::GBK2GFF3;
-use strict;
-use warnings;
-use CPT;
-use Data::Dumper;
-use autodie;
-use Bio::SeqIO;
-use Moose;
-
-has data => (
- is => 'rw',
- isa => 'ArrayRef',
- default => sub { [] },
-);
-has header => (
- is => 'rw',
- isa => 'ArrayRef',
-);
-has sequence => (
- is => 'rw',
- isa => 'Str',
-);
-has seqid => (
- is => 'rw',
- isa => 'Str',
-);
-has genbank => (
- is => 'rw',
- isa => 'Str',
-);
-has is_circular => (
- is => 'rw',
- isa => 'Str',
-);
-has id_prefix => ( is => 'rw', isa => 'Str' );
-has global_feat_idx => ( is => 'rw', isa => 'Int', default => sub { 0 } );
-has source => ( is => 'rw', isa => 'ArrayRef');
-
-my %feat_type_count;
-my %reserved_keys = map { $_ => 1 }
- qw/ID Name Alias Parent Target Gap Derives_from Note Dbxref Ontology_term/;
-
-use CPT::Bio;
-my $bio = CPT::Bio->new();
-my %key_mapping = (
- "-" => [ "located_sequence_feature", "SO:0000110" ],
- "-10_signal" => [ "minus_10_signal", "SO:0000175" ],
- "-35_signal" => [ "minus_35_signal", "SO:0000176" ],
- "3'UTR" => [ "three_prime_UTR", "SO:0000205" ],
- "3'clip" => [ "three_prime_clip", "SO:0000557" ],
- "5'UTR" => [ "five_prime_UTR", "SO:0000204" ],
- "5'clip" => [ "five_prime_clip", "SO:0000555" ],
- "CAAT_signal" => [ "CAAT_signal", "SO:0000172" ],
- "CDS" => [ "CDS", "SO:0000316" ],
- "D-loop" => [ "D_loop", "SO:0000297" ],
- "D_segment" => [ "D_gene", "SO:0000458" ],
- "GC_signal" => [ "GC_rich_region", "SO:0000173" ],
- "LTR" => [ "long_terminal_repeat", "SO:0000286" ],
- "RBS" => [ "ribosome_entry_site", "SO:0000139" ],
- "STS" => [ "STS", "SO:0000331" ],
- "TATA_signal" => [ "TATA_box", "SO:0000174" ],
- "attenuator" => [ "attenuator", "SO:0000140" ],
- "enhancer" => [ "enhancer", "SO:0000165" ],
- "exon" => [ "exon", "SO:0000147" ],
- "gap" => [ "gap", "SO:0000730" ],
- "gene" => [ "gene", "SO:0000704" ],
- "iDNA" => [ "iDNA", "SO:0000723" ],
- "intron" => [ "intron", "SO:0000188" ],
- "mRNA" => [ "mRNA", "SO:0000234" ],
- "mat_peptide" => [ "mature_protein_region", "SO:0000419" ],
- "misc_RNA" => [ "transcript", "SO:0000673" ],
- "misc_binding" => [ "binding_site", "SO:0000409" ],
- "misc_difference" => [ "sequence_difference", "SO:0000413" ],
- "misc_feature" => [ "region", "SO:0000001" ],
- "misc_recomb" => [ "recombination_feature", "SO:0000298" ],
- "misc_signal" => [ "regulatory_region", "SO:0005836" ],
- "misc_structure" => [ "sequence_secondary_structure", "SO:0000002" ],
- "modified_base" => [ "modified_DNA_base", "SO:0000305" ],
- "operon" => [ "operon", "SO:0000178" ],
- "oriT" => [ "origin_of_transfer", "SO:0000724" ],
- "polyA_signal" => [ "polyA_signal_sequence", "SO:0000551" ],
- "polyA_site" => [ "polyA_site", "SO:0000553" ],
- "precursor_RNA" => [ "primary_transcript", "SO:0000185" ],
- "prim_transcript" => [ "primary_transcript", "SO:0000185" ],
- "primer_bind" => [ "primer_binding_site", "SO:0005850" ],
- "promoter" => [ "promoter", "SO:0000167" ],
- "protein_bind" => [ "protein_binding_site", "SO:0000410" ],
- "rRNA" => [ "rRNA", "SO:0000252" ],
- "repeat_region" => [ "repeat_region", "SO:0000657" ],
- "repeat_unit" => [ "repeat_unit", "SO:0000726" ],
- "satellite" => [ "satellite_DNA", "SO:0000005" ],
- "scRNA" => [ "scRNA", "SO:0000013" ],
- "sig_peptide" => [ "signal_peptide", "SO:0000418" ],
- "snRNA" => [ "snRNA", "SO:0000274" ],
- "snoRNA" => [ "snoRNA", "SO:0000275" ],
- "source" => [ "contig", "SO:0000149" ], # manually modified
- "stem_loop" => [ "stem_loop", "SO:0000313" ],
- "tRNA" => [ "tRNA", "SO:0000253" ],
- "terminator" => [ "terminator", "SO:0000141" ],
- "transit_peptide" => [ "transit_peptide", "SO:0000725" ],
- "variation" => [ "sequence_variant", "SO:0000109" ],
-);
-
-
-sub get_gff3_file {
- my ($self) = @_;
- my @output;
- for my $header_line ( @{ $self->header() } ) {
- push( @output, $header_line );
- }
- push( @output, join( "\t", @{ $self->get_source } ) );
- for my $data_line ( @{ $self->data() }) {
- push( @output, join( "\t", @{$data_line} ) );
- }
- push( @output, '##FASTA' );
- push( @output, '>' . $self->seqid() );
- my $seq = $self->sequence();
- $seq =~ s/(.{80})/$1\n/g;
- push( @output, $seq );
- return join( "\n", @output );
-}
-
-sub escape {
- my ($self, $str) = @_;
- $str =~ s/,/%2C/g;
- $str =~ s/=/%3D/g;
- $str =~ s/;/%3B/g;
- $str =~ s/\t/%09/g;
- return $str;
-}
-
-sub FT_SO_map {
- my ( $self, $key ) = @_;
- if ( $key_mapping{$key} ) {
- my @result = @{ $key_mapping{$key} };
- return $result[0];
- }
- else {
- return 'region';
- }
-}
-
-sub source_map {
- my ( $self, $type ) = @_;
- if ( $self->{'override_source'} ) {
- return $self->{'override_source'};
- }
- if ( $self->{'annotation_software'}{$type} ) {
- return $self->{'annotation_software'}{$type};
- }
- else {
- return '.';
- }
-}
-
-sub get_attrs {
- my ( $self, %data ) = @_;
- my $feature = $data{'feat'};
- my $parents_ref = $data{'parents'};
-
- my %attrs = ();
- $attrs{'ID'} =
- $self->id_prefix() . '.' . ($self->global_feat_idx());
-
- $self->global_feat_idx($self->global_feat_idx()+1);
-
- # Handle Identifier
- my $identifier = $bio->_getIdentifier($feature);
- if ( $identifier ne 'ERROR' ) {
- $attrs{'Name'} = $identifier . '.' . $feature->primary_tag;
- }
-
- # Handle parents, if there are any
- if ($parents_ref) {
- if ( ref($parents_ref) eq 'ARRAY' ) {
- $attrs{'Parent'} = $parents_ref;
- }
- else {
- $attrs{'Parent'} = [$parents_ref];
- }
- }
-
- # These are otherwise "Special" keys that need to be handled differently.
- if ( $feature->has_tag('note') ) {
- my @notes = $feature->get_tag_values('note');
- $attrs{'Note'} = \@notes;
- }
- if ( $feature->has_tag('db_xref') ) {
- my @dbxref = $feature->get_tag_values('db_xref');
- $attrs{'Dbxref'} = \@dbxref;
- }
-
- # Do the rest
- for my $tag ( $feature->get_all_tags() ) {
-
- # If not one of the specially handled ones
- if ( $tag ne 'name' && $tag ne 'note' && $tag ne 'db_xref' ) {
-
- # If not a reserved_key
- if ( !$reserved_keys{$tag} ) {
- my @vals = $feature->get_tag_values($tag);
- $attrs{lc($tag)} = \@vals;
- }
- else {
- warn
-"Trying to set a reserved key $tag with value $attrs{$tag}";
- }
- }
- }
- my %response = (
- id => $attrs{'ID'},
- attr_str => $self->post_process_attribute_string(%attrs),
- );
- return %response;
-}
-
-sub post_process_attribute_string {
- my ($self,%attrs) = @_;
- my @parts = ();
- for my $k ( keys %attrs ) {
-
- # IGNORED TAGS
- if ( $k ne 'translation' && $k ne 'product' ) {
- my $joined =
- $self->escape_and_join_attribute_subpart( $attrs{$k} );
- push @parts, "$k=$joined";
- }
- }
-
- #print STDERR join("\t",@parts),"\n";
- return join( ";", @parts );
-
-}
-
-sub escape_and_join_attribute_subpart {
- my ($self,$ref) = @_;
- if ( ref($ref) eq 'ARRAY' ) {
- my @attrs = @{$ref};
- return join( ",", map { $self->escape($_) } @attrs );
- }
- else { #scalar
- return $self->escape($ref);
- }
-}
-
-sub get_source {
- my ($self) = @_;
- if ( !$self->source() ) {
- $self->auto_source();
- }
- return $self->source();
-}
-
-sub auto_source {
-
- # Auto generate a source feature, in case there isn't one.
- my ($self) = @_;
- my @region = (
- $self->seqid(),
- ( $self->genbank() ? 'Genbank' : 'Assembly' ),
- 'contig',
- 1,
- $self->get_length,
- '.',
- '.',
- '.',
- sprintf( "ID=%s;Name=%s", $self->seqid(), $self->seqid() )
- . ( $self->is_circular() ? ";Is_circular=True" : "" )
- );
- $self->source(\@region);
-}
-
-sub add_feature {
- my ( $self, $feat ) = @_;
- my $primary_tag = $feat->primary_tag;
- if ( $primary_tag eq 'CDS' ) {
- my ( $id, $data_0 ) = $self->_add_gene( feat => $feat, );
- my ($data_1) = $self->_add_feature(
- feat => $feat,
- parent => $id,
- );
- $self->_low_level_add_feature($data_0);
- $self->_low_level_add_feature($data_1);
- }
- elsif ( $primary_tag eq 'source' ) {
- my ($data) = $self->_add_feature( feat => $feat, );
-
- # YUCK.
- my @z = @{$data};
- my $seqid = $self->seqid();
- $z[8] =~ s/ID=[^;]*;/ID=$seqid;/g;
- $self->source( \@z );
- }
- elsif ( $primary_tag ne 'gene' ) {
- my ($data) = $self->_add_feature( feat => $feat, );
- $self->_low_level_add_feature($data);
- }
-
-}
-
-sub _low_level_add_feature {
- my ( $self, $data ) = @_;
- push( @{ $self->data() }, $data );
-}
-
-sub _add_feature {
- my ( $self, %data ) = @_;
- my %attrs =
- $self->get_attrs( feat => $data{feat}, parents => $data{parent} );
- my @data = (
- $self->seqid(),
- $self->source_map( $data{feat}->primary_tag ),
- $self->FT_SO_map( $data{feat}->primary_tag ),
- $data{feat}->start,
- $data{feat}->end,
- '.',
- ( $data{feat}->strand == 1 ? '+' : '-' ),
- '.',
- $attrs{attr_str}
- );
-
- #$self->_low_level_add_feature(\@data);
- return \@data;
-}
-
-sub _add_gene {
- my ( $self, %data ) = @_;
- my $id = $self->id_prefix() . '.' . ( $self->global_feat_idx());
- $self->global_feat_idx($self->global_feat_idx()+1);
- my $gene_count = ++$feat_type_count{'gene'};
- my @data = (
- $self->seqid(),
- $self->source_map( $data{feat}->primary_tag ),
- $self->FT_SO_map('gene'),
- $data{feat}->start,
- $data{feat}->end,
- '.',
- ( $data{feat}->strand == 1 ? '+' : '-' ),
- '.',
- "ID=$id;Name=Gene$gene_count"
- );
-
- #$self->_low_level_add_feature(\@data);
- return ( $id, \@data );
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::GBK2GFF3
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Galaxy.pm
--- a/cpt_psm_recombine/lib/CPT/Galaxy.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,284 +0,0 @@
-package CPT::Galaxy;
-use Moose;
-use strict;
-use warnings;
-use Data::Dumper;
-use autodie;
-
-
-sub gen {
- my ( $self, %p ) = @_;
- my $parameterCollection = $p{full_options};
- my @opt_spec = @{ $parameterCollection->params() }; # This feels bad?
- my %defaults = @{ $p{defaults} };
- my @outputs = @{ $p{outputs} };
- my @tests;
- if(defined $p{tests} && ref $p{tests} eq 'ARRAY'){
- @tests = @{ $p{tests} };
- }
-
- my $optional_output_file = $p{_output_file};
-
- #my @registered_outputs = @{ $outputs{'registered'} };
- my $appid = $p{appid};
- my $appname = $p{appname};
- my $appdesc = $p{appdesc};
- my $appvers = $p{appvers};
-
- # Set up the XML Writer
- require XML::Writer;
- my $xml_writer;
- if($optional_output_file){
- $xml_writer = XML::Writer->new(OUTPUT => $optional_output_file);
- }else{
- $xml_writer = XML::Writer->new();
- }
-
- # Set up the tool element
- $xml_writer->startTag(
- 'tool',
- id => $appid,
- name => $appname,
- version => $appvers,
- );
-
- # Add all of our sections, passing a single xml_writer around.
- $self->description_section($xml_writer,$appdesc);
- $self->version_section($xml_writer);
- $self->stdio_section($xml_writer);
-
- $self->command_section($xml_writer,\@opt_spec);
- $self->input_section($xml_writer,\@opt_spec);
- $self->output_section($xml_writer,\@opt_spec);
-
- $self->help_section($xml_writer);
-
- $self->test_section($xml_writer, @tests);
-
- $xml_writer->endTag('tool');
- $xml_writer->end();
- # End of tool xml conf
-
- # if OOF was set to 'self', that means it's stored internally, so we should return
- if(defined $optional_output_file && $optional_output_file eq 'self'){
- return $xml_writer->to_string;
- }
-}
-
-sub test_section {
- my ($self, $xml_writer, @test_cases) = @_;
- $xml_writer->startTag('tests');
- foreach my $test(@test_cases){
- my %test_details = %{$test};
- $xml_writer->startTag('test');
- # Each test case has: name, params, outputs
-
- # Params will be as they're specified on the command line, so they /should/ be okay to use in galaxy code.
- my %params = %{$test_details{'params'}};
- foreach(sort(keys(%params))){
- # As written, will not handle multiply valued attributes
- $xml_writer->startTag('param',
- name => $_,
- value => $params{$_},
- );
- $xml_writer->endTag();
- }
- # outputs
- my %outputs = %{$test_details{'outputs'}};
- foreach(sort(keys(%outputs))){
- # As written, will not handle multiple outputs well
- # This bit of code because for every output there's a
- # name you expect on the command line, and a file you
- # want to compare against (galaxy mucks about with
- # names so we don't have to worry about it. However,
- # from the command line, we have to know the name of
- # the output file we're going to produce so we can
- # compare it against another copy of this file. It's
- # less than ideal, but there's not much we can do.
- my @output_cmp = @{$outputs{$_}};
- $xml_writer->startTag('output',
- name => $_,
- file => $output_cmp[1],
- );
- $xml_writer->endTag();
- }
- $xml_writer->endTag();
- }
- $xml_writer->endTag();
-}
-
-sub description_section{
- my ($self, $xml_writer, $appdesc) = @_;
- $xml_writer->startTag('description');
- $xml_writer->characters(sprintf('%s',$appdesc));
- $xml_writer->endTag('description');
-}
-
-sub version_section{
- my ($self, $xml_writer) = @_;
- $xml_writer->startTag('version_command');
- $xml_writer->characters("perl $0 --version");
- $xml_writer->endTag('version_command');
-}
-sub stdio_section{
- my ($self, $xml_writer) = @_;
- $xml_writer->startTag('stdio');
- $xml_writer->startTag(
- 'exit_code',
- range => "1:",
- level => "fatal",
- );
- $xml_writer->endTag('exit_code');
- $xml_writer->endTag('stdio');
-}
-sub command_section{
- ###################
- # COMMAND SECTION #
- ###################
- my ($self, $xml_writer,$opt_spec_ref) = @_;
- my @opt_spec = @{$opt_spec_ref};
- $xml_writer->startTag(
- 'command',
- interpreter => 'perl',
- );
- my $command_string = join("\n", $0, '--galaxy','--outfile_supporting $__new_file_path__','');
- foreach (@opt_spec) {
- if(
- # not galaxy specific and we are not instructed to hide
- !$_->_galaxy_specific() && $_->_show_in_galaxy()
- ||
- # is galaxy specific and is hidden
- $_->_galaxy_specific() && $_->hidden() && $_->_show_in_galaxy()
- ){
- #if(!$_->hidden() || ){
- my $command_addition = $_->galaxy_command();
- if($command_addition){
- $command_string .= $command_addition . "\n";
- }
- }
- }
- $xml_writer->characters($command_string);
- $xml_writer->endTag('command');
-}
-sub input_section{
- my ($self, $xml_writer,$opt_spec_ref) = @_;
- my @opt_spec = @{$opt_spec_ref};
- #################
- # INPUT SECTION #
- #################
- $xml_writer->startTag('inputs');
- foreach (@opt_spec) {
- if(
- # not galaxy specific and we are not instructed to hide
- !$_->hidden() && !$_->_galaxy_specific() && $_->_show_in_galaxy()
- ){
- $_->galaxy_input($xml_writer);
- }
- }
- $xml_writer->endTag('inputs');
-}
-sub output_section{
- my ($self, $xml_writer,$opt_spec_ref) = @_;
- my @opt_spec = @{$opt_spec_ref};
- ##################
- # OUTPUT SECTION #
- ##################
- $xml_writer->startTag('outputs');
- foreach (@opt_spec) {
- if(
- # not galaxy specific and we are not instructed to hide
- !$_->_galaxy_specific() && $_->_show_in_galaxy()
- ){
- $_->galaxy_output($xml_writer);
- }
- }
- $xml_writer->endTag('outputs');
-}
-sub help_section{
- my ($self, $xml_writer) = @_;
- ################
- # HELP SECTION #
- ################
-
- $xml_writer->startTag('help');
- # Here we incur some dependencies. D:
- use IPC::Run3;
- my ($in,$out,$err);
- use File::Temp;
- my $tempfile = File::Temp->new(
- TEMPLATE => 'libcpt.galaxy.tempXXXXX',
- DIR => '/tmp/',
- UNLINK => 1,
- SUFFIX => '.html'
- );
-
- use File::Which;
- my $pod2md = which("pod2markdown");
- if(! defined($pod2md)){
- print STDERR "pod2markdown not available. Install Pod::Markdown";
- }else{
- my @command = ('pod2markdown',$0,$tempfile);
- run3 \@command, \$in, \$out, \$err;
- # Pandoc
- my $pandoc = which("pandoc");
- if(! defined($pandoc)){
- print STDERR "Pandoc not available, cannot convert to RST";
- }else{
- @command = ("pandoc",'-f','markdown','-t','rst', $tempfile);
- run3 \@command, \$in, \$out, \$err;
- if(-e $tempfile){
- unlink($tempfile);
- }
- $xml_writer->characters($out);
- }
- }
- $xml_writer->endTag('help');
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Galaxy
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 gen
-
- require CPT::Galaxy;
- my $galaxy_xml_generator = CPT::Galaxy->new();
- $galaxy_xml_generator->gen(
- full_options => \@options_specification,
- appdesc => $self->{'appdesc'},
- appid => $self->{'appid'},
- appname => $self->{'appname'},
- defaults => $passed_opts{'defaults'},
- outputs => $passed_opts{'outputs'},
- );
-
-Generates a galaxy XML file (using XML::Writer) from the options_specification object, which is an array of
-['file|f=s', "blah", {some_req => 'some_val'] and CPT::Parameter::* objects. For simplicity, the first type
-is currently DEPRECATED
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/GalaxyGetOpt.pm
--- a/cpt_psm_recombine/lib/CPT/GalaxyGetOpt.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,384 +0,0 @@
-package CPT::GalaxyGetOpt;
-use CPT::ParameterGroup;
-use autodie;
-use File::Spec::Functions qw[ catfile catdir ];
-use Carp;
-no warnings;
-use Moose;
-
-has 'appdesc' => ( is => 'rw', isa => 'Str', default => sub { 'No application description provided' } );
-has 'appid' => ( is => 'rw', isa => 'Str', default => sub { "unknown_app_$0" } );
-has 'appname' => ( is => 'rw', isa => 'Str', default => sub { "Unnamed application $0"} );
-has 'appvers' => ( is => 'rw', isa => 'Str', default => sub { '0.0.1' } );
-has 'registered_outputs' => ( is => 'rw', isa => 'HashRef' );
-has 'opt' => ( is => 'rw', isa => 'Any');
-has filetype_detector => (
- is => 'rw',
- isa => 'Any',
-);
-
-
-sub getOptions {
- my ( $self, %passed_opts ) = @_;
- require Getopt::Long::Descriptive;
- # these get pushed through, thankfully. This way we can manually check for --genenrate_galaxy_export and exit slightly earlier.
- my %args = map { $_ => 1 } @ARGV;
- # Sections passed to us
- my @script_args = @{ $passed_opts{'options'} };
- my %defaults = @{ $passed_opts{'defaults'} };
-
-
-
- # Output Files Stuff
- my @getopt_formatted_outputs;
- my %outputs;
- # This is originally an array of output files. We transform to a hash for
- # easier lookup
- foreach(@{$passed_opts{'outputs'}}){
- # Grab name/desc/opts
- my ($name, $desc, $opts) = @{$_};
- $outputs{$name} = {
- description => $desc,
- options => $opts,
- required => 1,
- };
- my %tmp_opts_fix = %{$opts};
- $tmp_opts_fix{required} = 1;
- $tmp_opts_fix{validate} = 'File/Output';
- push(@getopt_formatted_outputs, [$name, $desc, \%tmp_opts_fix]);
-
- # Adds the {name}_files_path special parameter
- my %fp_opts = %{$opts};
- $fp_opts{hidden} = 1;
- $fp_opts{value} = "${name}.files_path";
- $fp_opts{default} = "${name}.files_path";
- $fp_opts{required} = 1;
- $fp_opts{_galaxy_specific} = 1;
- $fp_opts{validate} = "String";
- #$fp_opts{_show_in_galaxy} = 0;
- my $desc2 = "Associated HTML files for $name";
- my $name2 = "${name}_files_path";
- my @files_path = ($name2, $desc2, \%fp_opts);
- push(@getopt_formatted_outputs, \@files_path);
-
- # Adds the {name}_format special parameter
- my %fp_opts3 = %{$opts};
- #$fp_opts3{hidden} = 1;
- $fp_opts3{default} = $fp_opts3{"default_format"};
- $fp_opts3{validate} = "File/OutputFormat";
- $fp_opts{required} = 1;
- #$fp_opts3{_galaxy_specific} = 1;
- #$fp_opts3{_show_in_galaxy} = 0;
- my $desc3 = "Associated Format for $name";
- my $name3 = "${name}_format";
- my @files_path3 = ($name3, $desc3, \%fp_opts3);
- push(@getopt_formatted_outputs, \@files_path3);
-
- # Adds the {name}_files_path special parameter
- my %fp_opts4 = %{$opts};
- $fp_opts4{hidden} = 1;
- $fp_opts4{value} = "${name}.id";
- $fp_opts4{default} = "${name}.id";
- $fp_opts4{validate} = "String";
- $fp_opts4{_galaxy_specific} = 1;
- #$fp_opts4{_show_in_galaxy} = 0;
- my $desc4 = "Associated ID Number for $name";
- my $name4 = "${name}_id";
- my @files_path4 = ($name4, $desc4, \%fp_opts4);
- push(@getopt_formatted_outputs, \@files_path4);
-
- }
- $self->registered_outputs(\%outputs);
-
-
- # Store the application's name and description
- if($defaults{appdesc}){
- $self->appdesc($defaults{appdesc});
- }
- if($defaults{appid}){
- $self->appid($defaults{appid});
- }
- if($defaults{appname}){
- $self->appname($defaults{appname});
- }
- if($defaults{appvers}){
- $self->appvers($defaults{appvers});
- }
-
- my $usage_desc;
- if ( $self->appname() && $self->appdesc() ) {
- $usage_desc = sprintf( "%s: %s\n%s [options] ", $self->appname(), $self->appdesc(), $0 );
- }
-
- # Individual parameter parsers
- require CPT::Parameter;
- # Which are stored in a collection of them
- require CPT::ParameterCollection;
-
-
- my $parameterCollection = CPT::ParameterCollection->new();
-
- $parameterCollection->push_params(
- [
- #['Standard Options'],
- #@extra_params,
- #[],
- ['outfile_supporting' , 'File or folder to output to, necessary when (class == Report || multiple calls to classyReturnResults || multiple standalone output files)' , { hidden => 1, validate => 'String', default => '__new_file_path__', _galaxy_specific => 1, _show_in_galaxy => 0 }],
- ['galaxy' , 'Run with galaxy-specific overrides' , {validate => 'Flag' , hidden=>1, _galaxy_specific => 1, _show_in_galaxy => 0}],
- ['generate_galaxy_xml' , 'Generate a compatible galaxy-xml file. May need editing' , {validate => 'Flag' , hidden=>1, _galaxy_specific => 1, _show_in_galaxy => 0}],
- [],
- ['Script Options'],
- ]
- );
-
- # If there's a default specified, we should apply that.
- foreach (@script_args) {
- # If it's an array, push as is, because either it's old style/label/empty
- # However, this doesn't allow setting defaults for things that aren't parameters
- # and it won't set defaults for old style
- if ( ref $_ eq 'ARRAY' ) {
- $parameterCollection->push_param($_);
- }elsif( ref $_ eq 'HASH'){
- my $pG = CPT::ParameterGroup->new();
- $pG->set_data($_);
- $parameterCollection->push_group($pG);
- }
- }
-
- # Our magic output files stuff
- $parameterCollection->push_params(
- [
- [],
- ['Output Files'],
- @getopt_formatted_outputs,
- ]
- );
-
- # Other standard options like verbosity/version/etc
- $parameterCollection->push_params(
- [
- [],
- ['Other Standard Options'],
- ['verbose|v', 'Be more verbose', {validate => 'Flag', _show_in_galaxy => 0}],
- ['version', 'Print version information', {validate => 'Flag', _show_in_galaxy => 0}],
- ['help', 'Print usage message and exit', {validate => 'Flag', _show_in_galaxy => 0}],
- ],
- );
-
- # If we want the galaxy_xml, do that before the reduction step is called.
- if ( $args{'--version'} ) {
- print $self->appvers() . "\n";
- exit 1;
- }
-
- # If we want the galaxy_xml, do that before the reduction step is called.
- if ( $args{'--generate_galaxy_xml'} ) {
- require CPT::Galaxy;
- my $galaxy_xml_generator = CPT::Galaxy->new();
- $galaxy_xml_generator->gen(
- full_options => $parameterCollection,
- appdesc => $self->appdesc(),
- appid => $self->appid(),
- appname => $self->appname(),
- appvers => $self->appvers(),
- defaults => $passed_opts{'defaults'},
- outputs => $passed_opts{'outputs'},
- tests => $passed_opts{'tests'},
- );
- exit 1;
- }
-
- if( $args{'--gen_test'} ){
- require CPT::GenerateTests;
- my $tgen = CPT::GenerateTests->new();
- if(defined $passed_opts{'tests'}){
- print $tgen->gen(@{$passed_opts{'tests'}});
- }else{
- print $tgen->gen_empty();
- }
- exit 0;
- }
-
- # Now that the options_spec is complete, we reduce to something getopt will be happy with
- my @getopt_spec = $parameterCollection->getopt();
- #print STDERR Dumper \@getopt_spec;
- #exit 1;
-
- # It would be nice if there was a way to ensure that it didn't die here...
- # Execute getopt
- my ( $local_opt, $usage ) = Getopt::Long::Descriptive::describe_options( $usage_desc, @getopt_spec );
- $self->opt($local_opt);
-
- # Now that we've gotten the user's options, we need to copy their values back to our ParameterCollection
- $parameterCollection->populate_from_getopt($self->opt());
-
- # If they want help, print + exit
- if ( $self->opt && $self->opt->help ) {
- print $usage;
- exit 1;
- }
- # Validate their choices
- if ( $parameterCollection->validate($self->opt) ) {
- return $self->opt;
- # Easy access for the script, don't want them to have to deal with PC (just yet)
- }
- else {
- # Some params failed to validate, so we die.
- croak "Validation errors were found so cannot continue";
- }
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::GalaxyGetOpt
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 getOptions
-
- my $ggo = CPT::GalaxyGetOpt->new();
- my $options = $ggo->getOptions(
- 'options' => [
- [ 'file', 'Input file', { validate => 'File/Input' } ],
- [
- "option" => "Select an option!",
- {
- validate => 'Option',
- options => \%options,
- multiple => 1,
- }
- ],
- [
- "float" => "I'm a float",
- { validate => 'Float' }
- ],
- [
- "int" => "I'm an int",
- { validate => 'Int', default => [42, 34], required => 1, multiple => 1 }
- ],
- [],
- ['New section'],
- [
- "string" => "I'm a simple string",
- { validate => 'String' }
- ],
- [
- 'flag' => 'This is a flag',
- { validate => 'Flag' }
- ],
- ],
- 'outputs' => [
- [
- 'my_output_data',
- 'Output TXT File',
- {
- validate => 'File/Output',
- required => 1,
- default => 'out',
- data_format => 'text/plain',
- default_format => 'TXT',
- }
- ],
- ],
- 'defaults' => [
- 'appid' => 'TemplateScript',
- 'appname' => 'Template',
- 'appdesc' => 'A basic template for a new CPT application',
- 'appvers' => '1.94',
- ],
- 'tests' => [
- {
- test_name => "Default",
- params => {
- },
- outputs => {
- 'my_output_data' => ["out.txt", 'test-data/outputs/template.default.txt' ],
- },
- },
- {
- test_name => "Option A specified",
- params => {
- 'int', '10000',
- },
- outputs => {
- 'my_output_data' => ["out.txt", 'test-data/outputs/template.A.txt' ],
- },
- },
- ],
- );
-
- my @data;
- foreach(qw(file int string option float flag)){
- # Create a 2D array of all of our optoins
- push(@data, [ $_, $options->{$_}]);
- }
-
- my %table = (
- 'Sheet1' => {
- header => [qw(Key Value)],
- data => \@data,
- }
- );
-
- # And store it to file!
- use CPT::OutputFiles;
- my $crr_output = CPT::OutputFiles->new(
- name => 'my_output_data',
- GGO => $ggo,
- );
- $crr_output->CRR(data => \%table);
-
-Gets command line options, and prints docs. Very convenient, removes the burden of writing any Getopt code from you.
-
-=head3 Default Options Provided
-
-=over 4
-
-=item *
-
-help, man - cause an early exit and printing of the POD for your script.
-
-=item *
-
-verbose - flag to make the script verbose. Will print everything that is sent to returnResults. Individual scripts should take advantage of this option
-
-=back
-
-=head3 Advanced Options Provided
-
-=over 4
-
-=item C<--generate_galaxy_xml>
-
-Generates valid XML for the tool for use in Galaxy
-
-=item C<--gen_test>
-
-Generates code to test the script using specified test data
-
-=back
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/GenerateTests.pm
--- a/cpt_psm_recombine/lib/CPT/GenerateTests.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-package CPT::GenerateTests;
-use Moose;
-use strict;
-use warnings;
-use autodie;
-
-
-sub gen {
- my ( $self, @tests ) = @_;
-
- my $test_count = 0;
-
- # Header
- my @outtext = (
- '#!/usr/bin/env perl',
- 'use strict;',
- 'use warnings;',
- 'use Test::More tests => 0;',
- 'use IPC::Run3 qw(run3);',
- '',
- 'my ( @base, @cmd, $in, $out, $err );',
- '',
- sprintf("%s = ('perl', '%s');", '@base', $0),
- 'my %result_files = (',
- );
- my %test_names;
- # Loop across tests
- foreach my $test_ref(@tests){
- my %test = %{$test_ref};
- my %params = %{$test{params}};
- my $command_line = "";
- foreach(sort(keys(%params))){
- $command_line .= "--$_ $params{$_} ";
- }
- my %outputs = %{$test{outputs}};
-
- $test_names{$test{test_name}}++;
- if($test_names{$test{test_name}} > 1){
- printf STDERR "Duplicate test found: %s. This will cause fewer tests to be run than expected\n", $test{test_name};
- }
- push(@outtext, sprintf(' "%s" => {', $test{test_name}));
- push(@outtext, sprintf(' command_line => "%s",', $command_line));
- push(@outtext, ' outputs => {');
-
- foreach my $key(keys %outputs){
- push(@outtext, sprintf(' "%s" => ["%s", "%s"],', $key, @{$outputs{$key}}));
- # Add another test
- $test_count++;
- $test_count++;
- }
- push(@outtext, ' },');
- push(@outtext, ' },');
- };
- push(@outtext, ');');
- push(@outtext,'');
-
- push(@outtext, 'foreach ( keys(%result_files) ) {');
- push(@outtext, ' # run with the command line');
- push(@outtext, ' my @cmd1 = ( @base, split( / /, $result_files{$_}{command_line} ) );');
- push(@outtext, ' run3 \@cmd1, \$in, \$out, \$err;');
- push(@outtext, ' if($err){ print STDERR "Exec STDERR: $err"; }');
- push(@outtext, ' if($out){ print STDERR "Exec STDOUT $out"; }');
- push(@outtext, ' # and now compare files');
- push(@outtext, ' foreach my $file_cmp ( keys( %{$result_files{$_}{outputs}} ) ) {');
- push(@outtext, ' my ($gen, $static) = @{$result_files{$_}{outputs}{$file_cmp}};');
- push(@outtext, ' my @diff = ( "diff", $gen, $static );');
- push(@outtext, ' my ($in_g, $out_g, $err_g);');
- push(@outtext, ' run3 \@diff, \$in_g, \$out_g, \$err_g;');
- push(@outtext, ' if($err_g) { print STDERR "err_g $err_g\n"; }');
- push(@outtext, ' if($out_g) { print STDOUT "out_g $out_g\n"; }');
- push(@outtext, ' chomp $out_g;');
- push(@outtext, ' is( -e $gen, 1, "[$_] Output file must exist"); ');
- push(@outtext, ' is( length($out_g), 0, "[$_] Checking validity of output \'$file_cmp\'" );');
- push(@outtext, ' unlink $gen;');
- push(@outtext, ' }');
- push(@outtext, '}');
-
- # Update test counts
- $outtext[3] = "use Test::More tests => $test_count;";
- if($test_count == 0){
- return $self->gen_empty();
- }
-
- return join("\n", @outtext);
-}
-
-sub gen_empty {
- my ( $self ) = @_;
-
- my $test_count = 0;
-
- my @outtext = (
- '#!/usr/bin/env perl',
- 'use strict;',
- 'use warnings;',
- 'use Test::More skip_all => "No tests defined for ' . $0 .'"',
- );
- return join("\n", @outtext);
-}
-
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::GenerateTests
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 gen
-
- require CPT::GenerateTests;
- my $tgen = CPT::GenerateTests->new();
- $tgen->gen(
- {
- test_name => "Default",
- params => {
- 'file' => 't/test-files/aa.gbk',
- 'chromosome' => 'test',
- 'color' => 'red',
- 'intensity' => 'vvvvl',
- },
- outputs => {
- 'result_name' => ['circos_k.txt', 'test-data/circos_k.txt'],
- }
- },
- );
- exit 1;
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Logger.pm
--- a/cpt_psm_recombine/lib/CPT/Logger.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-package CPT::Logger;
-use Moose;
-use strict;
-use warnings;
-use autodie;
-
-# This will eventually be merged into CPT proper.
-
-has 'outfile' => ( is => 'ro', isa => 'Str' );
-has 'appid' => ( is => 'ro', isa => 'Str' );
-has 'outfilehandle' => ( is => 'ro', isa => 'FileHandle' );
-
-my $fh;
-
-sub new {
- my ( $class, %options ) = @_;
- my $self = {%options};
- bless( $self, $class );
- if ( $self->{'outfile'} ) {
- open( $fh, '>>', $self->{'outfile'} );
- }
- return $self;
-}
-
-sub log {
- my ( $self, $message ) = @_;
- my @t = localtime;
- $t[5] += 1900;
- $t[4]++;
- my $time = sprintf "%04d-%02d-%02d %02d:%02d:%02d",
- @t[ 5, 4, 3, 2, 1, 0 ];
- print $fh "[$time] $message\n";
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Logger
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/OutputFiles.pm
--- a/cpt_psm_recombine/lib/CPT/OutputFiles.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,737 +0,0 @@
-package CPT::OutputFiles;
-use Carp;
-use Moose;
-use strict;
-use warnings;
-use autodie;
-use File::Spec;
-
-
-# ABSTRACT: Handles script outputs in a sane way, providing facilities to format data and name files for regular use, or in galaxy.
-
-
-# A list of acceptable ouput formats. Some/many of these may be missing implementations.
-# For instance, the pandoc output format is completely unimplemented
-# These will NEED to be re-worked.
-has 'acceptable_formats' => (
- is => 'ro',
- isa => 'HashRef',
- default => sub {
- {
- 'text/tabular' => [qw(TSV TSV_U CSV CSV_U XLS ODS Dumper JSON YAML XLSX)],
- 'genomic/annotated' => [qw(ABI Ace AGAVE ALF AsciiTree BSML BSML_SAX ChadoXML Chaos ChaosXML CTF EMBL EntrezGene Excel Exp Fasta Fastq GAME GCG Genbank Interpro KEGG LargeFasta LaserGene LocusLink PHD PIR PLN Qual Raw SCF SeqXML Strider Swiss Tab TIGR TIGRXML TinySeq ZTR)],
- 'genomic/raw' => [qw(Fasta)],
- 'genomic/interval' => [qw(GFF3)],
- 'text/html' => [qw(HTML)], # Theoretically this will be consumed by text/report
- 'text/report' => [qw(Pandoc)],
- 'text/plain' => [qw(TXT CONF)],
- 'image/svg' => [qw(SVG)],
- 'image/png' => [qw(PNG)],
- 'archive' => [qw(tar.gz zip tar)],
- 'Dummy' => [qw(Dummy)],
- }
- }
-);
-
-has 'format_mapping' => (
- is => 'ro',
- isa => 'HashRef',
- default => sub {
- {
- 'TSV' => 'tabular',
- 'CSV' => 'tabular',
- 'TSV_U' => 'tabular',
- 'CSV_U' => 'tabular',
- 'XLS' => 'data',
- 'ODS' => 'data',
- 'Dumper' => 'txt',
- 'JSON' => 'txt',
- 'YAML' => 'txt',
- 'XLSX' => 'data',
- 'Fasta' => 'fasta',
- 'GFF3' => 'interval',
- 'HTML' => 'html',
- 'Pandoc' => 'txt',
- 'TXT' => 'txt',
- 'CONF' => 'txt',
- 'SVG' => 'xml',
- 'PNG' => 'png',
- 'Dummy' => 'data',
- 'tar.gz' => 'tar.gz',
- 'zip' => 'zip',
- 'tar' => 'tar',
- #Genomic formats
- 'ABI' => 'data',
- 'Ace' => 'txt',
- 'AGAVE' => 'xml',
- 'ALF' => '',
- 'AsciiTree' => 'txt',
- 'BSML' => 'xml',
- 'BSML_SAX' => 'xml',
- 'ChadoXML' => 'xml',
- 'Chaos' => 'xml',
- 'ChaosXML' => 'xml',
- 'CTF' => 'data',
- 'EMBL' => 'txt',
- 'EntrezGene' => 'txt',
- 'Excel' => 'data',
- 'Exp' => 'txt',
- 'Fastq' => 'fastq',
- 'GAME' => 'xml',
- 'GCG' => 'txt',
- 'Genbank' => 'txt',
- 'Interpro' => 'xml',
- 'KEGG' => 'txt',
- 'LargeFasta' => 'txt',
- 'LaserGene' => 'data',
- 'LocusLink' => 'data',
- 'PHD' => 'data',
- 'PIR' => 'data',
- 'PLN' => 'data',
- 'Qual' => 'data',
- 'Raw' => 'txt',
- 'SCF' => 'data',
- 'SeqXML' => 'xml',
- 'Strider' => 'data',
- 'Swiss' => 'txt',
- 'Tab' => 'tabular',
- 'TIGR' => 'xml',
- 'TIGRXML' => 'xml',
- 'TinySeq' => 'xml',
- 'ZTR' => 'data',
- }
- }
-);
-
-sub valid_formats {
- my ($self, $format) = @_;
- return ${$self->acceptable_formats()}{$format};
-}
-
-sub get_format_mapping{
- my ($self, $format) = @_;
- return ${$self->format_mapping()}{$format};
-}
-
-
-# User supplied options
-has 'name' => ( is => 'ro', isa => 'Str' );
-has 'GGO' => ( is => 'ro', isa => 'Any' );
-has 'galaxy' => (is => 'rw', isa => 'Bool');
-
-# These are extracted on init from from CPT
-has 'output_id' => (is => 'rw', isa => 'Str');
-has 'output_label' => (is => 'rw', isa => 'Str');
-has 'output_opts' => (is => 'rw', isa => 'HashRef');
-# From galaxy
-has 'new_file_path' => (is => 'rw', isa => 'Str');
-has 'files_path' => (is => 'rw', isa => 'Str');
-has 'files_id' => (is => 'rw', isa => 'Str');
-# ???
-has 'parent_filename' => (is => 'rw', isa => 'Str');
-has 'parent_internal_format' => (is => 'rw', isa => 'Str');
-has 'parent_default_output_format' => (is => 'rw', isa => 'Str');
-
-has 'init_called' => (is => 'rw', isa =>'Bool');
-
-sub initFromArgs {
- my ($self, %args) = @_;
-
- # We will only ever care about one (as there is one of these objects
- # per registered output)
- my %registered_outputs = %{$self->GGO()->registered_outputs()};
- # If the output name specified in "name" was not known to registered_outputs
- if(!defined($self->name())){
- croak("You must supply a name to the instantiation of CRR");
- }
- #if(!defined($registered_outputs{$args{name}})){
- #croak("The script author tried to call GGO's classyReturnResults method with an output file not mentioned in the outputs section.");
- #}
-
- # Carrying on
- # We grab the pre-specified data regarding that output
- my %reg_out_params = %{$registered_outputs{$self->name()}};
- # Store these for future calls of sub/var
- $self->output_id($self->name());
- $self->output_label($reg_out_params{description});
- $self->output_opts($reg_out_params{options});
-
-
- $self->parent_internal_format($reg_out_params{options}{data_format});
- $self->parent_default_output_format($reg_out_params{options}{default_format});
- $self->parent_filename($reg_out_params{options}{default});
-
- # Special variables
- # --genemark "${genemark}" --genemark_format "${genemark_format}"
- # --genemark_files_path "${genemark_files_path}"
- # --genemark_id "${genemark_id}"
-
- # If they've specified a filename on the command line, that should
- # override the default value
- if(defined $self->GGO->opt->{$self->name()}){
- $self->parent_filename($self->GGO->opt->{$self->name()});
- }
- # If they've specified a {str}_format option on the command line, that
- # should override the default value
- if(defined $self->GGO->opt->{$self->name() . '_format'}){
- $self->parent_default_output_format($self->GGO->opt->{$self->name() . '_format'});
- }
-
- # Grab supporting files path (added as new history items)
- $self->new_file_path($self->GGO->opt->{outfile_supporting});
- # Copy galaxy specific variables
- if(defined $self->GGO->opt->{$self->name() . '_files_path'}){
- $self->files_path($self->GGO->opt->{$self->name() . '_files_path'});
- }
- if(defined $self->GGO->opt->{$self->name() . '_id'}){
- $self->files_id($self->GGO->opt->{$self->name() . '_id'});
- }
-
- # If --galaxy has been specified, we need to be aware of this
- if ( $self->GGO->opt->{galaxy} ) {
- $self->galaxy(1);
- }else{
- $self->galaxy(0);
- }
- $self->init_called(1);
-}
-
-
-
-has 'times_called' => ( is => 'rw', isa => 'Num', default => sub {0} );
-has 'naming_strategy' => ( is => 'rw', isa => 'Str', default => "norm" ); #Other options are "var" and "sub"
-
-
-sub _genCRR {
- my ($self, %args) = @_;
- if(!$self->init_called()){
- $self->initFromArgs(%args);
- }
-
- # If the user supplied a custom extension, pull that (useful in
- # dummy/data output type)
- if(defined $args{extension}){
- $self->extension($args{extension});
- }
-
- # This is a mandatory parameter
- if(!defined $args{filename}){
- $self->given_filename($self->parent_filename());
- }else{
- $self->given_filename($args{filename});
- }
-
- # Allow overriding default format parameters
- my $writer = $self->writer_for_format(
- defined $args{data_format} ? $args{data_format} : $self->parent_internal_format(),
- defined $args{format_as} ? $args{format_as} : $self->parent_default_output_format(),
- );
-
- # Ugh
- $writer->OutputFilesClass($self);
- if($args{'data'}){
- $writer->data( $args{'data'} );
- }
- $writer->process_data();
- $writer->write();
- my @returned_filenames = @{$writer->used_filenames()};
- print STDERR join("\n",map{"FN: $_"} @returned_filenames)."\n";
- $self->bump_times_called();
- return @returned_filenames;
-}
-
-
-sub CRR {
- my ( $self, %args ) = @_;
- return $self->_genCRR(%args);
-}
-
-
-sub subCRR {
- my ( $self, %args ) = @_;
- # Change naming behaviour
- $self->naming_strategy('sub');
- return $self->_genCRR(%args);
-}
-
-
-sub varCRR {
- my ( $self, %args ) = @_;
- # Change naming behaviour
- $self->naming_strategy('var');
- return $self->_genCRR(%args);
-}
-
-
-sub bump_times_called{
- my ($self) = @_;
- $self->times_called($self->times_called() + 1 );
- return $self->times_called();
-}
-
-
-sub writer_for_format{
- my($self, $format, $requested) = @_;
-
- # For the specified data_format, grab the acceptable handlers for that format
- my %acceptable = %{$self->acceptable_formats()};
- my %acceptable_handlers = map { $_ => 1 } @{ $acceptable{ $format } };
-
- if (!$acceptable_handlers{$requested} ) {
- carp(sprintf( "Unacceptable output format choice [%s] for internal"
- ."data type for type %s. Acceptable formats are [%s]."
- ."Alternatively, unacceptable output file.", $requested, $format,
- join( ', ', keys(%acceptable_handlers) ) ));
- }
-
- if ( $requested eq 'Dumper' ) {
- require CPT::Writer::Dumper;
- return CPT::Writer::Dumper->new();
- }
- elsif ( $requested eq 'TSV' ) {
- require CPT::Writer::TSV;
- return CPT::Writer::TSV->new();
- }
- elsif ( $requested eq 'CSV' ) {
- require CPT::Writer::CSV;
- return CPT::Writer::CSV->new();
- }
- elsif ( $requested eq 'TSV_U' ) {
- require CPT::Writer::TSV_U;
- return CPT::Writer::TSV_U->new();
- }
- elsif ( $requested eq 'CSV_U' ) {
- require CPT::Writer::CSV_U;
- return CPT::Writer::CSV_U->new();
- }
- elsif ( $requested eq 'YAML' ) {
- require CPT::Writer::YAML;
- return CPT::Writer::YAML->new();
- }
- elsif ( $requested eq 'JSON' ) {
- require CPT::Writer::JSON;
- return CPT::Writer::JSON->new();
- }
- elsif ( $requested eq 'Pandoc' ) {
- require CPT::Writer::Pandoc;
- return CPT::Writer::Pandoc->new();
- }
- elsif ( $requested eq 'XLS' ) {
- require CPT::Writer::Spreadsheet::XLS;
- return CPT::Writer::Spreadsheet::XLS->new();
- }
- elsif ( $requested eq 'XLSX' ) {
- require CPT::Writer::Spreadsheet::XLSX;
- return CPT::Writer::Spreadsheet::XLSX->new();
- }
- elsif ( $requested eq 'TXT' || $requested eq 'CONF' ) {
- require CPT::Writer::TXT;
- return CPT::Writer::TXT->new();
- }
- elsif ( $acceptable_handlers{$requested} && $format eq 'genomic/annotated'){
- require CPT::Writer::Genomic;
- return CPT::Writer::Genomic->new(format => $requested);
- }
- elsif ( $requested eq 'Fasta' ) {
- require CPT::Writer::Fasta;
- return CPT::Writer::Fasta->new();
- }
- elsif ( $requested eq 'GFF3' ) {
- require CPT::Writer::GFF3;
- return CPT::Writer::GFF3->new();
- }
- elsif ( $requested eq 'HTML' ) {
- require CPT::Writer::HTML;
- return CPT::Writer::HTML->new();
- }
- elsif ( $requested eq 'SVG' ) {
- require CPT::Writer::SVG;
- return CPT::Writer::SVG->new();
- }
- elsif ( $requested eq 'PNG' ) {
- require CPT::Writer::Dummy;
- return CPT::Writer::Dummy->new();
- }
- elsif ( $requested eq 'Dummy' ) {
- require CPT::Writer::Dummy;
- return CPT::Writer::Dummy->new();
- }
- elsif ( $requested eq 'tar.gz') {
- require CPT::Writer::Archive;
- return CPT::Writer::Archive->new( format => 'tar.gz' );
- }
- elsif ( $requested eq 'zip') {
- require CPT::Writer::Archive;
- return CPT::Writer::Archive->new( format => 'zip' );
- }
- elsif ( $requested eq 'tar') {
- require CPT::Writer::Archive;
- return CPT::Writer::Archive->new( format => 'tar' );
- }
- else {
- carp(sprintf("Data Format not yet supported [%s, %s]", $format, $requested));
- }
-}
-
-# File extension
-has 'extension' => ( is => 'rw', isa => 'Str');
-# What the user said this file was called.
-has 'given_filename' => (is => 'rw', isa => 'Str');
-
-
-
-
-sub generate_galaxy_variable{
- my ($self) = @_;
- unless( -d $self->new_file_path()){
- mkdir($self->new_file_path());
- }
- my $filename =File::Spec->catfile(
- $self->new_file_path(),
- sprintf( "primary_%s_%s_visible_%s", $self->files_id(), $self->given_filename(), $self->extension())
- );
- return $filename;
-}
-
-
-sub generate_nongalaxy_variable{
- my ($self) = @_;
- my $filename =File::Spec->catfile(
- sprintf( "%s.%s", $self->given_filename(), $self->extension())
- );
- return $filename;
-}
-
-
-sub generate_galaxy_subfile {
- my ($self) = @_;
- unless( -d $self->files_path()){
- mkdir($self->files_path());
- }
- my $filename =File::Spec->catfile(
- $self->files_path,
- sprintf( "%s.%s", $self->given_filename(), $self->extension())
- );
- return $filename;
-}
-
-
-sub generate_nongalaxy_subfile {
- my ($self) = @_;
- # they're pretty much equivalent for now
- return $self->generate_galaxy_subfile();
-}
-
-
-sub get_next_file{
- my ($self) = @_;
- my $filename;
- if ( $self->galaxy() ) {
- # In which case we want to return the primary output file.
- if ( $self->times_called() == 0 ) {
- $filename = $self->parent_filename();
- }
- else {
- if ( $self->naming_strategy eq 'sub' ) {
- $filename = $self->generate_galaxy_subfile();
- }
- elsif($self->naming_strategy eq 'var') {
- $filename = $self->generate_galaxy_variable();
- }else{
- confess("Unknown startegy for multiple output files: " . $self->naming_strategy());
- }
- }
- }
- else # do NOT use galaxy overrides. Paths should be more...sane
- {
- # First time we request, should $filename = the primary value, which
- # should be the file they specify.
- if ( $self->times_called() == 0 ) {
- $filename = $self->given_filename() . '.' . $self->extension();
- }
- else {
- if ( $self->naming_strategy eq 'sub' ) {
- $filename = $self->generate_nongalaxy_subfile();
- }
- elsif($self->naming_strategy eq 'var') {
- $filename = $self->generate_nongalaxy_variable();
- }else{
- confess("Unknown startegy for multiple output files: " . $self->naming_strategy());
- }
- }
- }
- return $filename;
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::OutputFiles - Handles script outputs in a sane way, providing facilities to format data and name files for regular use, or in galaxy.
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 METHODS
-
-=head2 initFromArgs
-
- $o->initFromArgs(name => 'GGO_known_output_name', GGO => $GGO);
-
-Internal method to intialise data structures from the output id provide in C and the data accessible via the C object. You B have already called C<< $GGO->getOptions >>
-
-=head2 classyReturnResults
-
- # in $GGO->getOptions(
- outputs => [
- ['html_page', 'HTML output page',
- {
- validate => 'File/Output',
- default => 'aa', # will produce aa.html
- data_format => 'text/html',
- default_format => 'HTML'
- }
- ]
- ['genbank_download', 'Variable number of GBK files',
- {
- validate => 'File/Output',
- default => 'result', # will produce result.gbk
- data_format => 'genomic/annotated',
- default_format => 'Genbank'
- }
- ]
- ]
- # )
-
- # Then in your script
- $csv_output = CPT::OutputFiles->new(
- name => 'html_page',
- opt => $options,
- );
- $csv_output->CRR(
- data => $data
- );
- # Subfile
- my $loc = $csv_output->subCRR(
- filename => 'cool_picture',
- data_format=>'data',
- extension=>"png"
- );
- move($png_file,$loc);
-
- # You give subfiles a name in case you need to refer to them at any
- # point in the parent file.
- $csv_output->subCRR(
- filename => 'output',
- data => $svg_object,
- data_format => 'image/svg',
- format_as => 'SVG'
- );
-
-
- $gbk_output = CPT::OutputFiles->new(
- name => 'genbank_download',
- opt => $options,
- );
- while(my $individual_genbank = $large_seqio->next){
- $gbk_output->varCRR(
- filename => $individual_genbank->seqid(),
- data => $individual_genbank,
- );
- }
-
-=head2 _genCRR
-
- _genCRR(extension => 'png', data => $data_ref, data_format => 'Dummy',
- format_as => 'Dummy', filename => "my-image");
-
-This is an internal method and should not be called directly. It's the end call of all C, C, and C. Those methods should be used instead.
-
-This method
-
-=over 4
-
-=item Stores some parameters
-
-Specifically C, C, C, C
-
-=item Creates a CPT::Writer
-
-=item Calls the writer's C method
-
-=item returns an array (not arrayref) of filenames
-
-These were the filenames that were produced in the writing process. This may be useful for data like CSV data where the output writer may produce N differently named files for each sheet of data.
-
-=back
-
-=head2 CRR
-
- $o->CRR(data => $ref);
-
-Writes data to an appropriately named file. (This is usually the "default" parameter supplied in the definition of this output). You should call this method first.
-
-=head2 subCRR
-
- $o->subCRR(data => $ref, filename => 'subreport', extension => 'html', data_format => 'text/html', format_as => 'HTML');
-
-Writes data to an appropriately named sub file. A subfile is a file that will appear in a folder in the current directory. Subfiles are useful when you want to reference other output files in a primary HTML output or similar. C gives you a method to produce files and have them automatically placed in a sensible location, from which you can reference the files.
-
-Files are placed in C<< $self->files_path >>. We C this for you, ignoring any errors. If you're paranoid you might want to re-run the mkdir/test for permissions/etc.
-
-You must provide
-
-=over 4
-
-=item filename
-
-name for the output file. You must generate this or it will be named identically to the parent. (And if you call it twice they will clobber each other silently and without mercy)
-
-=item extension
-
-E.g., 'png'
-
-=item data_format
-
-Internal data type. One of the standard C, C, C, etc.
-
-=item format_as
-
-You're welcome to provide a way to access the format parameter of subfiles to your users, however this is not done for you as there is no way for this module to know ahead of time how many subfiles you will produce.
-
-=back
-
-You may call this method after the first call to CRR or instead of calls to CRR
-
-=head2 varCRR
-
- $o->varCRR(data => $ref, filename => 'subreport', extension => 'html', data_format => 'text/html', format_as => 'HTML');
-
-Writes data to an appropriately named var file. A var file or variable file is much like a subfile, except that in galaxy they will show up as individual history items. Additionally, the default behaviour from the command line is to place all generated files in the current working directory, rather than in a special folder.
-
-You must provide
-
-=over 4
-
-=item filename
-
-name for the output file. You must generate this or it will be named identically to the parent. (And if you call it twice they will clobber each other silently and without mercy)
-
-=item extension
-
-E.g., 'png'
-
-=item data_format
-
-Internal data type. One of the standard C, C, C, etc.
-
-=item format_as
-
-You're welcome to provide a way to access the format parameter of subfiles to your users, however this is not done for you as there is no way for this module to know ahead of time how many subfiles you will produce.
-
-=back
-
-You may call this method after the first call to CRR or instead of calls to CRR
-
-=head2 bump_times_called
-
- $o->bump_times_called();
-
-Bumps the internal number representing the number of times you've tried to output files for a given output object. This data is used in construction of filenames
-
-=head2 writer_for_format
-
- $o->writer_for_format('text/tabular', 'TSV_U');
-
-Get the appropriate writer class and instantiate it for a given C and C.
-
-=head2 generate_galaxy_variable
-
- $o->generate_galaxy_variable();
-
-If we need the files to show up as separate History items in galaxy, filenames have to be constructed like this:
-
-=over 4
-
-=item F<$filepath/primary_546_output2_visible_bed>
-
-=item F<$filepath/primary_546_output3_visible_pdf>
-
-=back
-
-where filenames consist of 'primary', an ID number (provided in C on the command lien), a filename, 'visible', and an extension, all joined with C<_>. Additionally C<$filepath> is generally CWD (I think...)
-
-=head2 generate_nongalaxy_variable
-
- $o->generate_nongalaxy_variable();
-
-=over 4
-
-=item F<$given_filename.$extension>
-
-=back
-
-Parameters are taken from the object variables of the same names.
-
-=head2 generate_galaxy_subfile
-
- $o->generate_galaxy_subfile();
-
-=over 4
-
-=item F<$files_path/$given_filename.$extension>
-
-=back
-
-The paths for images and other files will end up looking something like
-F with the galaxy provided C prepended to the filename.
-
-=head2 generate_nongalaxy_subfile
-
- $o->generate_nongalaxy_subfile();
-
-See L. Know that the default for C<< $self->files_path >> is C<"outputname.files_path">. It's only "special" when run from inside galaxy.
-
-=head2 get_next_file
-
- $o->get_next_file();
-
-If it's the first time this method has been called, it constructs a default filename. If the C variable is true, then it's just whatever value was passed. Otherwise it's just C and C put together. C is taken from C.
-
-If it's not the first time it was called, this module expects you to be using L or L to call (which has set C). Those will generate appropriate filenames with calls to one of
-
-=over 4
-
-=item L
-
-=item L
-
-=item L
-
-=item L
-
-=back
-
-based on appropriate variables.
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Parameter.pm
--- a/cpt_psm_recombine/lib/CPT/Parameter.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,477 +0,0 @@
-package CPT::Parameter;
-use Moose::Role;
-use strict;
-use warnings;
-use autodie;
-use Carp;
-
-#requires 'galaxy_command';
-requires 'galaxy_input';
-requires 'galaxy_output';
-requires 'validate_individual';
-requires 'getopt_format';
-
-# Long name for this parameter (mandatory)
-has 'name' => ( is => 'rw', isa => 'Str' );
-
-# Short name for this paramter (optional)
-has 'short' => ( is => 'rw', isa => 'Str' );
-has 'multiple' => ( is => 'rw', isa => 'Bool' );
-has 'description' => ( is => 'rw', isa => 'Str' );
-
-# Attr
-# Default supplied parameters
-has 'default' => ( is => 'rw', isa => 'Any' );
-# User supplied values
-has 'value' => ( is => 'rw', isa => 'Any' );
-has 'required' => ( is => 'rw', isa => 'Bool' );
-has 'hidden' => ( is => 'rw', isa => 'Bool' );
-
-# Set of error messages to be returned
-has 'errors' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
-
-# Unimplemented
-# Are there any implications of setting this
-has 'implies' => ( is => 'rw', isa => 'ArrayRef' );
-
-# Internal
-has '_index' => ( is => 'rw', isa => 'Int', default => 0 );
-
-# Galaxy Specific
-has '_galaxy_specific' => (is => 'rw', isa => 'Bool', default => 0);
-# implies option is somehow intertwined with whether or not this is being produced for use in galaxy.
-has '_show_in_galaxy' => (is => 'rw', isa => 'Bool', default => 1);
-# This is a custom override. If the object is hidden by default, it will causae it to be shown. If the object is visible by default, it can cause it to be hidden.
-
-
-
-sub galaxy_command {
- my ($self) = @_;
- my $value = $self->get_galaxy_command_identifier();
-
- # If it's hidden, specific to galaxy, and hidden from galaxy users,
- # then it is safe to assume we've specified a SANE default.
- if($self->hidden() && $self->_galaxy_specific()){
- $value = $self->default();
- }
- my $string;
-
- # If it's a repeat, we handle that
- $string .= $self->handle_possible_galaxy_command_repeat_start();
- # If it's required we set it to a value IF we have one. Otherwise value
- # will be the galaxy_identifier.
- if($self->required()){
- $string .= sprintf( '--%s "${%s}"' . "\n",
- $self->get_galaxy_cli_identifier(), $value
- );
- }else{
- # If
- # This code is only relevant if we're multiple, otherwise the loop will
- # not pass here
- if ( !$self->multiple() ){
- $string .= sprintf('#if $%s and $%s is not "None":' . "\n",
- $self->get_galaxy_cli_identifier(),
- $self->get_galaxy_cli_identifier()
- );
- }
- # Flag
- $string .= sprintf( '--%s "${%s}"'."\n",
- $self->get_galaxy_cli_identifier(),
- $value
- );
- # End
- if ( !$self->multiple() ){
- $string .= "#end if\n";
- }
- }
- $string .= $self->handle_possible_galaxy_command_repeat_end();
- return $string;
-}
-
-
-
-sub getOptionsArray {
- my ($self) = @_;
- my @getoptions;
- push( @getoptions, $self->getopt_identifier() );
-
- my $mod_desc = $self->description();
- if(defined $self->default()){
- if(ref $self->default() eq 'ARRAY'){
- $mod_desc .= sprintf(" (Default: %s)", join(",",@{$self->default()}));
- }else{
- $mod_desc .= sprintf(" (Default: %s)", $self->default());
- }
- }
- if(substr(blessed($self),0,22) eq 'CPT::Parameter::Option'){
- my %kv = %{$self->options()};
- my @k = keys(%kv);
- $mod_desc .= sprintf(" (Options: %s)",
- join(
- ", ",
- map { $kv{$_} . " [$_]" } @k
- )
- );
- }else{
- }
-
- push( @getoptions, $mod_desc );
-
- # Values to copy over: required, hidden, default, values
- my %attr = ();
- if ( $self->required() ) {
- $attr{required} = $self->required();
- }
- if ( $self->hidden() ) {
- $attr{hidden} = $self->hidden();
- }
- if ( $self->default() ) {
- $attr{default} = $self->default();
- }
- push( @getoptions, \%attr );
- return \@getoptions;
-}
-
-
-sub getopt_identifier {
- my ($self) = @_;
- if ( defined( $self->short() ) && length($self->short()) > 0 ) {
- return sprintf( "%s|%s%s%s", $self->name(), $self->short(), $self->getopt_format(), ( $self->multiple() ? '@' : '' ), );
- }
- else {
- return sprintf( "%s%s%s", $self->name(), $self->getopt_format(), ( $self->multiple() ? '@' : '' ), )
-
- }
-}
-
-
-sub get_galaxy_command_identifier {
- my ($self) = @_;
- if($self->multiple()){
- return sprintf('%s.%s', $self->get_repeat_idx_name(), $self->get_galaxy_cli_identifier());
- }else{
- return $self->get_galaxy_cli_identifier();
- }
-}
-
-
-sub get_galaxy_cli_identifier {
- my ($self) = @_;
- return $self->name();
-}
-
-
-sub is_optional {
- my ($self) = @_;
- # Want coerced to int.
- #return !$self->required();
- if($self->required()){
- return 0;
- }else{
- return 1;
- }
-}
-
-
-sub is_optional_galaxy {
- my ($self) = @_;
- return $self->is_optional() ? "True" : "False";
-}
-
-
-sub update_index {
- my ($self) = @_;
- if($self->multiple()){
- my $size = scalar( @{ $self->value() } );
- # E.g:
- # [1,2,3] , size = 3
- # index = 3
- # size = 3-1 = 2
- # index -> 0
- if ( $self->_index() ge $size - 1 ) {
- $self->_index(0);
- }
- else {
- $self->_index( $self->_index() + 1 );
- }
- }
-}
-
-
-sub reset_index {
- my ($self) = @_;
- $self->_index(0);
-}
-
-
-sub get_value {
- my ($self) = @_;
- if ( defined $self->value() ) {
- if ( $self->multiple ) {
- my @data = @{ $self->value() };
- return $data[ $self->_index() ];
- }
- else {
- return $self->value();
- }
- }else{
- return;
- }
-}
-
-
-sub get_default {
- my ($self) = @_;
- if ( defined $self->default() ) {
- if ( $self->multiple ) {
- my @data = @{ $self->default() };
- return $data[ $self->_index() ];
- }
- else {
- return $self->default();
- }
- }else{
- return;
- }
-}
-
-
-
-sub validate {
- my ($self) = @_;
- if ( $self->multiple() ) {
- my $errors = 0;
- if( ref($self->value()) ne 'ARRAY' ){
- carp "Author specified a non-array default value for " . $self->name() . ", which allows multiple values. Script author should modify the default value to be an ArrayRef.";
- }
- for my $val ( @{ $self->value() } ) {
- if($self->validate_individual($val) == 0){
- $errors++;
- }
- }
- # Must cast to number otherwise it returns "" which is bad since I use
- # 1/0 as T/F (true = good, false = bad)
- return 0+($errors == 0);
- }
- else {
- return 0+$self->validate_individual($self->value());
- }
-}
-
-
-sub get_repeat_idx_name {
- my ($self) = @_;
- return 'item';
-}
-
-
-sub get_repeat_name {
- my ($self) = @_;
- if($self->multiple()){
- return sprintf('repeat_%s', $self->get_galaxy_cli_identifier());
- }else{
- confess "Tried to get repeat name for non-multiple item";
- }
-}
-
-
-sub handle_possible_galaxy_input_repeat_start {
- my ($self, $xml_writer ) = @_;
- if ( $self->multiple() ) {
- my $title = $self->get_galaxy_cli_identifier();
- $title =~ s/_/ /g;
- # Convert To Title Case (http://www.davekb.com/browse_programming_tips:perl_title_case:txt)
- $title =~ s/(\w+)/\u\L$1/g;
- $xml_writer->startTag(
- 'repeat',
- 'name' => $self->get_repeat_name(),
- 'title' => $title,
- );
- }
-}
-
-
-sub handle_possible_galaxy_input_repeat_end {
- my ($self, $xml_writer ) = @_;
- if ( $self->multiple() ) {
- $xml_writer->endTag('repeat');
- }
-}
-
-
-
-sub handle_possible_galaxy_command_repeat_start {
- my ( $self ) = @_;
- if($self->multiple()){
- return sprintf("#for \$%s in \$%s:\n",
- $self->get_repeat_idx_name(),
- $self->get_repeat_name()
- );
- }else{
- return '';
- }
-}
-
-
-sub handle_possible_galaxy_command_repeat_end {
- my ( $self ) = @_;
- if($self->multiple()){
- return "#end for\n";
- }else{
- return '';
- }
-}
-
-sub get_default_input_parameters {
- my ( $self, $type ) = @_;
- my %params = (
- name => $self->get_galaxy_cli_identifier(),
- optional => $self->is_optional_galaxy(),
- label => $self->get_galaxy_cli_identifier(),
- help => $self->description(),
- type => $type,
- );
-
- # Multiple values would return ARRAY(0xAAAAAAA) locations, so we have to
- # handle those semi-intelligently until galaxy can handle default values
- # for repeats
- if($self->multiple() && defined $self->default()){
- if(ref($self->default()) ne 'ARRAY'){
- carp "Author specified a non-array default value for " . $self->name() . ", which allows multiple values. Script author should modify the default value to be an ArrayRef.";
- }
- $params{value} = ${$self->default}[0];
- }elsif(!$self->multiple() && defined $self->default()){
- $params{value} = $self->default();
- }
-
- return %params;
-}
-
-no Moose::Role;
-
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Parameter
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 galaxy_command
-
- $file_param->galaxy_command(); # where $file_param is a CPT::Parameter::*
-
-Returns the portion of the command used in the block in galaxy XML files
-
-=head2 getOptionsArray
-
-When called on a CPT::Parameter::* object, it will collapse the object into a GetOpt::Long compatible array
-
-=head2 getopt_identifier
-
-Used for backwards compatability with existing defaults => { 'file|f=s' => "Blah" } format
-
-=head2 get_galaxy_identifier
-
-Returns the identifier associated with a given variable. This identifier is what the Cheetah template knows the variable as (given the correct context).
-
-For non-multiple variables it should be the name of the variable.
-
-For multiple variables it will reference the repeat item name and then the variable name (e.g., C< $item.label >)
-
-=head2 get_galaxy_cli_identifier
-
-Returns the command line identifier (i.e., the command line flag) associated
-with a given parameter. For a `--format` flag, this would return "format".
-This should work out of the box, as CLI parameters have the same name as we
-specify them with (even if they're repeated)
-
-=head2 is_optional
-
-If required, it is NOT optional; If not reqiured, it IS optional
-
-=head2 is_optional_galaxy
-
-Returns is_optional() as "True" or "False" for convenience and reduced code duplication
-
-=head2 update_index
-
-Convenience method to increment the index. This wraps around.
-
-=head2 reset_index
-
-convenience method to zero the index (i.e., the next get_value request will start at the beginning again)
-
-=head2 get_value
-
-Returns the value in the current index.
-
-=head2 get_default
-
-Returns the default in the current index. Something to note, please bear in
-mind this you are trying to access an array based on an index which wraps
-according to value() not according to default(). This means you may not reach
-the end of default/reach over the end of default depending on how many values
-the user actually passes
-
-=head2 validate
-
-Validation logic was eventually moved out here, as the logic for validaton is
-identical everywhere, and requires slightly different behaviour based on
-wheterh or not it's a single/multiple valued item.
-
-=head2 get_repeat_idx_name
-
-Function to obtain the name of the item as it is called inside the repeat. This
-is necessary to know which variable we are referring to within a loop.
-
-=head2 get_repeat_name
-
-Function to obtain the name of the repeat. It is necessary that this is used
-identically in the command section as well as in the input section.
-
-=head2 handle_possible_galaxy_input_repeat_start
-
-If the feature is repeated, this should automatically handle the start of that
-repeat
-
-=head2 handle_possible_galaxy_input_repeat_end
-
-If the feature is repeated, this should automatically handle the end of that
-repeat
-
-=head2 handle_possible_galaxy_command_repeat_start
-
-If the feature is repeated, this should automatically handle the start of that
-repeat with a
-
- #for $item in $repeat_name:
-
-=head2 handle_possible_galaxy_command_repeat_end
-
-If the feature is repeated, this should automatically handle the end of that
-repeat with
-
- #end for
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Parameter/Empty.pm
--- a/cpt_psm_recombine/lib/CPT/Parameter/Empty.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,56 +0,0 @@
-package CPT::Parameter::Empty;
-use Moose;
-with 'CPT::Parameter';
-
-has 'name' => (is => 'rw', isa => 'Any');
-
-sub getOptionsArray{
- my ($self) = @_;
- return [];
-}
-sub validate_individual{
- return 1;
-}
-sub galaxy_input{
- return;
-}
-sub galaxy_output{
- return;
-}
-sub galaxy_command{
- return;
-}
-sub getopt_format{
- return '';
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Parameter::Empty
-
-=head1 VERSION
-
-version 1.99.4
-
-=head1 AUTHOR
-
-Eric Rasche
-
-=head1 COPYRIGHT AND LICENSE
-
-This software is Copyright (c) 2014 by Eric Rasche.
-
-This is free software, licensed under:
-
- The GNU General Public License, Version 3, June 2007
-
-=cut
diff -r b18e8268bf4e -r 97ef96676b48 cpt_psm_recombine/lib/CPT/Parameter/File/Input.pm
--- a/cpt_psm_recombine/lib/CPT/Parameter/File/Input.pm Tue Jul 05 05:05:13 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-package CPT::Parameter::File::Input;
-use Moose;
-with 'CPT::Parameter';
-
-has 'format' => ( is => 'rw', isa => 'Str' );
-has 'file_format' => ( is => 'rw', isa => 'ArrayRef' );
-
-# Format is something like "fastq", "bam", etc.
-# This means when you write the parameter string it'll be
-# ['file|f=s', 'Input Bam File', { required => 1, format=> 'file/input/bam'} ]
-# Which will specify that you require a BAM file.
-
-
-sub galaxy_input {
- my ( $self, $xml_writer ) = @_;
- $self->handle_possible_galaxy_input_repeat_start($xml_writer);
- my %params = $self->get_default_input_parameters('data');
- if( defined $self->file_format() ){
- $params{format} = join( ',', @{$self->file_format()} );
- }
- $xml_writer->startTag(
- 'param',
- %params
- );
- $xml_writer->endTag('param');
- $self->handle_possible_galaxy_input_repeat_end($xml_writer);
-}
-
-
-sub galaxy_output {
- my ($self, $xml_writer) = @_;
- return $xml_writer;
-}
-
-
-sub validate_individual {
- my ($self, $val) = @_;
-
- # Maybe do format validation here? Maybe?
- if ( -e $val ) {
- return 1;
- }else{
- push(@{$self->errors()}, sprintf( "File [%s] supplied to option %s does not exist", $val,$self->name()));
- return 0;
- }
-}
-
-
-sub getopt_format {
- return '=s';
-}
-
-no Moose;
-1;
-
-__END__
-
-=pod
-
-=encoding UTF-8
-
-=head1 NAME
-
-CPT::Parameter::File::Input
-
-=head1 VERSION
-
-version 1.99.4
-
-=head2 galaxy_input
-
- $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::*
-
-Utilises the $xml_writer to add a block in the