annotate gtf_to_bed.xml @ 0:ed0d0eda36a9 draft default tip

"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
author padge
date Wed, 29 Sep 2021 13:50:53 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
1 <tool name="gtf_to_bed" id="gtf_to_bed" version="0.01">
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
2 <!--Source in git at: https://github.com/fubar2/toolfactory-->
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
3 <!--Created by admin@galaxy.org at 29/09/2021 09:26:48 using the Galaxy Tool Factory.-->
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
4 <description>Takes as input a GTF file and writes a BED file in 12 column format</description>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
5 <requirements>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
6 <requirement type="package">perl</requirement>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
7 </requirements>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
8 <stdio>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
9 <exit_code range="1:" level="fatal"/>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
10 </stdio>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
11 <version_command><![CDATA[echo "0.01"]]></version_command>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
12 <command><![CDATA[perl
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
13 $runme
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
14 $input_gtf
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
15 $converted_from_gtf]]></command>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
16 <configfiles>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
17 <configfile name="runme"><![CDATA[#raw
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
18
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
19 #!/usr/bin/perl
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
20 # written by Guy Bottu for the GenePattern server of VIB Bioinformatics Core,
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
21 # takes as input a GTF file and writes a BED file in 12 column format
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
22 # with information about transcripts, for use with RSeqC.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
23 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
24 # The "thick" information is about the coding region, ideally it goes from
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
25 # start codon to stop codon, but if information is lacking (e.g. because
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
26 # of missing sequence or missing annotation), we use the CDS information.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
27 # For some transcripts there are multiple start or stop codons. We always
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
28 # choose the "thick" so that it has maximum length.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
29 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
30 # If there is no CDS information (as for ncRNA) the "thick" will have just a
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
31 # repeat of the transcript start position, as per BED convention.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
32 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
33 # modified for integration under GenePattern
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
34 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
35 # usage : perl gtf_to_bed.pl <GTF file> <output file>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
36 use List::Util qw (min max);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
37
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
38 $gtf = $ARGV[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
39 $gtf =~ /.*\/([^\/]+)\.gtf3?/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
40 # print $gtf;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
41 $bed = $ARGV[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
42
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
43 open GTF, $gtf;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
44 open BED, ">$bed";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
45 LINEPARSER: while (<GTF>) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
46 if (/^#/) { next LINEPARSER } # skip comment lines
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
47 @fields = split /\t/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
48 $chrom = $fields[0]; $type = $fields[2]; $beginpos = $fields[3];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
49 $endpos = $fields[4]; $strand = $fields[6];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
50 chomp $fields[8]; $documentation = $fields[8];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
51 $documentation =~ /transcript_id "([^"]+)";/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
52 $transcript_id = $1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
53 if ($strand ne '+' and $strand ne '-') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
54 print "WARNING : $transcript_id has strand information $strand\n";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
55 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
56 if ($type eq 'transcript') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
57 $chrom{$transcript_id} = $chrom;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
58 $strand{$transcript_id} = $strand;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
59 $transcript_beginpos{$transcript_id} = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
60 $transcript_endpos{$transcript_id} = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
61 } elsif ($type eq 'exon') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
62
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
63 $documentation =~ /exon_number "([^"]+)";/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
64 $exon_number = $1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
65 # print $exon_number;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
66 $exon_beginpos{$transcript_id}[$exon_number] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
67 $exon_endpos{$transcript_id}[$exon_number] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
68 } elsif ($type eq 'start_codon') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
69 if (not exists $ORFpos{$transcript_id}[0]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
70 or ($strand eq '+' and $beginpos < $ORFpos{$transcript_id}[0])
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
71 or ($strand eq '-' and $endpos > $ORFpos{$transcript_id}[1])) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
72 $ORFpos{$transcript_id}[0] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
73 $ORFpos{$transcript_id}[1] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
74 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
75 } elsif ($type eq 'stop_codon') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
76 if (not exists $ORFpos{$transcript_id}[2]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
77 or ($strand eq '+' and $endpos > $ORFpos{$transcript_id}[3])
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
78 or ($strand eq '-' and $beginpos < $ORFpos{$transcript_id}[2])) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
79 $ORFpos{$transcript_id}[2] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
80 $ORFpos{$transcript_id}[3] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
81 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
82 } elsif ($type eq 'CDS') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
83 if (not exists $CDSpos{$transcript_id}[0]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
84 or $beginpos < $CDSpos{$transcript_id}[0]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
85 $CDSpos{$transcript_id}[0] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
86 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
87 if (not exists $CDSpos{$transcript_id}[1]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
88 or $endpos > $CDSpos{$transcript_id}[1]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
89 $CDSpos{$transcript_id}[1] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
90 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
91 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
92 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
93
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
94 foreach $transcript_id (sort keys %transcript_beginpos) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
95 $beginpos = $transcript_beginpos{$transcript_id} - 1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
96 ## in BED numbering starts with 0, not 1 like in GTF
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
97 $endpos = $transcript_endpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
98 print BED "$chrom{$transcript_id}\t$beginpos\t$endpos\t$transcript_id\t0\t$strand{$transcript_id}";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
99
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
100 if (exists $ORFpos{$transcript_id}[0] or exists $ORFpos{$transcript_id}[2]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
101 or exists $CDSpos{$transcript_id}[0]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
102 if ($strand{$transcript_id} eq '+') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
103 if (exists $ORFpos{$transcript_id}[0]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
104 if (exists $CDSpos{$transcript_id}[0]) { # both start_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
105 $beginthickpos = min($ORFpos{$transcript_id}[0],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
106 $CDSpos{$transcript_id}[0]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
107 } else { # only start_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
108 $beginthickpos = $ORFpos{$transcript_id}[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
109 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
110 } elsif (exists $CDSpos{$transcript_id}[0]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
111 $beginthickpos = $CDSpos{$transcript_id}[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
112 } else { # -- (but there is a stop_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
113 $beginthickpos = $transcript_beginpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
114 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
115 if (exists $ORFpos{$transcript_id}[3]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
116 if (exists $CDSpos{$transcript_id}[1]) { # both stop_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
117 $endthickpos = max($ORFpos{$transcript_id}[3],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
118 $CDSpos{$transcript_id}[1]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
119 } else { # only stop_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
120 $endthickpos = $ORFpos{$transcript_id}[3];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
121 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
122 } elsif (exists $CDSpos{$transcript_id}[1]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
123 $endthickpos = $CDSpos{$transcript_id}[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
124 } else { # -- (but there is a start_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
125 $endthickpos = $transcript_endpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
126 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
127 } elsif ($strand{$transcript_id} eq '-') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
128 if (exists $ORFpos{$transcript_id}[2]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
129 if (exists $CDSpos{$transcript_id}[0]) { # both stop_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
130 $beginthickpos = min($ORFpos{$transcript_id}[2],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
131 $CDSpos{$transcript_id}[0]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
132 } else { # only stop_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
133 $beginthickpos = $ORFpos{$transcript_id}[2];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
134 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
135 } elsif (exists $CDSpos{$transcript_id}[0]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
136 $beginthickpos = $CDSpos{$transcript_id}[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
137 } else { # -- (but there is a start_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
138 $beginthickpos = $transcript_beginpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
139 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
140 if (exists $ORFpos{$transcript_id}[1]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
141 if (exists $CDSpos{$transcript_id}[1]) { # both start_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
142 $endthickpos = max($ORFpos{$transcript_id}[1],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
143 $CDSpos{$transcript_id}[1]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
144 } else { # only start_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
145 $endthickpos = $ORFpos{$transcript_id}[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
146 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
147 } elsif (exists $CDSpos{$transcript_id}[1]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
148 $endthickpos = $CDSpos{$transcript_id}[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
149 } else { # -- (but there is a stop_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
150 $endthickpos = $transcript_endpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
151 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
152 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
153 $beginthickpos -= 1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
154 } else {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
155 $beginthickpos = $beginpos; $endthickpos = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
156 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
157 print BED "\t$beginthickpos\t$endthickpos";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
158
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
159 $blocksizes = ''; $blockstarts = '';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
160 $Nexons = $#{$exon_beginpos{$transcript_id}};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
161 ## In some GTF files the exons of a transcript on the reverse strand
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
162 ## are numbered according to their position on the forward strand
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
163 ## and in others according to their position on the reverse strand
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
164 if ($Nexons == 1) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
165 $blocksizes .= $exon_endpos{$transcript_id}[1] - $exon_beginpos{$transcript_id}[1] + 1 . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
166 $blockstarts .= $exon_beginpos{$transcript_id}[1] - $transcript_beginpos{$transcript_id} . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
167 } else {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
168 if ($exon_beginpos{$transcript_id}[2] > $exon_beginpos{$transcript_id}[1]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
169 foreach $exon_number (1 .. $Nexons) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
170 $blocksizes .= $exon_endpos{$transcript_id}[$exon_number] - $exon_beginpos{$transcript_id}[$exon_number] + 1 . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
171 $blockstarts .= $exon_beginpos{$transcript_id}[$exon_number] - $transcript_beginpos{$transcript_id} . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
172 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
173 } else { # (is <)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
174 for($exon_number = $Nexons ; $exon_number > 0 ; $exon_number--) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
175 $blocksizes .= $exon_endpos{$transcript_id}[$exon_number] - $exon_beginpos{$transcript_id}[$exon_number] + 1 . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
176 $blockstarts .= $exon_beginpos{$transcript_id}[$exon_number] - $transcript_beginpos{$transcript_id} . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
177 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
178 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
179 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
180 print BED "\t0\t$Nexons\t$blocksizes\t$blockstarts\n";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
181 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
182 close( GTF );
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
183 close( BED );
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
184
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
185 #end raw]]></configfile>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
186 </configfiles>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
187 <inputs>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
188 <param name="input_gtf" type="data" optional="false" label="input_gtf" help="Input Gene Transfer Format (.gtf) file" format="gtf,txt" multiple="false"/>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
189 </inputs>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
190 <outputs>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
191 <data name="converted_from_gtf" format="bed" label="converted_from_gtf" hidden="false"/>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
192 </outputs>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
193 <tests>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
194 <test>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
195 <output name="converted_from_gtf" value="test_output.bed" compare="diff" lines_diff="0"/>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
196 <param name="input_gtf" value="test_input.gtf"/>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
197 </test>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
198 </tests>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
199 <help><![CDATA[
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
200
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
201 Conversion script written by Guy Bottu for the GenePattern server of VIB Bioinformatics Core, takes as input a GTF file and writes a BED file in 12 column format with information about transcripts, for use with RSeqC. Modified for integration under GenePattern.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
202
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
203
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
204
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
205 The "thick" information is about the coding region, ideally it goes from start codon to stop codon, but if information is lacking (e.g. because of missing sequence or missing annotation), we use the CDS information.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
206
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
207
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
208
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
209 For some transcripts there are multiple start or stop codons. We always choose the "thick" so that it has maximum length.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
210
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
211 If there is no CDS information (as for ncRNA) the "thick" will have just a repeat of the transcript start position, as per BED convention.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
212
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
213
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
214
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
215
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
216
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
217 usage : perl gtf_to_bed.pl <GTF file> <output file>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
218
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
219
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
220
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
221 ------
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
222
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
223
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
224 Script::
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
225
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
226 #!/usr/bin/perl
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
227 # written by Guy Bottu for the GenePattern server of VIB Bioinformatics Core,
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
228 # takes as input a GTF file and writes a BED file in 12 column format
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
229 # with information about transcripts, for use with RSeqC.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
230 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
231 # The "thick" information is about the coding region, ideally it goes from
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
232 # start codon to stop codon, but if information is lacking (e.g. because
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
233 # of missing sequence or missing annotation), we use the CDS information.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
234 # For some transcripts there are multiple start or stop codons. We always
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
235 # choose the "thick" so that it has maximum length.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
236 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
237 # If there is no CDS information (as for ncRNA) the "thick" will have just a
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
238 # repeat of the transcript start position, as per BED convention.
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
239 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
240 # modified for integration under GenePattern
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
241 #
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
242 # usage : perl gtf_to_bed.pl <GTF file> <output file>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
243 use List::Util qw (min max);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
244 $gtf = $ARGV[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
245 $gtf =~ /.*\/([^\/]+)\.gtf3?/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
246 # print $gtf;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
247 $bed = $ARGV[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
248 open GTF, $gtf;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
249 open BED, ">$bed";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
250 LINEPARSER: while (<GTF>) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
251 if (/^#/) { next LINEPARSER } # skip comment lines
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
252 @fields = split /\t/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
253 $chrom = $fields[0]; $type = $fields[2]; $beginpos = $fields[3];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
254 $endpos = $fields[4]; $strand = $fields[6];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
255 chomp $fields[8]; $documentation = $fields[8];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
256 $documentation =~ /transcript_id "([^"]+)";/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
257 $transcript_id = $1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
258 if ($strand ne '+' and $strand ne '-') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
259 print "WARNING : $transcript_id has strand information $strand\n";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
260 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
261 if ($type eq 'transcript') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
262 $chrom{$transcript_id} = $chrom;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
263 $strand{$transcript_id} = $strand;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
264 $transcript_beginpos{$transcript_id} = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
265 $transcript_endpos{$transcript_id} = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
266 } elsif ($type eq 'exon') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
267 $documentation =~ /exon_number "([^"]+)";/;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
268 $exon_number = $1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
269 # print $exon_number;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
270 $exon_beginpos{$transcript_id}[$exon_number] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
271 $exon_endpos{$transcript_id}[$exon_number] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
272 } elsif ($type eq 'start_codon') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
273 if (not exists $ORFpos{$transcript_id}[0]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
274 or ($strand eq '+' and $beginpos < $ORFpos{$transcript_id}[0])
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
275 or ($strand eq '-' and $endpos > $ORFpos{$transcript_id}[1])) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
276 $ORFpos{$transcript_id}[0] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
277 $ORFpos{$transcript_id}[1] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
278 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
279 } elsif ($type eq 'stop_codon') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
280 if (not exists $ORFpos{$transcript_id}[2]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
281 or ($strand eq '+' and $endpos > $ORFpos{$transcript_id}[3])
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
282 or ($strand eq '-' and $beginpos < $ORFpos{$transcript_id}[2])) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
283 $ORFpos{$transcript_id}[2] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
284 $ORFpos{$transcript_id}[3] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
285 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
286 } elsif ($type eq 'CDS') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
287 if (not exists $CDSpos{$transcript_id}[0]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
288 or $beginpos < $CDSpos{$transcript_id}[0]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
289 $CDSpos{$transcript_id}[0] = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
290 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
291 if (not exists $CDSpos{$transcript_id}[1]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
292 or $endpos > $CDSpos{$transcript_id}[1]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
293 $CDSpos{$transcript_id}[1] = $endpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
294 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
295 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
296 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
297 foreach $transcript_id (sort keys %transcript_beginpos) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
298 $beginpos = $transcript_beginpos{$transcript_id} - 1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
299 ## BED start positions are 0-based while GTF positions are 1-based, so only the start is shifted (the BED end is exclusive and stays unchanged)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
300 $endpos = $transcript_endpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
301 print BED "$chrom{$transcript_id}\t$beginpos\t$endpos\t$transcript_id\t0\t$strand{$transcript_id}";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
302 if (exists $ORFpos{$transcript_id}[0] or exists $ORFpos{$transcript_id}[2]
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
303 or exists $CDSpos{$transcript_id}[0]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
304 if ($strand{$transcript_id} eq '+') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
305 if (exists $ORFpos{$transcript_id}[0]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
306 if (exists $CDSpos{$transcript_id}[0]) { # both start_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
307 $beginthickpos = min($ORFpos{$transcript_id}[0],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
308 $CDSpos{$transcript_id}[0]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
309 } else { # only start_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
310 $beginthickpos = $ORFpos{$transcript_id}[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
311 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
312 } elsif (exists $CDSpos{$transcript_id}[0]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
313 $beginthickpos = $CDSpos{$transcript_id}[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
314 } else { # neither start_codon nor CDS (but there is a stop_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
315 $beginthickpos = $transcript_beginpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
316 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
317 if (exists $ORFpos{$transcript_id}[3]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
318 if (exists $CDSpos{$transcript_id}[1]) { # both stop_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
319 $endthickpos = max($ORFpos{$transcript_id}[3],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
320 $CDSpos{$transcript_id}[1]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
321 } else { # only stop_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
322 $endthickpos = $ORFpos{$transcript_id}[3];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
323 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
324 } elsif (exists $CDSpos{$transcript_id}[1]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
325 $endthickpos = $CDSpos{$transcript_id}[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
326 } else { # neither stop_codon nor CDS (but there is a start_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
327 $endthickpos = $transcript_endpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
328 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
329 } elsif ($strand{$transcript_id} eq '-') {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
330 if (exists $ORFpos{$transcript_id}[2]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
331 if (exists $CDSpos{$transcript_id}[0]) { # both stop_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
332 $beginthickpos = min($ORFpos{$transcript_id}[2],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
333 $CDSpos{$transcript_id}[0]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
334 } else { # only stop_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
335 $beginthickpos = $ORFpos{$transcript_id}[2];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
336 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
337 } elsif (exists $CDSpos{$transcript_id}[0]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
338 $beginthickpos = $CDSpos{$transcript_id}[0];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
339 } else { # neither stop_codon nor CDS (but there is a start_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
340 $beginthickpos = $transcript_beginpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
341 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
342 if (exists $ORFpos{$transcript_id}[1]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
343 if (exists $CDSpos{$transcript_id}[1]) { # both start_codon and CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
344 $endthickpos = max($ORFpos{$transcript_id}[1],
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
345 $CDSpos{$transcript_id}[1]);
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
346 } else { # only start_codon
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
347 $endthickpos = $ORFpos{$transcript_id}[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
348 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
349 } elsif (exists $CDSpos{$transcript_id}[1]) { # only CDS
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
350 $endthickpos = $CDSpos{$transcript_id}[1];
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
351 } else { # neither start_codon nor CDS (but there is a stop_codon)
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
352 $endthickpos = $transcript_endpos{$transcript_id};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
353 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
354 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
355 $beginthickpos -= 1;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
356 } else {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
357 $beginthickpos = $beginpos; $endthickpos = $beginpos;
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
358 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
359 print BED "\t$beginthickpos\t$endthickpos";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
360 $blocksizes = ''; $blockstarts = '';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
361 $Nexons = $#{$exon_beginpos{$transcript_id}};
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
362 ## In some GTF files the exons of a transcript on the reverse strand
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
363 ## are numbered according to their position on the forward strand
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
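364 ## and in others according to their position on the reverse strand; the order is therefore detected by comparing exons 1 and 2 so that blocks are always written in ascending position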
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
365 if ($Nexons == 1) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
366 $blocksizes .= $exon_endpos{$transcript_id}[1] - $exon_beginpos{$transcript_id}[1] + 1 . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
367 $blockstarts .= $exon_beginpos{$transcript_id}[1] - $transcript_beginpos{$transcript_id} . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
368 } else {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
369 if ($exon_beginpos{$transcript_id}[2] > $exon_beginpos{$transcript_id}[1]) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
370 foreach $exon_number (1 .. $Nexons) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
371 $blocksizes .= $exon_endpos{$transcript_id}[$exon_number] - $exon_beginpos{$transcript_id}[$exon_number] + 1 . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
372 $blockstarts .= $exon_beginpos{$transcript_id}[$exon_number] - $transcript_beginpos{$transcript_id} . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
373 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
374 } else { # exon 2 starts before exon 1: exons are numbered in descending position, so walk them backwards
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
375 for($exon_number = $Nexons ; $exon_number > 0 ; $exon_number--) {
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
376 $blocksizes .= $exon_endpos{$transcript_id}[$exon_number] - $exon_beginpos{$transcript_id}[$exon_number] + 1 . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
377 $blockstarts .= $exon_beginpos{$transcript_id}[$exon_number] - $transcript_beginpos{$transcript_id} . ',';
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
378 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
379 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
380 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
381 print BED "\t0\t$Nexons\t$blocksizes\t$blockstarts\n";
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
382 }
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
383 close( GTF );
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
384 close( BED );
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
385
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
386 ]]></help>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
387 <citations>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
388 <citation type="doi">10.1093/bioinformatics/bts573</citation>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
389 </citations>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
390 </tool>
ed0d0eda36a9 "planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff changeset
391