Mercurial > repos > padge > gtf_to_bed_script
annotate gtf_to_bed.xml @ 0:ed0d0eda36a9 draft default tip
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
author | padge |
---|---|
date | Wed, 29 Sep 2021 13:50:53 +0000 |
parents | |
children |
rev | line source |
---|---|
0
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
1 <tool name="gtf_to_bed" id="gtf_to_bed" version="0.01"> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
2 <!--Source in git at: https://github.com/fubar2/toolfactory--> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
3 <!--Created by admin@galaxy.org at 29/09/2021 09:26:48 using the Galaxy Tool Factory.--> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
4 <description>Takes as input a GTF file and writes a BED file in 12 column format</description> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
5 <requirements> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
6 <requirement type="package">perl</requirement> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
7 </requirements> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
8 <stdio> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
9 <exit_code range="1:" level="fatal"/> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
10 </stdio> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
11 <version_command><![CDATA[echo "0.01"]]></version_command> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
12 <command><![CDATA[perl |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
13 $runme |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
14 $input_gtf |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
15 $converted_from_gtf]]></command> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
16 <configfiles> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
17 <configfile name="runme"><![CDATA[#raw |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
18 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
19 #!/usr/bin/perl |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
20 # written by Guy Bottu for the GenePattern server of VIB Bioinformatics Core, |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
21 # takes as input a GTF file and writes a BED file in 12 column format |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
22 # with information about transcripts, for use with RSeqC. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
23 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
24 # The "thick" information is about the coding region, ideally it goes from |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
25 # start codon to stop codon, but if information is lacking (e.g. because |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
26 # of missing sequence or missing annotation), we use the CDS information. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
27 # For some transcripts there are multiple start or stop codons. We always |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
28 # choose the "thick" so that it has maximum length. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
29 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
30 # If there is no CDS information (as for ncRNA) the "thick" will have just a |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
31 # repeat of the transcript start position, as per BED convention. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
32 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
33 # modified for integration under GenePattern |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
34 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
35 # usage : perl gtf_to_bed.pl <GTF file> <output file> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
36 use List::Util qw (min max); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
37 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
38 $gtf = $ARGV[0]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
39 $gtf =~ /.*\/([^\/]+)\.gtf3?/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
40 # print $gtf; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
41 $bed = $ARGV[1]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
42 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
43 open GTF, $gtf; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
44 open BED, ">$bed"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
45 LINEPARSER: while (<GTF>) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
46 if (/^#/) { next LINEPARSER } # skip comment lines |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
47 @fields = split /\t/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
48 $chrom = $fields[0]; $type = $fields[2]; $beginpos = $fields[3]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
49 $endpos = $fields[4]; $strand = $fields[6]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
50 chomp $fields[8]; $documentation = $fields[8]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
51 $documentation =~ /transcript_id "([^"]+)";/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
52 $transcript_id = $1; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
53 if ($strand ne '+' and $strand ne '-') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
54 print "WARNING : $transcript_id has strand information $strand\n"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
55 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
56 if ($type eq 'transcript') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
57 $chrom{$transcript_id} = $chrom; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
58 $strand{$transcript_id} = $strand; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
59 $transcript_beginpos{$transcript_id} = $beginpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
60 $transcript_endpos{$transcript_id} = $endpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
61 } elsif ($type eq 'exon') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
62 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
63 $documentation =~ /exon_number "([^"]+)";/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
64 $exon_number = $1; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
65 # print $exon_number; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
66 $exon_beginpos{$transcript_id}[$exon_number] = $beginpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
67 $exon_endpos{$transcript_id}[$exon_number] = $endpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
68 } elsif ($type eq 'start_codon') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
69 if (not exists $ORFpos{$transcript_id}[0] |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
70 or ($strand eq '+' and $beginpos < $ORFpos{$transcript_id}[0]) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
71 or ($strand eq '-' and $endpos > $ORFpos{$transcript_id}[1])) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
72 $ORFpos{$transcript_id}[0] = $beginpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
73 $ORFpos{$transcript_id}[1] = $endpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
74 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
75 } elsif ($type eq 'stop_codon') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
76 if (not exists $ORFpos{$transcript_id}[2] |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
77 or ($strand eq '+' and $endpos > $ORFpos{$transcript_id}[3]) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
78 or ($strand eq '-' and $beginpos < $ORFpos{$transcript_id}[2])) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
79 $ORFpos{$transcript_id}[2] = $beginpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
80 $ORFpos{$transcript_id}[3] = $endpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
81 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
82 } elsif ($type eq 'CDS') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
83 if (not exists $CDSpos{$transcript_id}[0] |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
84 or $beginpos < $CDSpos{$transcript_id}[0]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
85 $CDSpos{$transcript_id}[0] = $beginpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
86 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
87 if (not exists $CDSpos{$transcript_id}[1] |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
88 or $endpos > $CDSpos{$transcript_id}[1]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
89 $CDSpos{$transcript_id}[1] = $endpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
90 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
91 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
92 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
93 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
94 foreach $transcript_id (sort keys %transcript_beginpos) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
95 $beginpos = $transcript_beginpos{$transcript_id} - 1; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
96 ## in BED numbering starts with 0, not 1 like in GTF |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
97 $endpos = $transcript_endpos{$transcript_id}; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
98 print BED "$chrom{$transcript_id}\t$beginpos\t$endpos\t$transcript_id\t0\t$strand{$transcript_id}"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
99 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
100 if (exists $ORFpos{$transcript_id}[0] or exists $ORFpos{$transcript_id}[2] |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
101 or exists $CDSpos{$transcript_id}[0]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
102 if ($strand{$transcript_id} eq '+') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
103 if (exists $ORFpos{$transcript_id}[0]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
104 if (exists $CDSpos{$transcript_id}[0]) { # both start_codon and CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
105 $beginthickpos = min($ORFpos{$transcript_id}[0], |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
106 $CDSpos{$transcript_id}[0]); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
107 } else { # only start_codon |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
108 $beginthickpos = $ORFpos{$transcript_id}[0]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
109 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
110 } elsif (exists $CDSpos{$transcript_id}[0]) { # only CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
111 $beginthickpos = $CDSpos{$transcript_id}[0]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
112 } else { # -- (but there is a stop_codon) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
113 $beginthickpos = $transcript_beginpos{$transcript_id}; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
114 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
115 if (exists $ORFpos{$transcript_id}[3]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
116 if (exists $CDSpos{$transcript_id}[1]) { # both stop_codon and CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
117 $endthickpos = max($ORFpos{$transcript_id}[3], |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
118 $CDSpos{$transcript_id}[1]); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
119 } else { # only stop_codon |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
120 $endthickpos = $ORFpos{$transcript_id}[3]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
121 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
122 } elsif (exists $CDSpos{$transcript_id}[1]) { # only CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
123 $endthickpos = $CDSpos{$transcript_id}[1]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
124 } else { # -- (but there is a start_codon) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
125 $endthickpos = $transcript_endpos{$transcript_id}; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
126 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
127 } elsif ($strand{$transcript_id} eq '-') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
128 if (exists $ORFpos{$transcript_id}[2]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
129 if (exists $CDSpos{$transcript_id}[0]) { # both stop_codon and CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
130 $beginthickpos = min($ORFpos{$transcript_id}[2], |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
131 $CDSpos{$transcript_id}[0]); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
132 } else { # only stop_codon |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
133 $beginthickpos = $ORFpos{$transcript_id}[2]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
134 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
135 } elsif (exists $CDSpos{$transcript_id}[0]) { # only CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
136 $beginthickpos = $CDSpos{$transcript_id}[0]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
137 } else { # -- (but there is a start_codon) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
138 $beginthickpos = $transcript_beginpos{$transcript_id}; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
139 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
140 if (exists $ORFpos{$transcript_id}[1]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
141 if (exists $CDSpos{$transcript_id}[1]) { # both start_codon and CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
142 $endthickpos = max($ORFpos{$transcript_id}[1], |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
143 $CDSpos{$transcript_id}[1]); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
144 } else { # only start_codon |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
145 $endthickpos = $ORFpos{$transcript_id}[1]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
146 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
147 } elsif (exists $CDSpos{$transcript_id}[1]) { # only CDS |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
148 $endthickpos = $CDSpos{$transcript_id}[1]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
149 } else { # -- (but there is a stop_codon) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
150 $endthickpos = $transcript_endpos{$transcript_id}; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
151 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
152 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
153 $beginthickpos -= 1; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
154 } else { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
155 $beginthickpos = $beginpos; $endthickpos = $beginpos; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
156 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
157 print BED "\t$beginthickpos\t$endthickpos"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
158 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
159 $blocksizes = ''; $blockstarts = ''; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
160 $Nexons = $#{$exon_beginpos{$transcript_id}}; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
161 ## In some GTF files the exons of a transcript on the reverse strand |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
162 ## are numbered according to their position on the forward strand |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
163 ## and in others according to their position on the reverse strand |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
164 if ($Nexons == 1) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
165 $blocksizes .= $exon_endpos{$transcript_id}[1] - $exon_beginpos{$transcript_id}[1] + 1 . ','; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
166 $blockstarts .= $exon_beginpos{$transcript_id}[1] - $transcript_beginpos{$transcript_id} . ','; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
167 } else { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
168 if ($exon_beginpos{$transcript_id}[2] > $exon_beginpos{$transcript_id}[1]) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
169 foreach $exon_number (1 .. $Nexons) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
170 $blocksizes .= $exon_endpos{$transcript_id}[$exon_number] - $exon_beginpos{$transcript_id}[$exon_number] + 1 . ','; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
171 $blockstarts .= $exon_beginpos{$transcript_id}[$exon_number] - $transcript_beginpos{$transcript_id} . ','; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
172 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
173 } else { # (is <) |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
174 for($exon_number = $Nexons ; $exon_number > 0 ; $exon_number--) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
175 $blocksizes .= $exon_endpos{$transcript_id}[$exon_number] - $exon_beginpos{$transcript_id}[$exon_number] + 1 . ','; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
176 $blockstarts .= $exon_beginpos{$transcript_id}[$exon_number] - $transcript_beginpos{$transcript_id} . ','; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
177 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
178 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
179 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
180 print BED "\t0\t$Nexons\t$blocksizes\t$blockstarts\n"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
181 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
182 close( GTF ); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
183 close( BED ); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
184 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
185 #end raw]]></configfile> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
186 </configfiles> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
187 <inputs> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
188 <param name="input_gtf" type="data" optional="false" label="input_gtf" help="Input Gene Transfer Format (.gtf) file" format="gtf,txt" multiple="false"/> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
189 </inputs> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
190 <outputs> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
191 <data name="converted_from_gtf" format="bed" label="converted_from_gtf" hidden="false"/> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
192 </outputs> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
193 <tests> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
194 <test> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
195 <output name="converted_from_gtf" value="test_output.bed" compare="diff" lines_diff="0"/> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
196 <param name="input_gtf" value="test_input.gtf"/> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
197 </test> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
198 </tests> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
199 <help><![CDATA[ |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
200 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
201 Conversion script written by Guy Bottu for the GenePattern server of VIB Bioinformatics Core, takes as input a GTF file and writes a BED file in 12 column format with information about transcripts, for use with RSeQC. Modified for integration under GenePattern. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
202 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
203 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
204 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
205 The "thick" information is about the coding region, ideally it goes from start codon to stop codon, but if information is lacking (e.g. because of missing sequence or missing annotation), we use the CDS information. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
206 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
207 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
208 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
209 For some transcripts there are multiple start or stop codons. We always choose the "thick" so that it has maximum length. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
210 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
211 If there is no CDS information (as for ncRNA) the "thick" will have just a repeat of the transcript start position, as per BED convention. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
212 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
213 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
214 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
215 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
216 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
217 usage : perl gtf_to_bed.pl <GTF file> <output file> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
218 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
219 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
220 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
221 ------ |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
222 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
223 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
224 Script:: |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
225 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
226 #!/usr/bin/perl |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
227 # written by Guy Bottu for the GenePattern server of VIB Bioinformatics Core, |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
228 # takes as input a GTF file and writes a BED file in 12 column format |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
229 # with information about transcripts, for use with RSeQC. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
230 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
231 # The "thick" information is about the coding region, ideally it goes from |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
232 # start codon to stop codon, but if information is lacking (e.g. because |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
233 # of missing sequence or missing annotation), we use the CDS information. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
234 # For some transcripts there are multiple start or stop codons. We always |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
235 # choose the "thick" so that it has maximum length. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
236 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
237 # If there is no CDS information (as for ncRNA) the "thick" will have just a |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
238 # repeat of the transcript start position, as per BED convention. |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
239 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
240 # modified for integration under GenePattern |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
241 # |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
242 # usage : perl gtf_to_bed.pl <GTF file> <output file> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
243 use List::Util qw (min max); |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
244 $gtf = $ARGV[0]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
245 $gtf =~ /.*\/([^\/]+)\.gtf3?/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
246 # print $gtf; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
247 $bed = $ARGV[1]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
248 open GTF, $gtf; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
249 open BED, ">$bed"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
250 LINEPARSER: while (<GTF>) { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
251 if (/^#/) { next LINEPARSER } # skip comment lines |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
252 @fields = split /\t/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
253 $chrom = $fields[0]; $type = $fields[2]; $beginpos = $fields[3]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
254 $endpos = $fields[4]; $strand = $fields[6]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
255 chomp $fields[8]; $documentation = $fields[8]; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
256 $documentation =~ /transcript_id "([^"]+)";/; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
257 $transcript_id = $1; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
258 if ($strand ne '+' and $strand ne '-') { |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
259 print "WARNING : $transcript_id has strand information $strand\n"; |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
260 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
# File the coordinates of the current record under its transcript id,
# depending on the feature type held in $type.
#   %chrom, %strand, %transcript_beginpos, %transcript_endpos : one value per transcript
#   %exon_beginpos, %exon_endpos : per transcript, array indexed by exon_number
#   %ORFpos : per transcript, [start_codon begin, start_codon end,
#                              stop_codon begin,  stop_codon end]
#   %CDSpos : per transcript, [smallest CDS begin, largest CDS end]
if ($type eq 'transcript') {
  $chrom{$transcript_id} = $chrom;
  $strand{$transcript_id} = $strand;
  $transcript_beginpos{$transcript_id} = $beginpos;
  $transcript_endpos{$transcript_id} = $endpos;
} elsif ($type eq 'exon') {
  # exon_number may be written quoted (exon_number "3";) or bare
  # (exon_number 3;).  The match result is tested explicitly: after a failed
  # match $1 still holds the capture of an earlier successful match, which
  # would silently store the exon under a wrong index.
  if ($documentation =~ /exon_number\s+"?(\d+)"?\s*;/) {
    $exon_number = $1;
    $exon_beginpos{$transcript_id}[$exon_number] = $beginpos;
    $exon_endpos{$transcript_id}[$exon_number] = $endpos;
  } else {
    print "WARNING : exon of $transcript_id has no readable exon_number and is skipped\n";
  }
} elsif ($type eq 'start_codon') {
  # Keep the first start_codon seen, or replace it by one that lies further
  # left on the + strand / further right on the - strand.
  if (not exists $ORFpos{$transcript_id}[0]
      or ($strand eq '+' and $beginpos < $ORFpos{$transcript_id}[0])
      or ($strand eq '-' and $endpos > $ORFpos{$transcript_id}[1])) {
    $ORFpos{$transcript_id}[0] = $beginpos;
    $ORFpos{$transcript_id}[1] = $endpos;
  }
} elsif ($type eq 'stop_codon') {
  # Keep the first stop_codon seen, or replace it by one that lies further
  # right on the + strand / further left on the - strand.
  if (not exists $ORFpos{$transcript_id}[2]
      or ($strand eq '+' and $endpos > $ORFpos{$transcript_id}[3])
      or ($strand eq '-' and $beginpos < $ORFpos{$transcript_id}[2])) {
    $ORFpos{$transcript_id}[2] = $beginpos;
    $ORFpos{$transcript_id}[3] = $endpos;
  }
} elsif ($type eq 'CDS') {
  # Grow the CDS envelope: smallest begin and largest end over all CDS rows.
  if (not exists $CDSpos{$transcript_id}[0]
      or $beginpos < $CDSpos{$transcript_id}[0]) {
    $CDSpos{$transcript_id}[0] = $beginpos;
  }
  if (not exists $CDSpos{$transcript_id}[1]
      or $endpos > $CDSpos{$transcript_id}[1]) {
    $CDSpos{$transcript_id}[1] = $endpos;
  }
}
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
296 } |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
# Write one 12-column BED line for every transcript that had a 'transcript'
# record, in string-sorted order of transcript id.
foreach $transcript_id (sort keys %transcript_beginpos) {
  # BED starts are 0-based whereas GTF starts are 1-based; the end is unchanged.
  $beginpos = $transcript_beginpos{$transcript_id} - 1;
  $endpos = $transcript_endpos{$transcript_id};

  # --- thick region (columns 7 and 8) ---
  # $orf = [start_codon begin, start_codon end, stop_codon begin, stop_codon end]
  # $cds = [smallest CDS begin, largest CDS end]
  my $orf = $ORFpos{$transcript_id} || [];
  my $cds = $CDSpos{$transcript_id} || [];
  if (defined $orf->[0] or defined $orf->[2] or defined $cds->[0]) {
    # Candidate positions for the left and right edge of the thick region.
    my (@left, @right);
    if ($strand{$transcript_id} eq '+') {
      @left  = ($orf->[0], $cds->[0]);   # start_codon begin, CDS begin
      @right = ($orf->[3], $cds->[1]);   # stop_codon end,    CDS end
    } elsif ($strand{$transcript_id} eq '-') {
      @left  = ($orf->[2], $cds->[0]);   # stop_codon begin,  CDS begin
      @right = ($orf->[1], $cds->[1]);   # start_codon end,   CDS end
    } else {
      # Strand is neither + nor - : use whichever codon / CDS positions were
      # recorded, so that no value from a previous transcript is printed.
      @left  = ($orf->[0], $orf->[2], $cds->[0]);
      @right = ($orf->[1], $orf->[3], $cds->[1]);
    }
    # Smallest recorded left candidate and largest recorded right candidate.
    my ($leftmost, $rightmost);
    foreach my $pos (grep { defined } @left) {
      $leftmost = $pos if not defined $leftmost or $pos < $leftmost;
    }
    foreach my $pos (grep { defined } @right) {
      $rightmost = $pos if not defined $rightmost or $pos > $rightmost;
    }
    # An edge without any candidate falls back to the transcript boundary.
    $beginthickpos = defined $leftmost ? $leftmost : $transcript_beginpos{$transcript_id};
    $endthickpos = defined $rightmost ? $rightmost : $transcript_endpos{$transcript_id};
    $beginthickpos -= 1;   # 1-based GTF start -> 0-based BED start
  } else {
    # No start_codon, stop_codon or CDS: empty thick region at the start.
    $beginthickpos = $beginpos; $endthickpos = $beginpos;
  }

  # --- blocks (columns 10 to 12) ---
  ## In some GTF files the exons of a transcript on the reverse strand
  ## are numbered according to their position on the forward strand
  ## and in others according to their position on the reverse strand.
  ## The stored exons are therefore ordered by begin position, which emits
  ## the blocks from left to right whatever the numbering scheme is and
  ## ignores exon numbers for which no record was stored.
  my $begins = $exon_beginpos{$transcript_id} || [];
  my $ends = $exon_endpos{$transcript_id} || [];
  my @exons = sort { $begins->[$a] <=> $begins->[$b] }
              grep { defined $begins->[$_] } 1 .. $#{$begins};
  $Nexons = scalar @exons;
  $blocksizes = ''; $blockstarts = '';
  foreach $exon_number (@exons) {
    $blocksizes .= ($ends->[$exon_number] - $begins->[$exon_number] + 1) . ',';
    $blockstarts .= ($begins->[$exon_number] - $transcript_beginpos{$transcript_id}) . ',';
  }
  if (not @exons) {
    # Without exon records the line would carry a block count below 1;
    # describe the transcript as one block spanning its whole length instead.
    print "WARNING : $transcript_id has no exon records and is written as a single block\n";
    $Nexons = 1;
    $blocksizes = ($endpos - $beginpos) . ',';
    $blockstarts = '0,';
  }

  # Columns 5 (score) and 9 (itemRgb) are always written as 0.
  print BED join("\t",
                 $chrom{$transcript_id}, $beginpos, $endpos, $transcript_id,
                 0, $strand{$transcript_id},
                 $beginthickpos, $endthickpos,
                 0, $Nexons, $blocksizes, $blockstarts) . "\n";
}
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
close( GTF );
# Buffered output is flushed when the handle is closed, so a write failure
# (for instance a full disk) may only become visible here.  Stop with an
# error rather than leave a truncated BED file behind a successful exit.
close( BED ) or die "ERROR : could not finish writing the BED output: $!\n";
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
385 |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
386 ]]></help> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
387 <citations> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
388 <citation type="doi">10.1093/bioinformatics/bts573</citation> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
389 </citations> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
390 </tool> |
ed0d0eda36a9
"planemo upload for repository https://github.com/usegalaxy-be/galaxytools/tree/main/gtf_to_bed commit 66fba7c9dccfddadce13aad591f441c66c3c309b-dirty"
padge
parents:
diff
changeset
|
391 |