Mercurial > repos > bioitcore > splicetrap
comparison bin/gtf2bed.pl @ 1:adc0f7765d85 draft
planemo upload
| author | bioitcore |
|---|---|
| date | Thu, 07 Sep 2017 15:06:58 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d4ca551ca300 | 1:adc0f7765d85 |
|---|---|
# rewrite on Sep 7th,2022

#part of package SpliceTrap

#Jie Wu
use strict;
use warnings;

# Usage: gtf2bed.pl <input.gtf> > output.bed
#
# input file is a gtf file,
# "transcript_id" is required for each "exon" line and should not be ambiguous.
# only the "exon" lines are used
#
# One BED12 line is printed to STDOUT per transcript; diagnostics go to STDERR.

# Run only when executed as a script, so the helpers below can be loaded
# (e.g. by a test) without triggering any file I/O.
main(@ARGV) unless caller;

# Entry point: convert the GTF file named by the first argument to BED12.
# Dies if the argument is missing or the file cannot be opened.
sub main
{
	my ($inputfilename) = @_;
	die "Usage: $0 <input.gtf>\n" unless defined $inputfilename;

	# Three-argument open with a lexical handle; a missing or unreadable
	# input must be an error rather than silently yielding empty output.
	open(my $in_fh, '<', $inputfilename)
		or die "Cannot open $inputfilename: $!\n";
	my ($chr_of, $strand_of, $exons_of) = _read_exons($in_fh, $inputfilename);
	close($in_fh);

	# Sorted so that the output order is reproducible between runs.
	foreach my $txid (sort keys %$chr_of)
	{
		print _bed12_line($txid, $chr_of->{$txid}, $strand_of->{$txid}, $exons_of->{$txid});
		print "\n";
	}
	return;
}

# Collect the "exon" records of a GTF stream, grouped by transcript_id.
#
#   $fh    - readable filehandle positioned at the start of the GTF data
#   $label - name of the input, used only in diagnostics
#
# Returns three hash references:
#   \%chr_of    {$txid} = chromosome (GTF column 1)
#   \%strand_of {$txid} = strand     (GTF column 7)
#   \%exons_of  {$txid}{$start} = $size   (1-based start, size in bases)
#
# Dies when an exon line carries no transcript_id. An exon whose chromosome
# or strand disagrees with what was already recorded for its transcript is
# reported on STDERR and skipped.
sub _read_exons
{
	my ($fh, $label) = @_;

	my %chr_of;
	my %strand_of;
	my %exons_of;	#exons_of{$txid}{$start} = $size;

	my $linenum = 0;
	while (my $line = <$fh>)
	{
		$linenum++;
		$line =~ s/\r?\n\z//;
		my @field = split("\t", $line);

		# Comment, blank and short lines have no feature column.
		next unless defined $field[2] and $field[2] eq "exon";

		my $attributes = defined $field[8] ? $field[8] : "";
		my $txid;
		if ($attributes =~ /transcript_id "(\S*?)"/)
		{
			$txid = $1;
		}
		else
		{
			die ("$label format error! No transcript_id in line $linenum \n");
		}

		my ($chr, $start, $end, $strand) = @field[0, 3, 4, 6];

		if (exists $chr_of{$txid} and $chr_of{$txid} ne $chr)
		{
			warn ("$label: ambiguous transcript_id in line $linenum: $txid Skipped \n");
			next;
		}
		if (exists $strand_of{$txid} and $strand_of{$txid} ne $strand)
		{
			warn ("$label: ambiguous transcript_id in line $linenum: $txid Skipped\n");
			# Really skip the record, as the message says; otherwise the
			# transcript's strand would be overwritten by the conflicting line.
			next;
		}

		$chr_of{$txid}    = $chr;
		$strand_of{$txid} = $strand;
		# Keyed by start: a later exon with the same start replaces the earlier one.
		$exons_of{$txid}{$start} = $end - $start + 1;
	}

	return (\%chr_of, \%strand_of, \%exons_of);
}

# Format one transcript as a BED12 line (without trailing newline).
#
#   $txid          - transcript identifier (BED name column)
#   $chr, $strand  - chromosome and strand of the transcript
#   $size_of_start - hash ref: 1-based exon start => exon size
#
# Warns on STDERR when two consecutive exon starts are more than 1,000,000
# bases apart.
sub _bed12_line
{
	my ($txid, $chr, $strand, $size_of_start) = @_;

	my @starts = sort { $a <=> $b } keys %$size_of_start;
	my @sizes  = map { $size_of_start->{$_} } @starts;
	my $exon_num = scalar(@starts);

	for my $i (1 .. $#starts)
	{
		# Distance between consecutive exon starts, used as a cheap proxy
		# for the intron length.
		my $gap = $starts[$i] - $starts[$i-1];
		warn "$txid, intron size..".$gap."\n" if ($gap > 1000000);
	}

	# blockStarts are relative to the first exon; every entry, including the
	# last, is followed by a comma (kept as in the historical output).
	my $starts_str = join("", map { ($_ - $starts[0]).","} @starts);
	my $sizes_str  = join(",", @sizes);

	# GTF is 1-based inclusive, BED is 0-based half-open: start shifts by
	# one, the end coordinate stays the same.
	my $bed_start = $starts[0] - 1;
	my $end = $starts[-1] + $sizes[-1] - 1;

	return join("\t", $chr, $bed_start, $end, $txid, "0", $strand, $bed_start, $end, "255,0,0", $exon_num, $sizes_str, $starts_str);
}
