Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer_workflow/Perl/Naegleria/generateMauveJson.pl @ 1:032f6b3806a3 draft
Uploaded
| author | dereeper |
|---|---|
| date | Thu, 30 May 2024 11:16:08 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3cbb01081cde | 1:032f6b3806a3 |
|---|---|
#!/usr/bin/perl

# Prints coordinate pairs for orthologous genes found in two GFF3 annotations.
#
# Inputs (all read from the current directory):
#   - two GFF3 files, from which the position of every "mRNA" feature is taken;
#   - orthofinder_matrix.txt, a tab-separated matrix with one header line,
#     whose column 1 and column 7 hold the gene IDs that are paired.
#
# Output (STDOUT): a "[" line, then one line per retained pair:
#   <chr1> <start1> <end1> <chr2> <start2> <end2>
# where "chr" is rewritten to "NF" in the first name and "NL" in the second.
#
# A row is retained only when every column after the first is non-empty,
# both cells hold a single gene (no comma), both genes have a known position,
# and the two sequence names differ.
#
# A leftover debugging statement that printed "NL37;" .. "NL1;" and exited
# before any of this ran has been removed.

use strict;
use warnings;

# Optional chromosome name given on the command line.  Currently unused: the
# filters that consulted it are disabled.
my $chrom_focus = $ARGV[0];

# Annotation files, loaded in this order (a later file overrides a duplicate ID).
my @GFF_FILES = (
    "Map_annotation_ID_NFGwada.gff3.gff3",
    "Map_annotation_ID_NLova7.gff3.gff3",
);
my $ORTHO_MATRIX = "orthofinder_matrix.txt";

# 0-based columns of the orthology matrix holding the two genes to pair.
my $GENE1_COLUMN = 1;
my $GENE2_COLUMN = 7;

# Per-chromosome coordinate shift: positions on "chrN" are moved by
# N * $CHR_OFFSET_STEP.  Disabled (0); an earlier variant used 1400000.
my $CHR_OFFSET_STEP = 0;

# Reads one GFF3 file and records [chr, start, end] for every mRNA feature
# whose attributes contain "ID=<id>;".
#   $gff_file    - path of the GFF3 file
#   $position_of - hash ref filled in place, keyed by the captured ID
# Dies if the file cannot be opened.
sub load_gene_positions {
    my ($gff_file, $position_of) = @_;

    open(my $gff, '<', $gff_file)
        or die "Cannot open $gff_file: $!\n";
    while (my $line = <$gff>) {
        $line =~ s/[\r\n]//g;
        my @infos = split(/\t/, $line);

        # Comment and truncated lines lack the type/start/end columns.
        next if @infos < 5;
        next unless $infos[2] eq "mRNA" && $line =~ /ID=([^;]+);/;

        # Kept as a triple instead of a "-"-joined string so that sequence
        # names containing "-" survive.
        $position_of->{$1} = [ $infos[0], $infos[3], $infos[4] ];
    }
    close($gff);
    return;
}

# Applies the per-chromosome shift to a start/end pair when the sequence name
# contains "chr<digits>"; other names are returned untouched.
# Returns the list (start, end).
sub shift_coordinates {
    my ($chr, $start, $end) = @_;

    if ($chr =~ /chr(\d+)/) {
        my $offset = $1 * $CHR_OFFSET_STEP;
        $start += $offset;
        $end   += $offset;
    }
    return ($start, $end);
}

my %gene_positions;
for my $gff_file (@GFF_FILES) {
    load_gene_positions($gff_file, \%gene_positions);
}

open(my $matrix, '<', $ORTHO_MATRIX)
    or die "Cannot open $ORTHO_MATRIX: $!\n";

# NOTE(review): this opening bracket looks like a remnant of a JSON output
# mode (no closing bracket is ever printed); kept so the output is unchanged.
print "[\n";

my $header = <$matrix>;    # the header line carries no gene data
while (my $line = <$matrix>) {
    $line =~ s/[\r\n]//g;

    # Limit -1 keeps trailing empty cells; without it a row whose last
    # columns are empty would look complete.
    my @infos = split(/\t/, $line, -1);
    next if $#infos < $GENE2_COLUMN;

    # Keep only rows in which every column after the first is filled.
    my $nb_found = grep { /\w+/ } @infos[ 1 .. $#infos ];
    next unless $nb_found == $#infos;

    my ($gene1, $gene2) = @infos[ $GENE1_COLUMN, $GENE2_COLUMN ];

    # A comma means several genes share the cell; only 1:1 pairs are used.
    next if $gene1 =~ /,/ || $gene2 =~ /,/;

    # Skip pairs with an unknown position rather than print empty fields.
    my $position1 = $gene_positions{$gene1};
    my $position2 = $gene_positions{$gene2};
    next unless $position1 && $position2;

    my ($chr1, $start1, $end1) = @{$position1};
    my ($chr2, $start2, $end2) = @{$position2};
    $chr2 = lc($chr2);

    next unless $chr1 ne $chr2 && $chr2 =~ /\w+/;

    ($start1, $end1) = shift_coordinates($chr1, $start1, $end1);
    ($start2, $end2) = shift_coordinates($chr2, $start2, $end2);

    $chr1 =~ s/chr/NF/g;
    $chr2 =~ s/chr/NL/g;
    print "$chr1 $start1 $end1 $chr2 $start2 $end2\n";
}
close($matrix);
