Mercurial > repos > amadeo > amadeo
comparison Tools/Matrix/gene-TF-matrix-csv-galaxy.pl @ 0:229d36377838 draft
Uploaded
author | amadeo |
---|---|
date | Mon, 05 Sep 2016 05:53:08 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:229d36377838 |
---|---|
#!/usr/bin/perl -w
$| = 1;
use strict;
use warnings;

# Script to create a CSV-formatted gene vs TF matrix from a filtered GFF
# file. The GFF file can contain just positive-strand or just
# negative-strand TFBS. Two types of matrix can be produced, selected by
# the third command-line argument:
#   0  presence/absence: each cell is 1 if the TF occurs for the gene, else 0
#   1  counts:           each cell is the number of lines pairing gene and TF
#
# Arguments:
#   $ARGV[0]  input GFF file
#   $ARGV[1]  output CSV file (created or truncated)
#   $ARGV[2]  matrix type, 0 or 1
#
# Output layout: a header row "Gene,<TF>,<TF>,..." with TFs in order of first
# appearance in the input, followed by one row per gene, sorted by gene name.

# Fixed-width slices used to derive the identifiers from a GFF line.
# NOTE(review): offset 5 presumably skips a "Name=" prefix in column 9, and
# the widths look tailored to one specific ID scheme -- confirm against the
# FIMO output actually fed to this tool.
my $TF_OFFSET   = 5;     # start of the TF id inside column 9
my $TF_LENGTH   = 8;     # maximum number of characters kept for the TF id
my $GENE_LENGTH = 21;    # maximum number of characters kept from column 1

if (@ARGV < 3) {
    # A usage error goes to STDERR with a non-zero status so that a calling
    # wrapper does not mistake it for a successful run.
    print STDERR "\nUsage: gene-TF-matrix.pl fimo-nol-P.gff/fimo-nol-N.gff gene-matrix-P.csv/gene-matrix-N.csv\n\n Options: Presence/Abscence=0 counts=1\n\n";
    exit(1);
}

my ($gff_path, $csv_path, $matrixType) = @ARGV;

# Anything other than 0 or 1 previously produced an all-zero matrix without
# any diagnostic; reject it up front instead.
die "Matrix type must be 0 (presence/absence) or 1 (counts), got '$matrixType'\n"
    unless $matrixType =~ /\A[01]\z/;

# Three-argument open with lexical handles: the file names are never parsed
# for mode characters, and $! reports the real reason for a failure.
open(my $fimo, '<', $gff_path)
    or die "Cannot open '$gff_path' for reading: $!\n";
open(my $out, '>', $csv_path)
    or die "Cannot open '$csv_path' for writing: $!\n";

print "MatrixTYpe is $matrixType\n";

my @TF_array;    # TF ids in order of first appearance (CSV column order)
my %seen_TF;     # TF id => 1 once it has been added to @TF_array
my %matrix;      # gene id => { TF id => number of lines seen }

# Single pass over the GFF file. The gene and TF of each line are taken from
# its own columns and counted directly, so a line can only ever affect its own
# cell. No identifier is used as a regular expression.
while (my $line = <$fimo>) {
    next if $line =~ /^##/;    # ignore header lines

    my @cols = split ' ', $line;

    # Skip blank or truncated lines that cannot supply both identifiers.
    next if @cols < 9;
    next if length($cols[8]) <= $TF_OFFSET;

    my $TF   = substr $cols[8], $TF_OFFSET, $TF_LENGTH;
    my $gene = substr $cols[0], 0, $GENE_LENGTH;

    if (not exists $seen_TF{$TF}) {
        $seen_TF{$TF} = 1;
        push @TF_array, $TF;
    }

    $matrix{$gene}{$TF}++;
}

close($fimo);

# Header row. print (not printf) is used throughout so that a '%' inside an
# identifier is written literally rather than treated as a format directive.
print {$out} 'Gene,', join(',', @TF_array), "\n";

foreach my $gene (sort keys %matrix) {
    my $hits = $matrix{$gene};
    my @row;
    foreach my $TF (@TF_array) {
        my $n = exists $hits->{$TF} ? $hits->{$TF} : 0;
        if ($matrixType == 0) {
            $n = $n ? 1 : 0;    # collapse counts to presence/absence
        }
        push @row, $n;
    }
    print {$out} "$gene,", join(',', @row), "\n";
}

# Buffered write errors (e.g. a full disk) only surface at close.
close($out)
    or die "Error while writing '$csv_path': $!\n";