# splicetrap/bin/rmap2eland.pl
# Source: Mercurial annotate view at changeset 7:37a16ff93dd9 (draft, default, tip)
# Commit message: planemo upload
# Author: bioitcore
# Date: Thu, 12 Oct 2017 16:26:36 -0400
use strict;
use warnings;

# rmap2eland.pl -- merge an RMAP hit table with the list of read names and
# write one ELAND-style line per read.
#
# Usage:
#   rmap2eland.pl <rmap_file> <reads_file> <eland_file>
#
#   rmap_file   tab-separated hits; the columns used here are
#               1 = mapped id, 2 = start, 4 = read name, 6 = strand.
#   reads_file  read file whose odd-numbered lines are taken as header lines;
#               the very first byte of the file is treated as the header
#               marker character.
#   eland_file  output path (overwritten).
#
# Output, one line per header line of reads_file, in sorted order:
#   unmapped read:  <name>\tNA\tNM
#   mapped read:    <name>\tNA\t<N>:0:0\t/<id>:<start+1>F0,/<id>:<start+1>R0,...
# where N is the number of hits, and F0/R0 is chosen by strand "+" / other.
#
# Side effects: creates "<reads_file>.sort" and "<rmap_file>.sort" next to the
# inputs and leaves them in place.
#
# NOTE(review): the merge assumes the external `sort` orders read names the
# same way in both files (whole header line vs. column 4) -- confirm for the
# locale in use.

die "Usage: $0 <rmap_file> <reads_file> <eland_file>\n" if @ARGV < 3;

my ($rmapfilename, $readsfilename, $elandfilename) = @ARGV[0 .. 2];

# The first byte of the reads file identifies header lines later on.
my $detectformat = first_byte_of($readsfilename);

# Prepare the two sorted inputs for the merge. External commands are run in
# list form so file names are never interpreted by a shell.
write_sorted_headers($readsfilename, "$readsfilename.sort");
system('sort', '-k4,4', '-o', "$rmapfilename.sort", '--', $rmapfilename) == 0
    or die "sort of '$rmapfilename' failed (status $?)\n";

open my $reads_fh, '<', "$readsfilename.sort"
    or die "Cannot open '$readsfilename.sort': $!\n";
open my $rmap_fh, '<', "$rmapfilename.sort"
    or die "Cannot open '$rmapfilename.sort': $!\n";
open my $eland_fh, '>', $elandfilename
    or die "Cannot write '$elandfilename': $!\n";

# \Q..\E: the marker is a literal character, not a regex fragment.
my $header_re = qr/^\Q$detectformat\E/;

# One-record look-ahead into the sorted hit table. It replaces re-reading a
# line via seek() and makes end-of-file an explicit state (undef).
my $pending_hit = next_rmap_hit($rmap_fh);

READ:
while (my $readline = <$reads_fh>) {
    # Odd-numbered lines that are not headers (for instance the "+" lines of
    # a four-line FASTQ record) are ignored.
    next READ unless $readline =~ $header_re;

    chomp $readline;
    my $readname = substr($readline, 1);    # drop the marker character

    # Hits exhausted, or the next hit belongs to a different read: unmapped.
    if (!defined $pending_hit || $readname ne $pending_hit->{name}) {
        print {$eland_fh} $readname, "\tNA\tNM\n";
        next READ;
    }

    # Gather every consecutive hit carrying this read's name.
    my @hits;
    while (defined $pending_hit && $pending_hit->{name} eq $readname) {
        push @hits, $pending_hit;
        $pending_hit = next_rmap_hit($rmap_fh);
    }

    my $placements = join '', map {
        '/' . $_->{id} . ':' . ($_->{start} + 1)    # start is shifted by one
            . ($_->{strand} eq '+' ? 'F0,' : 'R0,')
    } @hits;

    print {$eland_fh} $readname, "\tNA\t", scalar(@hits), ":0:0\t",
        $placements, "\n";
}

close $eland_fh or die "Error closing '$elandfilename': $!\n";
close $rmap_fh;
close $reads_fh;

# Return the first byte of the file at $path, or '' when the file is empty.
sub first_byte_of {
    my ($path) = @_;

    open my $fh, '<', $path or die "Cannot open '$path': $!\n";
    binmode $fh;
    my $byte = '';
    defined read($fh, $byte, 1) or die "Cannot read '$path': $!\n";
    close $fh;

    return $byte;
}

# Feed the odd-numbered lines of $reads_path through `sort` and store the
# result in $sorted_path.
sub write_sorted_headers {
    my ($reads_path, $sorted_path) = @_;

    open my $in, '<', $reads_path or die "Cannot open '$reads_path': $!\n";
    open my $sorter, '|-', 'sort', '-o', $sorted_path
        or die "Cannot start sort: $!\n";

    while (my $line = <$in>) {
        next unless $. % 2;    # keep lines 1, 3, 5, ...
        chomp $line;
        print {$sorter} $line, "\n";
    }

    close $in;
    close $sorter or die "sort of '$reads_path' failed (status $?)\n";

    return;
}

# Read the next usable record from the sorted hit table.
# Returns a hash ref with keys id, start, name, strand, or nothing at EOF.
# Lines with fewer than four columns (e.g. blank lines) carry no read name
# and are skipped rather than being compared against every remaining read.
sub next_rmap_hit {
    my ($fh) = @_;

    while (my $line = <$fh>) {
        chomp $line;
        my ($mapped_id, $start, undef, $name, undef, $strand)
            = split /\t/, $line;
        next unless defined $name;

        return {
            id     => $mapped_id,
            start  => $start,
            name   => $name,
            strand => (defined $strand ? $strand : ''),
        };
    }

    return;
}