annotate mlstAddFields.pl @ 13:59e137488c63 draft

Uploaded
author estrain
date Wed, 24 May 2023 17:24:23 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
59e137488c63 Uploaded
estrain
parents:
diff changeset
#!/usr/bin/perl
#
# mlstAddFields.pl - add a column of scheme annotations to one mlst result.
#
# Usage: mlstAddFields.pl <mlst_output.tsv>
#
# Reads the FIRST line of the tab-separated file named by the first argument
# and takes field 2 as the scheme name and field 3 as the sequence type (ST).
# The ST is looked up in <datadir>/<scheme>/<scheme>.txt, where <datadir> is
# the single-quoted path on the "db/pubmlst" line printed by `mlst -h`.
# The input line is then printed to STDOUT with one extra field inserted
# directly after the ST. That field is a comma-separated list of whichever of
# clonal_complex=, CC=, Lineage= and species= have a non-empty value for the
# ST; it is empty when the ST or the profile file cannot be found.
#
# Assumes the mlst command is installed and available in the PATH.

use strict;
use warnings;

use List::MoreUtils qw(first_index indexes);

# Output label => pattern that locates the matching profile-header column.
# An ordered list (not a hash) because labels are emitted in this order.
# NOTE(review): the patterns are unanchored, so /CC/ matches the first header
# that merely contains "CC" - confirm no scheme has such a locus name.
my @FIELD_PATTERNS = (
    [ 'clonal_complex', qr/clonal\_complex/ ],
    [ 'CC',             qr/CC/ ],
    [ 'Lineage',        qr/Lineage/ ],
    [ 'species',        qr/species/ ],
);

# Returns the directory holding the MLST scheme databases, or undef (empty
# list in list context) when it cannot be determined from `mlst -h`.
sub find_mlst_datadir {
    my $help_line = qx{mlst -h | grep "db/pubmlst"};
    return unless defined $help_line && length $help_line;

    # The path is the text between the first pair of single quotes.
    my ( undef, $dir ) = split /'/, $help_line;
    return unless defined $dir && length $dir;

    # Drop the "bin/.." hop so the path points straight at the install root.
    $dir =~ s{bin/\.\.}{}g;
    return $dir;
}

# Builds the ST => "label=value,..." lookup from a scheme profile file.
#
#   $profile_path - tab-separated file: one header line, then one row per ST
#                   with the ST in the first column.
#
# Returns a hash reference. It is empty when the file cannot be opened, which
# keeps the script best-effort: the result line is still printed, just with an
# empty annotation field (presumably the case when mlst reports no scheme).
sub load_scheme_annotations {
    my ($profile_path) = @_;
    my %annotation_for;

    open my $profile_fh, '<', $profile_path
        or return \%annotation_for;

    my $header = <$profile_fh>;
    if ( defined $header ) {
        chomp $header;
        my @headers = split /\t/, $header;

        # Resolve every label to its column once, up front. first_index
        # returns -1 when nothing matches, and column 0 is the lookup key
        # itself, so only indexes greater than 0 are annotation columns.
        my @columns;
        for my $field (@FIELD_PATTERNS) {
            my ( $label, $pattern ) = @{$field};
            my $index = first_index { $_ =~ $pattern } @headers;
            push @columns, [ $label, $index ] if $index > 0;
        }

        while ( my $line = <$profile_fh> ) {
            chomp $line;
            my @vals = split /\t/, $line;
            next unless @vals;    # blank line: nothing to key on

            my @desc;
            for my $column (@columns) {
                my ( $label, $index ) = @{$column};

                # Each label is tested against ITS OWN column. split drops
                # trailing empty fields, so the cell may also be undefined.
                my $value = $vals[$index];
                push @desc, "$label=$value"
                    if defined $value && length $value;
            }
            $annotation_for{ $vals[0] } = join ',', @desc;
        }
    }
    close $profile_fh;

    return \%annotation_for;
}

my $mlst_path = $ARGV[0];
die "Usage: $0 <mlst_output.tsv>\n" unless defined $mlst_path;

open my $mlst_fh, '<', $mlst_path
    or die "Cannot open '$mlst_path': $!\n";
my $mlst_line = <$mlst_fh>;
close $mlst_fh;
die "No mlst result found in '$mlst_path'\n" unless defined $mlst_line;

# Remove the line ending BEFORE splitting; otherwise it stays glued to the
# last field, which corrupts the ST when the line has only three fields.
chomp $mlst_line;

# Limit -1 keeps trailing empty fields so the output has the same columns.
my @mlstout = split /\t/, $mlst_line, -1;
die "Expected at least 3 tab-separated fields in '$mlst_path'\n"
    if @mlstout < 3;

# After this, @mlstout holds only the fields that follow the ST.
my ( $sample, $schema, $mlst_st ) = splice @mlstout, 0, 3;

my $datadir = find_mlst_datadir();
my $annotation_for =
    defined $datadir
    ? load_scheme_annotations("$datadir/$schema/$schema.txt")
    : {};

my $annotation =
    exists $annotation_for->{$mlst_st} ? $annotation_for->{$mlst_st} : '';

print join( "\t", $sample, $schema, $mlst_st, $annotation, @mlstout ), "\n";