0
|
1 use DBI;
|
|
2
|
|
3 #
|
|
4 # Database stuff
|
|
5 #
|
|
6
|
|
7 $dbh; # database handler
|
|
8 $db_name = 'largaespada_NGS_insertion_test';
|
|
9 #my $db_username = 'insertion_larga';
|
|
10 #my $db_password = 'uwtc.Pzc92';
|
|
11 $db_username = 'sarvera';
|
|
12 $db_password = 'fn101bcc';
|
|
13
|
|
14 $db_host = '127.0.0.1;mysql_local_infile=1';
|
|
15 $db_type = 'mysql';
|
|
16 $data_source = "DBI:$db_type:database=$db_name;host=$db_host";
|
|
17
|
|
18 #
|
|
19 # global values
|
|
20 #
|
|
21 # allows multiple active instances to be running in the db...
|
|
22 $proj = 'merge_science';
|
|
23 $library_percent ='0.0001';
|
|
24
|
|
25 $CIS_total_pvalue = '0.05';
|
|
26 $CIS_library_pvalue = '0.05';
|
|
27 $CIS_region_pvalue = '0.05';
|
|
28
|
|
29 $cocis_threshold ='0.001';
|
|
30
|
|
31 $dbh = DBI->connect($data_source, $db_username, $db_password,
|
|
32 { RaiseError => 1, AutoCommit => 0 });
|
|
33
|
|
34 sub resolve_barcodes {
|
|
35 #2.1 map the barcodes to the sequences..must change (sequence, #) to length of barcode +1 based on barcode sequence length!.
|
|
36 #Currently set up for 6,10,12 base barcodes.
|
|
37 ####for 6mer barcodes...
|
|
38 #$sth = $dbh->prepare("create table illumina_decoded_$proj select library, id, substring(sequence,7) as decoded_sequence from barcode_$proj,illumina_raw_$proj where sequence like concat(seq,'%')");
|
|
39 ####for 10mer barcodes...
|
|
40 $sth = $dbh->prepare("create table illumina_decoded_$proj select library, id, substring(sequence,11) as decoded_sequence from barcode_$proj,illumina_raw_$proj where sequence like concat(seq,'%')");
|
|
41 #for 12mer barcodes...
|
|
42 #$sth = $dbh->prepare("create table illumina_decoded_$proj select library, id, substring(sequence,13) as decoded_sequence from barcode_$proj,illumina_raw_$proj where sequence like concat(seq,'%')");
|
|
43
|
|
44 $sth->execute;
|
|
45
|
|
46 }
|
|
47
|
|
48 sub resolve_IRDR {
|
|
49 #2.3
|
|
50 #different options available this section needs to be optomized for each dataset
|
|
51 #for lung 454 data
|
|
52 $sth = $dbh->prepare("create table illumina_without_IRDR_$proj select library,id,substring(decoded_sequence,32) as insertion_sequence, 'good' as type from illumina_decoded_$proj where decoded_sequence like '_____TGTATGTAAACTTCCGACTTCAACTG%'");
|
|
53
|
|
54 $sth->execute;
|
|
55 $sth = $dbh->prepare("insert into illumina_without_IRDR_$proj select library,id,substring(decoded_sequence,31) as insertion_sequence, 'good' as type from illumina_decoded_$proj where decoded_sequence like '_____TGTATGTAACTTCCGACTTCAACTG%'");
|
|
56
|
|
57
|
|
58
|
|
59 #$sth = $dbh->prepare("create table illumina_without_IRDR_$proj select library,id,substring(decoded_sequence,31) as insertion_sequence, 'good' as type from illumina_decoded_$proj where decoded_sequence like 'TTAAGTGTATGTAACTTCCGACTTCAACTG%'");
|
|
60
|
|
61 #for illumina data
|
|
62 #$sth = $dbh->prepare("create table illumina_without_IRDR_$proj select library,id,substring(decoded_sequence,27) as insertion_sequence, 'good' as type from illumina_decoded_$proj where decoded_sequence like 'TGTATGTAAACTTCCGACTTCAACTG%'");
|
|
63
|
|
64 #for illumina_2 data
|
|
65 #$sth = $dbh->prepare("create table illumina_without_IRDR_$proj select library,id,substring(decoded_sequence,30) as insertion_sequence, 'good' as type from illumina_decoded_$proj where decoded_sequence like '___TGTATGTAAACTTCCGACTTCAACTG%'");
|
|
66
|
|
67 #for MULV virus
|
|
68 #$sth = $dbh->prepare("create table illumina_without_IRDR_$proj select library,id,substring(decoded_sequence,26) as insertion_sequence, 'good' as type from illumina_decoded_$proj where decoded_sequence like 'CCAAACCTACAGGTGGGGTCTTTCA%'");
|
|
69 $sth->execute;
|
|
70 }
|