\<Country\><\/b>(.*)\</; + if($1){ + $country2 = $1; # my $country $country[0]; + } + else{ + $country2 = "ND"; + } + #open COUNTRY, "echo $_ | grep -oP '(?<=Country>).*?(?=)' "; # Non-greedy match (Notice the '?' after '*' in .*) + #while () { + # chomp(); + # print $_; + #} + #my ($substr) = ($string =~ /period_1_(.*)\.ssa/); + } + close (XML) or die "close file error : $!"; + + open (RUNXML, "<$run.html") or die "open : $!"; + if ($country2 eq "ND"){ + while () { + chomp(); + my $ostring1 = "geographic location (country and/or sea region)</TAG>

<VALUE>"; + my $ostring2 = "<"; + $_ =~ /$ostring1(.*?)$ostring2/; #/
\<Country\><\/b>(.*)\</; + if($1){ + print "Country2 = ".$1."\n"; + $country2 = $1; # my $country $country[0]; + } + else{ + $country2 = "ND"; + } + + } + } + close (RUNXML) or die "close file error : $!"; + + #geographic location (country and/or sea region)</TAG>
<VALUE> + #< + #close (COUNTRY) or die "close file error : $!"; + + open (CSV, "<$run.csv") or die "open : $!"; + while () { + chomp(); + if ($_ =~ m/$run/) { + @tabCSV = split (/,/, $_) ; + } + } + close (CSV) or die "close file error : $!"; + + my $tmpCenter = ""; + if($hashCenter{$tabCSV[41]}) { $tmpCenter = $hashCenter{$tabCSV[41]}; } + + print SUM "$run\t$tabCSV[1]\t$tabCSV[4]\t$tabCSV[8]\t$tabCSV[27]\t$tabCSV[28]\t$tabCSV[41] ($tmpCenter)\t$tabCSV[44]\t$country2\t$tabCSV[12]\t$tabCSV[13]\t$tabCSV[14]\t$tabCSV[15]\t$tabCSV[18]\t$tabCSV[19]\n"; + +} + + +close (SUM) or die "close file error : $!"; + +#my $end = time(); + +#my $total = $end - $start; + +#print "***** Total time (in seconds) is: $total *****\n"; +unlink glob ('*.html'); +unlink glob ('*.csv'); + diff -r 000000000000 -r 1bc09d59c6d7 SRArunInfo.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SRArunInfo.xml Fri Sep 03 22:33:14 2021 +0000 @@ -0,0 +1,37 @@ + + Provide information from SRR/ERR run accessions + + + + + + + + + + + + + + + + + + + + +