changeset 0:7a27a48d57c0 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/openalex commit 7bac5b8acf6091006591be468a252e57793db4d8
author iuc
date Sat, 31 May 2025 12:25:39 +0000
parents
children
files openalex.xml openalex_fetch.py test-data/expected_citing_papers.tsv test-data/expected_citing_papers_doi.tsv test-data/expected_citing_papers_title.tsv test-data/expected_citing_papers_wddownload.tsv test-data/expected_summary.txt test-data/expected_summary_doi.txt test-data/expected_summary_title.txt test-data/expected_summary_wddownload.txt
diffstat 10 files changed, 392 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/openalex.xml	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,88 @@
+<tool id="openalex_explorer" name="OpenAlex explorer" version="0.1.0+galaxy0" profile="23.1">
+    <description>Fetch citing papers from OpenAlex using DOI, OpenAlex ID, or title</description>
+    <requirements>
+        <requirement type="package" version="3.10">python</requirement>
+        <requirement type="package" version="2.31.0">requests</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+python3 '$__tool_directory__/openalex_fetch.py'
+$input_type '$identifier'
+--max-citations '$max_citations'
+$download_pdfs
+]]></command>
+    <inputs>
+        <param name="input_type" type="select" label="Input type" help="Select whether the identifier is an OpenAlex ID, DOI, or Title">
+            <option value="--doi" selected="true">DOI</option>
+            <option value="--id">OpenAlex ID</option>
+            <option value="--title">Title</option>
+        </param>
+        <param name="identifier" type="text" label="Identifier" help="Enter the OpenAlex ID, DOI, or Title depending on your selection above"/>
+        <param name="download_pdfs" type="boolean" truevalue="--download" falsevalue="" label="Download available OA PDFs?" value="false"/>
+        <param name="max_citations" type="select" label="Max citing papers to fetch" help="Limit the number of citing papers to fetch">
+            <option value="10">10</option>
+            <option value="20">20</option>
+            <option value="50" selected="true">50</option>
+            <option value="100">100</option>
+            <option value="all">All</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="summary_out" format="txt" label="Citation Summary" from_work_dir="summary.txt"/>
+        <data name="tsv_out" format="tabular" label="Citing Papers TSV" from_work_dir="citing_papers.tsv"/>
+        <collection name="pdf_outputs" label="Downloaded PDFs" type="list" format="pdf">
+            <discover_datasets pattern="__designation_and_ext__" directory="downloads"/>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_type" value="--id"/>
+            <param name="identifier" value="W4398182689"/>
+            <param name="max_citations" value="10"/>
+            <param name="download_pdfs" value="false"/>
+            <output name="summary_out" file="expected_summary.txt"/>
+            <output name="tsv_out" file="expected_citing_papers.tsv"/>
+        </test>
+        <test>
+            <param name="input_type" value="--doi"/>
+            <param name="identifier" value="10.1093/nar/gkae410"/>
+            <param name="max_citations" value="50"/>
+            <param name="download_pdfs" value="false"/>
+            <output name="summary_out" file="expected_summary_doi.txt"/>
+            <output name="tsv_out" file="expected_citing_papers_doi.tsv"/>
+        </test>
+        <test>
+            <param name="input_type" value="--title"/>
+            <param name="identifier" value="The Galaxy platform for accessible, reproducible, and collaborative data analyses: 2024 update"/>
+            <param name="max_citations" value="50"/>
+            <param name="download_pdfs" value="false"/>
+            <output name="summary_out" file="expected_summary_title.txt"/>
+            <output name="tsv_out" file="expected_citing_papers_title.tsv"/>
+        </test>
+        <test>
+            <param name="input_type" value="--id"/>
+            <param name="identifier" value="W2088676066"/>
+            <param name="max_citations" value="10"/>
+            <param name="download_pdfs" value="true"/>
+            <output name="summary_out" file="expected_summary_wddownload.txt"/>
+            <output name="tsv_out" file="expected_citing_papers_wddownload.tsv"/>
+            <output_collection name="pdf_outputs" type="list" count="1">
+                <!-- The element check below also passes, but asserting the element count is sufficient. -->
+                <!-- <element name="NAC-MYB-based transcriptional regulation of secondary cell wall biosynthesis in land plants" file="downloads/NAC-MYB-based transcriptional regulation of secondary cell wall biosynthesis in land plants.pdf"/> -->
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+This tool fetches citing papers from OpenAlex for a paper specified by OpenAlex ID, DOI, or title.
+
+You can optionally download available Open Access PDFs.
+
+**Outputs:**
+
+- summary.txt: counts of total, open-access, and closed-access citing papers
+
+- citing_papers.tsv: list of citing papers with details (title, DOI, OA)
+    ]]></help>
+    <citations>
+        <citation type="doi">10.48550/arXiv.2205.01833</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/openalex_fetch.py	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,168 @@
+import argparse
+import os
+
+import requests
+
+
+def get_openalex_id_from_doi(doi):
+    """Resolve ``doi`` to its OpenAlex work ID (last path segment of the record's 'id')."""
+    url = f'https://api.openalex.org/works/https://doi.org/{doi}'
+    response = requests.get(url, timeout=30)  # never wait forever on a stalled connection
+    response.raise_for_status()
+    return response.json()['id'].split('/')[-1]
+
+
+def get_openalex_id_from_title(title):
+    """Return the OpenAlex work ID of the first search hit for ``title``; ValueError if none."""
+    # Pass the title via params so requests URL-encodes it ('&', '#', '+' would corrupt a raw URL).
+    response = requests.get('https://api.openalex.org/works', params={'search': title}, timeout=30)
+    response.raise_for_status()
+    results = response.json().get('results', [])
+    if not results:
+        raise ValueError("No paper found with the given title.")
+    return results[0]['id'].split('/')[-1]
+
+
+def fetch_citing_papers(openalex_id, max_citations=None):
+    """Return the works citing ``openalex_id``, capped at ``max_citations`` (falsy = no cap).
+
+    Raises ValueError if the work record carries no 'cited_by_api_url'.
+    """
+    all_citing_papers = []
+    per_page = 200
+    page = 1
+
+    work_url = f'https://api.openalex.org/works/{openalex_id}'
+    response = requests.get(work_url, timeout=30)
+    response.raise_for_status()
+    cited_by_url = response.json().get('cited_by_api_url')
+    if not cited_by_url:
+        raise ValueError("This work has no citing papers.")
+
+    while True:
+        # Appending with '&' assumes cited_by_url already contains a query string.
+        paged_url = f"{cited_by_url}&per_page={per_page}&page={page}"
+        response = requests.get(paged_url, timeout=30)  # bound each page fetch
+        response.raise_for_status()
+        results = response.json().get('results', [])
+        if not results:
+            break
+
+        all_citing_papers.extend(results)
+
+        if max_citations and len(all_citing_papers) >= max_citations:
+            all_citing_papers = all_citing_papers[:max_citations]
+            break
+
+        # A short page means there is nothing left to request.
+        if len(results) < per_page:
+            break
+        page += 1
+
+    return all_citing_papers
+
+
+def download_pdf(url, title, folder_name):
+    """Best-effort save of ``url`` as '<safe title>.pdf' ('untitled' if the title is empty); errors are printed."""
+    try:
+        os.makedirs(folder_name, exist_ok=True)
+        response = requests.get(url, timeout=60)
+        if response.status_code == 200:
+            safe_title = "".join(x for x in (title or "") if x.isalnum() or x in " _-").rstrip() or "untitled"
+            file_path = os.path.join(folder_name, f"{safe_title}.pdf")
+            with open(file_path, 'wb') as f:
+                f.write(response.content)
+            print(f"[✓] Downloaded: {file_path}")
+        else:
+            print(f"[x] Failed to download: {url}")
+    except Exception as e:
+        print(f"[!] Error downloading {url}: {e}")
+
+
+def main():
+    """Command-line entry point: resolve the work, fetch its citing papers, write the reports.
+
+    Writes summary.txt and citing_papers.tsv (plus PDFs under downloads/ with
+    --download) into --output-dir; exits non-zero when any step in the try raises.
+    """
+    parser = argparse.ArgumentParser(description="Fetch citing papers from OpenAlex")
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--id', help='OpenAlex ID of the paper (e.g., W2088676066)')
+    group.add_argument('--doi', help='DOI of the paper')
+    group.add_argument('--title', help='Title of the paper')
+
+    parser.add_argument('--download', action='store_true', help='Download available OA PDFs')
+    parser.add_argument('--max-citations', type=str, default="50", dest='max_citations', help="Max citing papers to fetch or 'all'")
+    parser.add_argument('--output-dir', default='.', help='Directory to save output files')
+    args = parser.parse_args()
+
+    output_dir = args.output_dir
+    summary_path = os.path.join(output_dir, "summary.txt")
+    tsv_path = os.path.join(output_dir, "citing_papers.tsv")
+    download_dir = os.path.join(output_dir, "downloads")
+
+    max_citations = None if args.max_citations.lower() == "all" else int(args.max_citations)
+
+    try:
+        if args.title:
+            openalex_id = get_openalex_id_from_title(args.title)
+        elif args.doi:
+            openalex_id = get_openalex_id_from_doi(args.doi)
+        else:
+            openalex_id = args.id
+
+        citing_papers = fetch_citing_papers(openalex_id, max_citations=max_citations)
+
+        is_oa = 0
+        is_not_oa = 0
+
+        for paper in citing_papers:
+            # Papers without any location are tallied as neither OA nor closed.
+            if not paper['locations']:
+                continue
+            location = paper['locations'][0]
+            is_open = location.get('is_oa', False)
+            landing_url = location.get('landing_page_url', 'No URL')
+
+            if is_open:
+                is_oa += 1
+                print("[OA]", landing_url)
+                if args.download:
+                    pdf_url = location.get('pdf_url')
+                    if pdf_url:
+                        download_pdf(pdf_url, paper['title'], download_dir)
+                    else:
+                        print(f"[!] No direct PDF URL for: {paper['title']}")
+            else:
+                is_not_oa += 1
+                print("[Closed]", landing_url)
+
+        print("\nSummary:")
+        print("Total citing papers:", len(citing_papers))
+        print("Open Access papers:", is_oa)
+        print("Closed Access papers:", is_not_oa)
+
+        # save summary
+        with open(summary_path, "w") as f:
+            f.write(f"Total citing papers: {len(citing_papers)}\n")
+            f.write(f"Open Access papers: {is_oa}\n")
+            f.write(f"Closed Access papers: {is_not_oa}\n")
+
+        # save citing papers to a TSV file
+        with open(tsv_path, "w", encoding="utf-8") as f:
+            f.write("Title\tDOI\tIs_OA\n")
+            for paper in citing_papers:
+                raw_title = paper.get("title") or "N/A"
+                title = raw_title.replace("\t", " ")
+                # A present-but-null "doi" must also become N/A, like the title above.
+                doi = paper.get("doi") or "N/A"
+                location = paper['locations'][0] if paper['locations'] else {}
+                paper_is_oa = location.get("is_oa", False)
+                f.write(f"{title}\t{doi}\t{paper_is_oa}\n")
+    except Exception as e:
+        # SystemExit with a message prints it to stderr and exits with status 1.
+        raise SystemExit(f"[!] Error: {e}")
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_citing_papers.tsv	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,11 @@
+Title	DOI	Is_OA
+Transforming Clinical Research: The Power of High-Throughput Omics Integration	https://doi.org/10.3390/proteomes12030025	True
+Database resources of the National Center for Biotechnology Information in 2025	https://doi.org/10.1093/nar/gkae979	True
+The efflux pump ABCC1/MRP1 constitutively restricts PROTAC sensitivity in cancer cells	https://doi.org/10.1016/j.chembiol.2024.11.009	True
+The cell colony development is connected with the accumulation of embryogenesis-related proteins and dynamic distribution of cell wall components in in vitro cultures of Fagopyrum tataricum and Fagopyrum esculentum	https://doi.org/10.1186/s12870-025-06119-3	True
+16S rRNA sequencing reveals synergistic effects of silkworm feces and earthworms on soil microbial diversity and resilience under elevated temperatures	https://doi.org/10.1016/j.apsoil.2025.105952	False
+Evolution of sexual systems and regressive evolution in <i>Riccia</i>	https://doi.org/10.1111/nph.20454	True
+Applying the FAIR Principles to computational workflows	https://doi.org/10.1038/s41597-025-04451-9	True
+A case for global standardisation of genomics and wastewater-based epidemiology	https://doi.org/10.1016/j.lanmic.2025.101092	True
+Bioactive Polyphenolic Compounds from Propolis of Tetragonula carbonaria in the Gibberagee Region, New South Wales, Australia	https://doi.org/10.3390/foods14060965	True
+Comparative genomics reveals genetic diversity and differential metabolic potentials of the species of Arachnia and suggests reclassification of Arachnia propionica E10012 (=NBRC_14587) as novel species	https://doi.org/10.1007/s00203-025-04302-6	False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_citing_papers_doi.tsv	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,51 @@
+Title	DOI	Is_OA
+Transforming Clinical Research: The Power of High-Throughput Omics Integration	https://doi.org/10.3390/proteomes12030025	True
+Database resources of the National Center for Biotechnology Information in 2025	https://doi.org/10.1093/nar/gkae979	True
+The efflux pump ABCC1/MRP1 constitutively restricts PROTAC sensitivity in cancer cells	https://doi.org/10.1016/j.chembiol.2024.11.009	True
+The cell colony development is connected with the accumulation of embryogenesis-related proteins and dynamic distribution of cell wall components in in vitro cultures of Fagopyrum tataricum and Fagopyrum esculentum	https://doi.org/10.1186/s12870-025-06119-3	True
+16S rRNA sequencing reveals synergistic effects of silkworm feces and earthworms on soil microbial diversity and resilience under elevated temperatures	https://doi.org/10.1016/j.apsoil.2025.105952	False
+Evolution of sexual systems and regressive evolution in <i>Riccia</i>	https://doi.org/10.1111/nph.20454	True
+Applying the FAIR Principles to computational workflows	https://doi.org/10.1038/s41597-025-04451-9	True
+A case for global standardisation of genomics and wastewater-based epidemiology	https://doi.org/10.1016/j.lanmic.2025.101092	True
+Bioactive Polyphenolic Compounds from Propolis of Tetragonula carbonaria in the Gibberagee Region, New South Wales, Australia	https://doi.org/10.3390/foods14060965	True
+Comparative genomics reveals genetic diversity and differential metabolic potentials of the species of Arachnia and suggests reclassification of Arachnia propionica E10012 (=NBRC_14587) as novel species	https://doi.org/10.1007/s00203-025-04302-6	False
+A novel <i>Erwiniaceae</i> gut symbiont modulates gene expression of the intracellular bacterium <i>Cardinium</i> in the stored product mite <i>Tyrophagus putrescentiae</i>	https://doi.org/10.1128/msphere.00879-24	True
+Playbook workflow builder: Interactive construction of bioinformatics workflows	https://doi.org/10.1371/journal.pcbi.1012901	True
+Quiescent cell re-entry is limited by macroautophagy-induced lysosomal damage	https://doi.org/10.1016/j.cell.2025.03.009	False
+Targets of the transcription factor Six1 identify previously unreported candidate deafness genes	https://doi.org/10.1242/dev.204533	True
+iNAP 2.0: Harnessing metabolic complementarity in microbial network analysis	https://doi.org/10.1002/imt2.235	True
+A Novel Bacitracin-like Peptide from Mangrove-Isolated Bacillus paralicheniformis NNS4-3 against MRSA and Its Genomic Insights	https://doi.org/10.3390/antibiotics13080716	True
+Learning and teaching biological data science in the Bioconductor community	https://doi.org/10.1371/journal.pcbi.1012925	True
+Investigating proteogenomic divergence in patient-derived xenograft models of ovarian cancer	https://doi.org/10.1038/s41598-024-84874-3	True
+Three mitochondrial genomes of Kibakoganea Nagai, 1984 (Coleoptera: Scarabaeidae: Rutelinae) and phylogenetic relationship of Rutelini	https://doi.org/10.1016/j.aspen.2024.102369	False
+Galaxy @Sciensano: a comprehensive bioinformatics portal for genomics-based microbial typing, characterization, and outbreak detection	https://doi.org/10.1186/s12864-024-11182-5	True
+Comparative Analysis of Seventeen Mitochondrial Genomes of Mileewinae Leafhoppers, Including the Unique Species <i>Mileewa digitata</i> (Hemiptera: Cicadellidae: Mileewinae) From Xizang, China, and New Insights Into Phylogenetic Relationships Within Mileewini	https://doi.org/10.1002/ece3.70830	True
+Reproducible research policies and software/data management in scientific computing journals: a survey, discussion, and perspectives	https://doi.org/10.3389/fcomp.2024.1491823	True
+Investigation of Exome-Wide Tumor Heterogeneity on Colorectal Tissue-Based Single Cells	https://doi.org/10.3390/ijms26020737	True
+The central clock drives metabolic rhythms in muscle stem cells	https://doi.org/10.1101/2025.01.15.633124	True
+Bone Marrow Stromal Cells Generate a Pro-Healing Inflammasome When Cultured on Titanium–Aluminum–Vanadium Surfaces with Microscale/Nanoscale Structural Features	https://doi.org/10.3390/biomimetics10010066	True
+Holzapfeliella saturejae sp. nov. isolated from flowers of winter savoury Satureja montana L.	https://doi.org/10.1099/ijsem.0.006654	False
+Validating a clinically based MS-MLPA threshold through comparison with Sanger sequencing in glioblastoma patients	https://doi.org/10.1186/s13148-025-01822-2	True
+Fine-tuning mechanical constraints uncouple patterning and gene expression in murine pseudo-embryos	https://doi.org/10.1101/2025.01.28.635012	True
+Integrative analysis of patient-derived tumoroids and ex vivo organoid modeling of ARID1A loss in bladder cancer reveals therapeutic molecular targets	https://doi.org/10.1016/j.canlet.2025.217506	True
+Predicting coarse-grained representations of biogeochemical cycles from metabarcoding data	https://doi.org/10.1101/2025.01.30.635649	True
+Transcriptome atlases of rat brain regions and their adaptation to diabetes resolution following gastrectomy in the Goto-Kakizaki rat	https://doi.org/10.1186/s13041-025-01176-z	True
+Comparative Chloroplast Genomics and Phylogeny of Crinum brachynema (Amaryllidaceae): A Narrow Endemic Geophyte of the Western Ghats of India	https://doi.org/10.1007/s11105-025-01536-y	False
+Guidance framework to apply best practices in ecological data analysis: lessons learned from building Galaxy-Ecology	https://doi.org/10.1093/gigascience/giae122	True
+Association of escitalopram-induced shifts in gut microbiota and sphingolipid metabolism with depression-like behavior in wistar-kyoto rats	https://doi.org/10.1038/s41398-025-03277-8	True
+RBM43 controls PGC1α translation and a PGC1α-STING signaling axis	https://doi.org/10.1016/j.cmet.2025.01.013	False
+Two transmembrane transcriptional regulators coordinate to activate chitin-induced natural transformation in Vibrio cholerae	https://doi.org/10.1371/journal.pgen.1011606	True
+Biodiversity of strains belonging to the freshwater genus Aquirufa in a riparian forest restoration area in Salzburg, Austria, with a focus on the description of Aquirufa salirivi sp. nov. and Aquirufa novilacunae sp. nov	https://doi.org/10.1007/s10123-025-00642-x	True
+Aquirufa esocilacus sp. nov., Aquirufa originis sp. nov., Aquirufa avitistagni, and Aquirufa echingensis sp. nov. discovered in small freshwater habitats in Austria during a citizen science project	https://doi.org/10.1007/s00203-025-04275-6	True
+Metatranscriptomes of activated sludge microbiomes from saline wastewater treatment plant	https://doi.org/10.1038/s41597-025-04682-w	True
+High-Throughput Sequencing Enables Rapid Analyses of Nematode Mitochondrial Genomes from an Environmental Sample	https://doi.org/10.3390/pathogens14030234	True
+Hydrogen Radical Chemistry at High-Symmetry {2Fe2S} Centers Probed Using a Muonium Surrogate	https://doi.org/10.1021/acs.inorgchem.4c05126	True
+Isolation and characterization of fMGyn-Pae01, a phiKZ-like jumbo phage infecting Pseudomonas aeruginosa	https://doi.org/10.1186/s12985-025-02679-w	True
+A single amino acid variant in the variable region I of AAV capsid confers liver detargeting	https://doi.org/10.1101/2025.03.04.641478	True
+Transposon insertion causes ctnnb2 transcript instability that results in the maternal effect zebrafish ichabod (ich) mutation	https://doi.org/10.1101/2025.02.28.640854	True
+Comprehensive identification of hub mRNAs and lncRNAs in colorectal cancer using galaxy: an in silico transcriptome analysis	https://doi.org/10.1007/s12672-025-02026-z	True
+Phylogenomic Analysis Reveals Evolutionary Relationships of Tropical Drosophilidae: From <i>Drosophila</i> to <i>Scaptodrosophila</i>	https://doi.org/10.1002/ece3.71100	True
+Genome sequencing and identification of candidate variants for a red/black color polymorphism in tilapia (Oreochromis spp.)	https://doi.org/10.1016/j.aquaculture.2025.742411	True
+Higher Order Volatile Fatty Acid Metabolism and Atypical Polyhydroxyalkanoate Production in Fermentation-Enhanced Biological Phosphorus Removal	https://doi.org/10.1016/j.watres.2025.123503	True
+Phosphorelay changes and plasticity underlie the life history evolution of Bacillus subtilis sporulation and germination in serial batch culture	https://doi.org/10.1099/mic.0.001540	False
+Comammox Nitrospira dominates the nitrifying bacterial community in the external canister biofilter in the tank fish farm	https://doi.org/10.1016/j.jwpe.2025.107494	False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_citing_papers_title.tsv	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,51 @@
+Title	DOI	Is_OA
+Transforming Clinical Research: The Power of High-Throughput Omics Integration	https://doi.org/10.3390/proteomes12030025	True
+Database resources of the National Center for Biotechnology Information in 2025	https://doi.org/10.1093/nar/gkae979	True
+The efflux pump ABCC1/MRP1 constitutively restricts PROTAC sensitivity in cancer cells	https://doi.org/10.1016/j.chembiol.2024.11.009	True
+The cell colony development is connected with the accumulation of embryogenesis-related proteins and dynamic distribution of cell wall components in in vitro cultures of Fagopyrum tataricum and Fagopyrum esculentum	https://doi.org/10.1186/s12870-025-06119-3	True
+16S rRNA sequencing reveals synergistic effects of silkworm feces and earthworms on soil microbial diversity and resilience under elevated temperatures	https://doi.org/10.1016/j.apsoil.2025.105952	False
+Evolution of sexual systems and regressive evolution in <i>Riccia</i>	https://doi.org/10.1111/nph.20454	True
+Applying the FAIR Principles to computational workflows	https://doi.org/10.1038/s41597-025-04451-9	True
+A case for global standardisation of genomics and wastewater-based epidemiology	https://doi.org/10.1016/j.lanmic.2025.101092	True
+Bioactive Polyphenolic Compounds from Propolis of Tetragonula carbonaria in the Gibberagee Region, New South Wales, Australia	https://doi.org/10.3390/foods14060965	True
+Comparative genomics reveals genetic diversity and differential metabolic potentials of the species of Arachnia and suggests reclassification of Arachnia propionica E10012 (=NBRC_14587) as novel species	https://doi.org/10.1007/s00203-025-04302-6	False
+A novel <i>Erwiniaceae</i> gut symbiont modulates gene expression of the intracellular bacterium <i>Cardinium</i> in the stored product mite <i>Tyrophagus putrescentiae</i>	https://doi.org/10.1128/msphere.00879-24	True
+Playbook workflow builder: Interactive construction of bioinformatics workflows	https://doi.org/10.1371/journal.pcbi.1012901	True
+Quiescent cell re-entry is limited by macroautophagy-induced lysosomal damage	https://doi.org/10.1016/j.cell.2025.03.009	False
+Targets of the transcription factor Six1 identify previously unreported candidate deafness genes	https://doi.org/10.1242/dev.204533	True
+iNAP 2.0: Harnessing metabolic complementarity in microbial network analysis	https://doi.org/10.1002/imt2.235	True
+A Novel Bacitracin-like Peptide from Mangrove-Isolated Bacillus paralicheniformis NNS4-3 against MRSA and Its Genomic Insights	https://doi.org/10.3390/antibiotics13080716	True
+Learning and teaching biological data science in the Bioconductor community	https://doi.org/10.1371/journal.pcbi.1012925	True
+Investigating proteogenomic divergence in patient-derived xenograft models of ovarian cancer	https://doi.org/10.1038/s41598-024-84874-3	True
+Three mitochondrial genomes of Kibakoganea Nagai, 1984 (Coleoptera: Scarabaeidae: Rutelinae) and phylogenetic relationship of Rutelini	https://doi.org/10.1016/j.aspen.2024.102369	False
+Galaxy @Sciensano: a comprehensive bioinformatics portal for genomics-based microbial typing, characterization, and outbreak detection	https://doi.org/10.1186/s12864-024-11182-5	True
+Comparative Analysis of Seventeen Mitochondrial Genomes of Mileewinae Leafhoppers, Including the Unique Species <i>Mileewa digitata</i> (Hemiptera: Cicadellidae: Mileewinae) From Xizang, China, and New Insights Into Phylogenetic Relationships Within Mileewini	https://doi.org/10.1002/ece3.70830	True
+Reproducible research policies and software/data management in scientific computing journals: a survey, discussion, and perspectives	https://doi.org/10.3389/fcomp.2024.1491823	True
+Investigation of Exome-Wide Tumor Heterogeneity on Colorectal Tissue-Based Single Cells	https://doi.org/10.3390/ijms26020737	True
+The central clock drives metabolic rhythms in muscle stem cells	https://doi.org/10.1101/2025.01.15.633124	True
+Bone Marrow Stromal Cells Generate a Pro-Healing Inflammasome When Cultured on Titanium–Aluminum–Vanadium Surfaces with Microscale/Nanoscale Structural Features	https://doi.org/10.3390/biomimetics10010066	True
+Holzapfeliella saturejae sp. nov. isolated from flowers of winter savoury Satureja montana L.	https://doi.org/10.1099/ijsem.0.006654	False
+Validating a clinically based MS-MLPA threshold through comparison with Sanger sequencing in glioblastoma patients	https://doi.org/10.1186/s13148-025-01822-2	True
+Fine-tuning mechanical constraints uncouple patterning and gene expression in murine pseudo-embryos	https://doi.org/10.1101/2025.01.28.635012	True
+Integrative analysis of patient-derived tumoroids and ex vivo organoid modeling of ARID1A loss in bladder cancer reveals therapeutic molecular targets	https://doi.org/10.1016/j.canlet.2025.217506	True
+Predicting coarse-grained representations of biogeochemical cycles from metabarcoding data	https://doi.org/10.1101/2025.01.30.635649	True
+Transcriptome atlases of rat brain regions and their adaptation to diabetes resolution following gastrectomy in the Goto-Kakizaki rat	https://doi.org/10.1186/s13041-025-01176-z	True
+Comparative Chloroplast Genomics and Phylogeny of Crinum brachynema (Amaryllidaceae): A Narrow Endemic Geophyte of the Western Ghats of India	https://doi.org/10.1007/s11105-025-01536-y	False
+Guidance framework to apply best practices in ecological data analysis: lessons learned from building Galaxy-Ecology	https://doi.org/10.1093/gigascience/giae122	True
+Association of escitalopram-induced shifts in gut microbiota and sphingolipid metabolism with depression-like behavior in wistar-kyoto rats	https://doi.org/10.1038/s41398-025-03277-8	True
+RBM43 controls PGC1α translation and a PGC1α-STING signaling axis	https://doi.org/10.1016/j.cmet.2025.01.013	False
+Two transmembrane transcriptional regulators coordinate to activate chitin-induced natural transformation in Vibrio cholerae	https://doi.org/10.1371/journal.pgen.1011606	True
+Biodiversity of strains belonging to the freshwater genus Aquirufa in a riparian forest restoration area in Salzburg, Austria, with a focus on the description of Aquirufa salirivi sp. nov. and Aquirufa novilacunae sp. nov	https://doi.org/10.1007/s10123-025-00642-x	True
+Aquirufa esocilacus sp. nov., Aquirufa originis sp. nov., Aquirufa avitistagni, and Aquirufa echingensis sp. nov. discovered in small freshwater habitats in Austria during a citizen science project	https://doi.org/10.1007/s00203-025-04275-6	True
+Metatranscriptomes of activated sludge microbiomes from saline wastewater treatment plant	https://doi.org/10.1038/s41597-025-04682-w	True
+High-Throughput Sequencing Enables Rapid Analyses of Nematode Mitochondrial Genomes from an Environmental Sample	https://doi.org/10.3390/pathogens14030234	True
+Hydrogen Radical Chemistry at High-Symmetry {2Fe2S} Centers Probed Using a Muonium Surrogate	https://doi.org/10.1021/acs.inorgchem.4c05126	True
+Isolation and characterization of fMGyn-Pae01, a phiKZ-like jumbo phage infecting Pseudomonas aeruginosa	https://doi.org/10.1186/s12985-025-02679-w	True
+A single amino acid variant in the variable region I of AAV capsid confers liver detargeting	https://doi.org/10.1101/2025.03.04.641478	True
+Transposon insertion causes ctnnb2 transcript instability that results in the maternal effect zebrafish ichabod (ich) mutation	https://doi.org/10.1101/2025.02.28.640854	True
+Comprehensive identification of hub mRNAs and lncRNAs in colorectal cancer using galaxy: an in silico transcriptome analysis	https://doi.org/10.1007/s12672-025-02026-z	True
+Phylogenomic Analysis Reveals Evolutionary Relationships of Tropical Drosophilidae: From <i>Drosophila</i> to <i>Scaptodrosophila</i>	https://doi.org/10.1002/ece3.71100	True
+Genome sequencing and identification of candidate variants for a red/black color polymorphism in tilapia (Oreochromis spp.)	https://doi.org/10.1016/j.aquaculture.2025.742411	True
+Higher Order Volatile Fatty Acid Metabolism and Atypical Polyhydroxyalkanoate Production in Fermentation-Enhanced Biological Phosphorus Removal	https://doi.org/10.1016/j.watres.2025.123503	True
+Phosphorelay changes and plasticity underlie the life history evolution of Bacillus subtilis sporulation and germination in serial batch culture	https://doi.org/10.1099/mic.0.001540	False
+Comammox Nitrospira dominates the nitrifying bacterial community in the external canister biofilter in the tank fish farm	https://doi.org/10.1016/j.jwpe.2025.107494	False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_citing_papers_wddownload.tsv	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,11 @@
+Title	DOI	Is_OA
+Insights into Land Plant Evolution Garnered from the Marchantia polymorpha Genome	https://doi.org/10.1016/j.cell.2017.09.030	True
+NAC-MYB-based transcriptional regulation of secondary cell wall biosynthesis in land plants	https://doi.org/10.3389/fpls.2015.00288	True
+<i>Marchantia polymorpha</i>: Taxonomy, Phylogeny and Morphology of a Model System	https://doi.org/10.1093/pcp/pcv192	True
+Lignification: Flexibility, Biosynthesis and Regulation	https://doi.org/10.1016/j.tplants.2016.04.006	False
+Plant vascular development: from early specification to differentiation	https://doi.org/10.1038/nrm.2015.6	False
+The Moss <i>Physcomitrium</i> (<i>Physcomitrella</i>) <i>patens</i>: A Model Organism for Non-Seed Plants	https://doi.org/10.1105/tpc.19.00828	True
+Functions and Regulation of Programmed Cell Death in Plant Development	https://doi.org/10.1146/annurev-cellbio-111315-124915	False
+Only in dying, life: programmed cell death during plant development	https://doi.org/10.1016/j.tplants.2014.10.003	False
+A Transcriptome Atlas of Physcomitrella patens Provides Insights into the Evolution and Development of Land Plants	https://doi.org/10.1016/j.molp.2015.12.002	True
+Insights into the Diversification and Evolution of R2R3-MYB Transcription Factors in Plants	https://doi.org/10.1104/pp.19.01082	True
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_summary.txt	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,3 @@
+Total citing papers: 10
+Open Access papers: 8
+Closed Access papers: 2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_summary_doi.txt	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,3 @@
+Total citing papers: 50
+Open Access papers: 41
+Closed Access papers: 9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_summary_title.txt	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,3 @@
+Total citing papers: 50
+Open Access papers: 41
+Closed Access papers: 9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected_summary_wddownload.txt	Sat May 31 12:25:39 2025 +0000
@@ -0,0 +1,3 @@
+Total citing papers: 10
+Open Access papers: 6
+Closed Access papers: 4