{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T00:53:11Z","timestamp":1773795191345,"version":"3.50.1"},"reference-count":44,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2025,4,16]],"date-time":"2025-04-16T00:00:00Z","timestamp":1744761600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100005642","name":"University of Sassari","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100005642","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000780","name":"European Union","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000780","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,6]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>De novo assembly creates reference genomes that underpin many modern biodiversity and conservation studies. Large numbers of new genomes are being assembled by labs around the world. To avoid duplication of efforts and variable data quality, we desire a best-practice assembly process, implemented as an automated portable workflow.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>Here, we present Colora, a Snakemake workflow that produces chromosome-scale de novo primary or phased genome assemblies complete with organelles using Pacific Biosciences HiFi, Hi-C, and optionally Oxford Nanopore Technologies reads as input. Colora is a user-friendly, versatile, and reproducible pipeline that is ready to use by researchers looking for an automated way to obtain high-quality de novo genome assemblies.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>The source code of Colora is available on GitHub (https:\/\/github.com\/LiaOb21\/colora) and has been deposited in Zenodo under DOI https:\/\/doi.org\/10.5281\/zenodo.13321576. Colora is also available at the Snakemake Workflow Catalog (https:\/\/snakemake.github.io\/snakemake-workflow-catalog\/? usage=LiaOb21%2Fcolora).<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btaf175","type":"journal-article","created":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T07:36:58Z","timestamp":1744616218000},"source":"Crossref","is-referenced-by-count":2,"title":["Colora: a Snakemake workflow for complete chromosome-scale\n                    <i>de novo<\/i>\n                    genome assembly"],"prefix":"10.1093","volume":"41","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3208-323X","authenticated-orcid":false,"given":"Lia","family":"Obinu","sequence":"first","affiliation":[{"name":"Department of Agricultural Sciences, University of Sassari , Viale Italia 39\/a , Sassari, Sardinia, 07100,","place":["Italy"]},{"name":"Edinburgh Genomics, The University of Edinburgh , Ashworth Laboratories, The King's Buildings, Charlotte Auerbach Rd , Edinburgh, Scotland, EH9 3FL,","place":["United Kingdom"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2470-9519","authenticated-orcid":false,"given":"Timothy","family":"Booth","sequence":"additional","affiliation":[{"name":"Edinburgh Genomics, The University of Edinburgh , Ashworth Laboratories, The King's Buildings, Charlotte Auerbach Rd , Edinburgh, Scotland, EH9 3FL,","place":["United Kingdom"]}]},{"given":"Heleen","family":"De Weerd","sequence":"additional","affiliation":[{"name":"Edinburgh Genomics, The University of Edinburgh , Ashworth Laboratories, The King's Buildings, Charlotte Auerbach Rd , Edinburgh, Scotland, EH9 3FL,","place":["United Kingdom"]}]},{"given":"Urmi","family":"Trivedi","sequence":"additional","affiliation":[{"name":"Edinburgh Genomics, The University of Edinburgh , Ashworth Laboratories, The King's Buildings, Charlotte Auerbach Rd , Edinburgh, Scotland, EH9 3FL,","place":["United Kingdom"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7126-9048","authenticated-orcid":false,"given":"Andrea","family":"Porceddu","sequence":"additional","affiliation":[{"name":"Department of Agricultural Sciences, University of Sassari , Viale Italia 39\/a , Sassari, Sardinia, 07100,","place":["Italy"]}]}],"member":"286","published-online":{"date-parts":[[2025,4,16]]},"reference":[{"key":"2025051307512060000_btaf175-B1","author":"Anaconda Software Distribution","year":"2024"},{"key":"2025051307512060000_btaf175-B2","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1186\/S13104-022-05978-5\/FIGURES\/2","article-title":"Snakecube: containerized and automated pipeline for de novo genome assembly in HPC environments","volume":"15","author":"Angelova","year":"2022","journal-title":"BMC Res Notes"},{"key":"2025051307512060000_btaf175-B3","author":"Arima Genomics","year":"2017"},{"key":"2025051307512060000_btaf175-B4","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1186\/S13059-024-03198-7","article-title":"Rapid and sensitive detection of genome contamination at scale with FCS-GX","volume":"25","author":"Astashyn","year":"2024","journal-title":"Genome Biol"},{"key":"2025051307512060000_btaf175-B5","author":"Broad Institute","year":"2019"},{"key":"2025051307512060000_btaf175-B6","doi-asserted-by":"publisher","first-page":"i884","DOI":"10.1093\/BIOINFORMATICS\/BTY560","article-title":"fastp: an ultra-fast all-in-one fastq preprocessor","volume":"34","author":"Chen","year":"2018","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B7","doi-asserted-by":"publisher","first-page":"312, 12","DOI":"10.1186\/s13059-021-02527-4","article-title":"Accurate long-read de novo assembly evaluation with inspector","volume":"22","author":"Chen","year":"2021","journal-title":"Genome Biol"},{"key":"2025051307512060000_btaf175-B8","doi-asserted-by":"publisher","first-page":"2666","DOI":"10.1093\/bioinformatics\/bty149","article-title":"Nanopack: visualizing and processing long-read sequencing data","volume":"34","author":"De Coster","year":"2018","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B9","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1186\/1471-2105-14-160\/FIGURES\/8","article-title":"Disk-based k-mer counting on a pc","volume":"14","author":"Deorowicz","year":"2013","journal-title":"BMC Bioinformatics"},{"key":"2025051307512060000_btaf175-B10","doi-asserted-by":"publisher","first-page":"1569","DOI":"10.1093\/BIOINFORMATICS\/BTV022","article-title":"Kmc 2: fast and resource-frugal k-mer counting","volume":"31","author":"Deorowicz","year":"2015","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B11","doi-asserted-by":"publisher","first-page":"4214","DOI":"10.1093\/BIOINFORMATICS\/BTAC460","article-title":"Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs","volume":"38","author":"Formenti","year":"2022","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B12","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1038\/s41592-018-0046-7","article-title":"Bioconda: sustainable and comprehensive software distribution for the life sciences","volume":"15","author":"Gr\u00fcning","year":"2018","journal-title":"Nat Methods"},{"key":"2025051307512060000_btaf175-B13","doi-asserted-by":"publisher","first-page":"2896","DOI":"10.1093\/BIOINFORMATICS\/BTAA025","article-title":"Identifying and removing haplotypic duplication in primary genome assemblies","volume":"36","author":"Guan","year":"2020","journal-title":"Bioinformatics (Oxford, England)"},{"key":"2025051307512060000_btaf175-B14","doi-asserted-by":"publisher","first-page":"612","DOI":"10.1186\/s12859-021-04453-5","article-title":"Efficient iterative hi-c scaffolder based on n-best neighbors","volume":"22","author":"Guan","year":"2021","journal-title":"BMC Bioinformatics"},{"key":"2025051307512060000_btaf175-B15","author":"Harry","year":"2024"},{"key":"2025051307512060000_btaf175-B16","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1093\/GIGASCIENCE\/GIAA153","article-title":"Significantly improving the quality of genome assemblies through curation","volume":"10","author":"Howe","year":"2021","journal-title":"Gigascience"},{"key":"2025051307512060000_btaf175-B17","doi-asserted-by":"publisher","first-page":"866","DOI":"10.1093\/bib\/bbx147","article-title":"Comprehensive evaluation of non-hybrid genome assembly tools for third-generation PacBio long-read sequence data","volume":"20","author":"Jayakumar","year":"2019","journal-title":"Brief Bioinform"},{"key":"2025051307512060000_btaf175-B18","doi-asserted-by":"publisher","first-page":"700","DOI":"10.1016\/J.TPLANTS.2019.05.003","article-title":"Tools and strategies for long-read sequencing and de novo assembly of plant genomes","volume":"24","author":"Jung","year":"2019","journal-title":"Trends Plant Sci"},{"key":"2025051307512060000_btaf175-B19","doi-asserted-by":"publisher","first-page":"796","DOI":"10.1038\/35048692","volume-title":"Nature","author":"Kaul","year":"2000"},{"key":"2025051307512060000_btaf175-B20","doi-asserted-by":"publisher","first-page":"2759","DOI":"10.1093\/BIOINFORMATICS\/BTX304","article-title":"Kmc 3: counting and manipulating k-mer statistics","volume":"33","author":"Kokot","year":"2017","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B21","author":"Krasheninnikova","year":"2024"},{"key":"2025051307512060000_btaf175-B22","doi-asserted-by":"publisher","first-page":"e0177459","DOI":"10.1371\/JOURNAL.PONE.0177459","article-title":"Singularity: scientific containers for mobility of compute","volume":"12","author":"Kurtzer","year":"2017","journal-title":"PLoS One"},{"key":"2025051307512060000_btaf175-B23","doi-asserted-by":"publisher","first-page":"2520","DOI":"10.1093\/BIOINFORMATICS\/BTS480","article-title":"Snakemake\u2014a scalable bioinformatics workflow engine","volume":"28","author":"K\u00f6ster","year":"2012","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B24","doi-asserted-by":"publisher","first-page":"367","DOI":"10.1038\/s41587-023-02100-3","article-title":"Scalable, accessible and reproducible reference genome assembly and evaluation in galaxy","volume":"42","author":"Larivi\u00e8re","year":"2024","journal-title":"Nat Biotechnol"},{"key":"2025051307512060000_btaf175-B25","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1007\/978-3-030-11048-2_15","author":"Li","year":"2019"},{"key":"2025051307512060000_btaf175-B26","doi-asserted-by":"publisher","first-page":"1754","DOI":"10.1093\/bioinformatics\/btp324","article-title":"Fast and accurate short read alignment with burrows-wheeler transform","volume":"25","author":"Li","year":"2009","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B27","doi-asserted-by":"publisher","first-page":"505","DOI":"10.1038\/s41588-024-01657-2","article-title":"Near-gapless and haplotype-resolved apple genomes provide insights into the genetic basis of rootstock-induced dwarfing","volume":"56","author":"Li","year":"2024","journal-title":"Nat Genet"},{"key":"2025051307512060000_btaf175-B28","doi-asserted-by":"publisher","first-page":"4358","DOI":"10.1038\/S41467-024-48718-Y","article-title":"DNA methylation-based high-resolution mapping of long-distance chromosomal interactions in nucleosome-depleted regions","volume":"15","author":"Li","year":"2024","journal-title":"Nat Commun"},{"key":"2025051307512060000_btaf175-B29","doi-asserted-by":"publisher","DOI":"10.1093\/g3journal\/jkad077","article-title":"A highly contiguous genome assembly reveals sources of genomic novelty in the symbiotic fungus Rhizophagus irregularis","volume":"13","author":"Manley","year":"2023","journal-title":"G3 (Bethesda)"},{"key":"2025051307512060000_btaf175-B30","doi-asserted-by":"publisher","first-page":"4647","DOI":"10.1093\/MOLBEV\/MSAB199","article-title":"Busco update: novel and streamlined workflows along with broader and deeper phylogenetic coverage for scoring of eukaryotic, prokaryotic, and viral genomes","volume":"38","author":"Manni","year":"2021","journal-title":"Mol Biol Evol"},{"key":"2025051307512060000_btaf175-B31","first-page":"2","volume":"239","year":"2014","journal-title":"Linux J"},{"key":"2025051307512060000_btaf175-B32","doi-asserted-by":"publisher","first-page":"i142","DOI":"10.1093\/bioinformatics\/bty266","article-title":"Versatile genome assembly evaluation with quast-lg","volume":"34","author":"Mikheenko","year":"2018","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B33","author":"National Bioinformatics Infrastructure Sweden (NBIS)","year":"2024"},{"key":"2025051307512060000_btaf175-B34","author":"National Center for Biotechnology Information","year":"2024"},{"key":"2025051307512060000_btaf175-B35","doi-asserted-by":"publisher","first-page":"1462923","DOI":"10.3389\/fbinf.2024.1462923","article-title":"Benchmarking of Hi-C tools for scaffolding plant genomes obtained from pacbio hifi and ont reads","volume":"4","author":"Obinu","year":"2024","journal-title":"Front Bioinform"},{"key":"2025051307512060000_btaf175-B36","doi-asserted-by":"publisher","first-page":"1432","DOI":"10.1038\/s41467-020-14998-3","article-title":"Genomescope 2.0 and smudgeplot for reference-free profiling of polyploid genomes","volume":"11","author":"Rhyker Ranallo-Benavidez","year":"2020","journal-title":"Nat Commun"},{"key":"2025051307512060000_btaf175-B37","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1186\/S13059-020-02134-9\/FIGURES\/6","article-title":"Merqury: reference-free quality, completeness, and phasing assessment for genome assemblies","volume":"21","author":"Rhie","year":"2020","journal-title":"Genome Biol"},{"key":"2025051307512060000_btaf175-B38","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1186\/1471-2229-10-87","article-title":"Dating and functional characterization of duplicated genes in the apple (Malus domestica borkh.) by analyzing EST data","volume":"10","author":"Sanzol","year":"2010","journal-title":"BMC Plant Biol"},{"key":"2025051307512060000_btaf175-B39","doi-asserted-by":"publisher","first-page":"3210","DOI":"10.1093\/bioinformatics\/btv351","article-title":"Busco: assessing genome assembly and annotation completeness with single-copy orthologs","volume":"31","author":"Sim\u00e3o","year":"2015","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B40","doi-asserted-by":"publisher","first-page":"2142","DOI":"10.1038\/s41564-023-01495-8","article-title":"Arbuscular mycorrhizal fungi heterokaryons have two nuclear populations with distinct roles in host\u2013plant interactions","volume":"8","author":"Sperschneider","year":"2023","journal-title":"Nat Microbiol"},{"key":"2025051307512060000_btaf175-B41","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/J.GPB.2021.08.003","article-title":"High-quality Arabidopsis thaliana genome assembly with nanopore and HIFI long reads","volume":"20","author":"Wang","year":"2022","journal-title":"Genomics Proteomics Bioinf"},{"key":"2025051307512060000_btaf175-B42","doi-asserted-by":"publisher","first-page":"3350","DOI":"10.1093\/BIOINFORMATICS\/BTV383","article-title":"Bandage: interactive visualization of de novo genome assemblies","volume":"31","author":"Wick","year":"2015","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B43","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btac808","article-title":"Yahs: yet another hi-c scaffolding tool","volume":"39","author":"Zhou","year":"2023","journal-title":"Bioinformatics"},{"key":"2025051307512060000_btaf175-B44","doi-asserted-by":"publisher","author":"Zhou","year":"2024","DOI":"10.1101\/2024.10.23.619857"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btaf175\/62940330\/btaf175.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/5\/btaf175\/62940330\/btaf175.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/41\/5\/btaf175\/62940330\/btaf175.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,13]],"date-time":"2025-05-13T07:51:29Z","timestamp":1747122689000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btaf175\/8114633"}},"subtitle":[],"editor":[{"given":"Peter","family":"Robinson","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2025,4,16]]},"references-count":44,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,5,6]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btaf175","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2024.09.10.612003","asserted-by":"object"}]},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2025,5]]},"published":{"date-parts":[[2025,4,16]]},"article-number":"btaf175"}}