{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T04:46:19Z","timestamp":1776746779936,"version":"3.51.2"},"reference-count":33,"publisher":"Oxford University Press (OUP)","issue":"9","license":[{"start":{"date-parts":[[2023,9,5]],"date-time":"2023-09-05T00:00:00Z","timestamp":1693872000000},"content-version":"vor","delay-in-days":4,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Samsung Research"},{"name":"Ministry of Science, ICT & Future Planning","award":["NRF-2022M3C1A3081366"],"award-info":[{"award-number":["NRF-2022M3C1A3081366"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,9,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>DNA-based data storage is one of the most attractive research areas for future archival storage. However, it faces the problems of high writing and reading costs for practical use. There have been many efforts to resolve this problem, but existing schemes are not fully suitable for DNA-based data storage, and more cost reduction is needed.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We propose whole encoding and decoding procedures for DNA storage. The encoding procedure consists of a carefully designed single low-density parity-check code as an inter-oligo code, which corrects errors and dropouts efficiently. We apply new clustering and alignment methods that operate on variable-length reads to aid the decoding performance. We use edit distance and quality scores during the sequence analysis-aided decoding procedure, which can discard abnormal reads and utilize high-quality soft information. We store 548.83 KB of an image file in DNA oligos and achieve a writing cost reduction of 7.46% and a significant reading cost reduction of 26.57% and 19.41% compared with the two previous works.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>Data and codes for all the algorithms proposed in this study are available at: https:\/\/github.com\/sjpark0905\/DNA-LDPC-codes.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad548","type":"journal-article","created":{"date-parts":[[2023,9,5]],"date-time":"2023-09-05T16:22:47Z","timestamp":1693930967000},"source":"Crossref","is-referenced-by-count":12,"title":["Reducing cost in DNA-based data storage by sequence analysis-aided soft information decoding of variable-length reads"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4524-0302","authenticated-orcid":false,"given":"Seong-Joon","family":"Park","sequence":"first","affiliation":[{"name":"Department of Electrical and Computer Engineering, Seoul National University , Seoul 08826, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1762-5915","authenticated-orcid":false,"given":"Sunghwan","family":"Kim","sequence":"additional","affiliation":[{"name":"University of Ulsan Department of Electrical, Electronic and Computer Engineering, , Ulsan 44610, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1055-618X","authenticated-orcid":false,"given":"Jaeho","family":"Jeong","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Seoul National University , Seoul 08826, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6346-4182","authenticated-orcid":false,"given":"Albert","family":"No","sequence":"additional","affiliation":[{"name":"Department of Electronic and Electrical Engineering, Hongik University , Seoul 04066, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jong-Seon","family":"No","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, Seoul National University , Seoul 08826, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7854-7792","authenticated-orcid":false,"given":"Hosung","family":"Park","sequence":"additional","affiliation":[{"name":"Department of Computer Engineering, Chonnam National University , Gwangju 61186, South Korea"},{"name":"Department of ICT Convergence System Engineering, Chonnam National University , Gwangju 61186, South Korea"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2023,9,5]]},"reference":[{"key":"2023112302533790300_btad548-B1","doi-asserted-by":"crossref","first-page":"1229","DOI":"10.1038\/s41587-019-0240-x","article-title":"Data storage in DNA with fewer synthesis cycles using composite DNA letters","volume":"37","author":"Anavy","year":"2019","journal-title":"Nat Biotechnol"},{"key":"2023112302533790300_btad548-B2","doi-asserted-by":"crossref","first-page":"1011","DOI":"10.1016\/j.procs.2016.05.398","article-title":"Forward error correction for DNA data storage","volume":"80","author":"Blawat","year":"2016","journal-title":"Procedia Comput. Sci"},{"key":"2023112302533790300_btad548-B3","first-page":"6582","article-title":"A DNA-based archival storage system","volume":"9","author":"Bornholt","year":"2016","journal-title":"ACM"},{"key":"2023112302533790300_btad548-B4","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1038\/s41540-022-00233-w","article-title":"Adaptive coding for DNA storage with high storage density and low coverage","volume":"8","author":"Cao","year":"2022","journal-title":"NPJ Syst Biol Appl"},{"key":"2023112302533790300_btad548-B5","doi-asserted-by":"crossref","first-page":"456","DOI":"10.1038\/s41576-019-0125-3","article-title":"Molecular digital data storage using DNA","volume":"20","author":"Ceze","year":"2019","journal-title":"Nat Rev Genet"},{"key":"2023112302533790300_btad548-B6","first-page":"147","author":"Chandak","year":"2019"},{"key":"2023112302533790300_btad548-B7","first-page":"8822","author":"Chandak","year":"2020"},{"key":"2023112302533790300_btad548-B8","doi-asserted-by":"crossref","first-page":"2001249","DOI":"10.1002\/adma.202001249","article-title":"DNA micro-disks for the management of DNA-based data storage with index and write-once-read-many (WORM) memory features","volume":"32","author":"Choi","year":"2020","journal-title":"Adv Mater"},{"key":"2023112302533790300_btad548-B9","doi-asserted-by":"crossref","first-page":"6582","DOI":"10.1038\/s41598-019-43105-w","article-title":"High information capacity DNA-based data storage with augmented encoding characters using degenerate bases","volume":"9","author":"Choi","year":"2019","journal-title":"Sci Rep"},{"key":"2023112302533790300_btad548-B10","doi-asserted-by":"crossref","first-page":"1628","DOI":"10.1126\/science.1226355","article-title":"Next-generation digital information storage in DNA","volume":"337","author":"Church","year":"2012","journal-title":"Science"},{"key":"2023112302533790300_btad548-B11","doi-asserted-by":"crossref","first-page":"317","DOI":"10.1109\/LCOMM.2003.814716","article-title":"A class of low-density parity-check codes constructed based on Reed\u2013Solomon codes with two information symbols","volume":"7","author":"Djurdjevic","year":"2003","journal-title":"IEEE Commun Lett"},{"key":"2023112302533790300_btad548-B12","first-page":"1792","author":"Edgar","year":"2004"},{"key":"2023112302533790300_btad548-B13","doi-asserted-by":"crossref","first-page":"950","DOI":"10.1126\/science.aaj2038","article-title":"DNA fountain enables a robust and efficient storage architecture","volume":"355","author":"Erlich","year":"2017","journal-title":"Science"},{"key":"2023112302533790300_btad548-B14","doi-asserted-by":"crossref","first-page":"77","DOI":"10.1038\/nature11875","article-title":"Towards practical, high-capacity, low-maintenance information storage in synthesized DNA","volume":"494","author":"Goldman","year":"2013","journal-title":"Nature"},{"key":"2023112302533790300_btad548-B15","doi-asserted-by":"crossref","first-page":"2552","DOI":"10.1002\/anie.201411378","article-title":"Robust chemical preservation of digital information on DNA in silica with error-correcting codes","volume":"54","author":"Grass","year":"2015","journal-title":"Angew Chem Int Ed Engl"},{"key":"2023112302533790300_btad548-B16","doi-asserted-by":"crossref","first-page":"9663","DOI":"10.1038\/s41598-019-45832-6","article-title":"A characterization of the DNA data storage channel","volume":"9","author":"Heckel","year":"2019","journal-title":"Sci Rep"},{"key":"2023112302533790300_btad548-B17","doi-asserted-by":"crossref","first-page":"3136","DOI":"10.1093\/bioinformatics\/btab246","article-title":"Cooperative sequence clustering and decoding for DNA storage system with fountain codes","volume":"37","author":"Jeong","year":"2021","journal-title":"Bioinformatics"},{"key":"2023112302533790300_btad548-B18","doi-asserted-by":"crossref","first-page":"39","DOI":"10.1038\/s41587-019-0356-z","article-title":"A DNA-of-things storage architecture to create materials with embedded memory","volume":"38","author":"Koch","year":"2020","journal-title":"Nat Biotechnol"},{"key":"2023112302533790300_btad548-B19","doi-asserted-by":"crossref","first-page":"1851","DOI":"10.1101\/gr.078212.108","article-title":"Mapping short DNA sequencing reads and calling variants using mapping quality scores","volume":"18","author":"Li","year":"2008","journal-title":"Genome Res"},{"key":"2023112302533790300_btad548-B20","doi-asserted-by":"crossref","first-page":"2957","DOI":"10.1093\/bioinformatics\/btr507","article-title":"FLASH: fast length adjustment of short reads to improve genome assemblies","volume":"27","author":"Mago\u010d","year":"2011","journal-title":"Bioinformatics"},{"key":"2023112302533790300_btad548-B21","doi-asserted-by":"crossref","first-page":"eabi6714","DOI":"10.1126\/sciadv.abi6714","article-title":"Scaling DNA data storage with nanoscale electrode wells","volume":"7","author":"Nguyen","year":"2021","journal-title":"Sci Adv"},{"key":"2023112302533790300_btad548-B22","doi-asserted-by":"crossref","first-page":"e2001094","DOI":"10.1002\/smtd.202001094","article-title":"An empirical comparison of preservation methods for synthetic DNA data storage","volume":"5","author":"Organick","year":"2021","journal-title":"Small Methods"},{"key":"2023112302533790300_btad548-B23","doi-asserted-by":"crossref","first-page":"242","DOI":"10.1038\/nbt.4079","article-title":"Random access in large-scale DNA data storage","volume":"36","author":"Organick","year":"2018","journal-title":"Nat Biotechnol"},{"key":"2023112302533790300_btad548-B24","doi-asserted-by":"crossref","first-page":"28","DOI":"10.1109\/MCOM.2018.1700839","article-title":"Design of low-density parity check codes for 5G new radio","volume":"56","author":"Richardson","year":"2018","journal-title":"IEEE Commun Mag"},{"key":"2023112302533790300_btad548-B25","doi-asserted-by":"crossref","DOI":"10.1017\/CBO9780511791338","volume-title":"Modern Coding Theory","author":"Richardson","year":"2008"},{"key":"2023112302533790300_btad548-B26","doi-asserted-by":"crossref","first-page":"R51","DOI":"10.1186\/gb-2013-14-5-r51","article-title":"Characterizing and measuring bias in sequence data","volume":"14","author":"Ross","year":"2013","journal-title":"Genome Biol"},{"key":"2023112302533790300_btad548-B27","doi-asserted-by":"crossref","first-page":"1350","DOI":"10.1126\/science.2999980","article-title":"Enzymatic amplification of beta-globin genomic sequences and restriction site analysis for diagnosis of sickle cell anemia","volume":"230","author":"Saiki","year":"1985","journal-title":"Science"},{"key":"2023112302533790300_btad548-B28","doi-asserted-by":"crossref","first-page":"913","DOI":"10.1038\/nmeth.2137","article-title":"Accurate gene synthesis with tag-directed retrieval of sequence-verified DNA molecules","volume":"9","author":"Schwartz","year":"2012","journal-title":"Nat Methods"},{"key":"2023112302533790300_btad548-B29","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1186\/s12859-016-0976-y","article-title":"Illumina error profiles: resolving fine-scale variation in metagenomic sequencing data","volume":"17","author":"Schirmer","year":"2016","journal-title":"BMC Bioinformatics"},{"key":"2023112302533790300_btad548-B30","doi-asserted-by":"crossref","first-page":"5361","DOI":"10.1038\/s41467-022-33046-w","article-title":"Robust data storage in DNA by de Brujin graph-based de novo strand assembly","volume":"13","author":"Song","year":"2022","journal-title":"Nat Commun"},{"key":"2023112302533790300_btad548-B31","doi-asserted-by":"crossref","first-page":"916615","DOI":"10.3389\/fbioe.2022.916615","article-title":"Hidden addressing encoding for DNA storage","volume":"10","author":"Wang","year":"2022","journal-title":"Front Bioeng Biotechnol"},{"key":"2023112302533790300_btad548-B32","doi-asserted-by":"crossref","first-page":"5011","DOI":"10.1038\/s41598-017-05188-1","article-title":"Portable and error-free DNA-based data storage","volume":"7","author":"Yazdi","year":"2017","journal-title":"Sci Rep"},{"key":"2023112302533790300_btad548-B33","doi-asserted-by":"crossref","first-page":"49","DOI":"10.3389\/fbioe.2014.00049","article-title":"The essential component in DNA-based information storage system: robust error-tolerating module","volume":"2","author":"Yim","year":"2014","journal-title":"Front Bioeng Biotechnol"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad548\/51359188\/btad548.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/9\/btad548\/53689952\/btad548.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/9\/btad548\/53689952\/btad548.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,23]],"date-time":"2023-11-23T02:54:19Z","timestamp":1700708059000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad548\/7260508"}},"subtitle":[],"editor":[{"given":"Can","family":"Alkan","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2023,9,1]]},"references-count":33,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2023,9,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad548","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2023,9,1]]},"published":{"date-parts":[[2023,9,1]]},"article-number":"btad548"}}