{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T20:34:22Z","timestamp":1772138062012,"version":"3.50.1"},"reference-count":30,"publisher":"Oxford University Press (OUP)","issue":"8","license":[{"start":{"date-parts":[[2023,7,25]],"date-time":"2023-07-25T00:00:00Z","timestamp":1690243200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006769","name":"Russian Science Foundation","doi-asserted-by":"publisher","award":["20-74-10075"],"award-info":[{"award-number":["20-74-10075"]}],"id":[{"id":"10.13039\/501100006769","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,8,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>The increasing volume of data from high-throughput experiments including parallel reporter assays facilitates the development of complex deep-learning approaches for modeling DNA regulatory grammar.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>Here, we introduce LegNet, an EfficientNetV2-inspired convolutional network for modeling short gene regulatory regions. By approaching the sequence-to-expression regression problem as a soft classification task, LegNet secured first place for the autosome.org team in the DREAM 2022 challenge of predicting gene expression from gigantic parallel reporter assays. Using published data, here, we demonstrate that LegNet outperforms existing models and accurately predicts gene expression per se as well as the effects of single-nucleotide variants. Furthermore, we show how LegNet can be used in a diffusion network manner for the rational design of promoter sequences yielding the desired expression level.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>https:\/\/github.com\/autosome-ru\/LegNet. The GitHub repository includes Jupyter Notebook tutorials and Python scripts under the MIT license to reproduce the results presented in the study.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad457","type":"journal-article","created":{"date-parts":[[2023,7,25]],"date-time":"2023-07-25T00:44:51Z","timestamp":1690245891000},"source":"Crossref","is-referenced-by-count":39,"title":["LegNet: a best-in-class deep learning model for short DNA regulatory regions"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7960-9385","authenticated-orcid":false,"given":"Dmitry","family":"Penzar","sequence":"first","affiliation":[{"name":"Vavilov Institute of General Genetics , Moscow 119991, Russia"},{"name":"Institute of Protein Research , Pushchino 142290, Russia"},{"name":"Institute of Translational Medicine, Pirogov Russian National Research Medical University , Moscow 117997, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0593-2726","authenticated-orcid":false,"given":"Daria","family":"Nogina","sequence":"additional","affiliation":[{"name":"Faculty of Bioengineering and Bioinformatics, Lomonosov Moscow State University , Moscow 119991, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2835-3708","authenticated-orcid":false,"given":"Elizaveta","family":"Noskova","sequence":"additional","affiliation":[{"name":"Faculty of Bioengineering and Bioinformatics, Lomonosov Moscow State University , Moscow 119991, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9450-4629","authenticated-orcid":false,"given":"Arsenii","family":"Zinkevich","sequence":"additional","affiliation":[{"name":"Vavilov Institute of General Genetics , Moscow 119991, Russia"},{"name":"Faculty of Bioengineering and Bioinformatics, Lomonosov Moscow State University , Moscow 119991, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0751-8286","authenticated-orcid":false,"given":"Georgy","family":"Meshcheryakov","sequence":"additional","affiliation":[{"name":"Institute of Protein Research , Pushchino 142290, Russia"}]},{"given":"Andrey","family":"Lando","sequence":"additional","affiliation":[{"name":"Yandex N.V. , Moscow 119021, Russia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0387-5430","authenticated-orcid":false,"given":"Abdul Muntakim","family":"Rafi","sequence":"additional","affiliation":[{"name":"School of Biomedical Engineering, University of British Columbia , Vancouver, BC V6T 1Z4, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8935-5921","authenticated-orcid":false,"given":"Carl","family":"de Boer","sequence":"additional","affiliation":[{"name":"School of Biomedical Engineering, University of British Columbia , Vancouver, BC V6T 1Z4, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6554-8128","authenticated-orcid":false,"given":"Ivan V","family":"Kulakovskiy","sequence":"additional","affiliation":[{"name":"Vavilov Institute of General Genetics , Moscow 119991, Russia"},{"name":"Institute of Protein Research , Pushchino 142290, Russia"},{"name":"Laboratory of Regulatory Genomics, Institute of Fundamental Medicine and Biology, Kazan Federal University , Kazan 420008, Russia"}]}],"member":"286","published-online":{"date-parts":[[2023,7,25]]},"reference":[{"key":"2023080401535859900_btad457-B1","author":"Avdeyev","year":"2023"},{"key":"2023080401535859900_btad457-B2","doi-asserted-by":"crossref","first-page":"1196","DOI":"10.1038\/s41592-021-01252-x","article-title":"Effective gene expression prediction from sequence by integrating long-range interactions","volume":"18","author":"Avsec","year":"2021","journal-title":"Nat Methods"},{"key":"2023080401535859900_btad457-B3","author":"Bansal","year":"2022"},{"key":"2023080401535859900_btad457-B4","author":"Bello","year":"2021"},{"key":"2023080401535859900_btad457-B5","doi-asserted-by":"crossref","first-page":"24","DOI":"10.3389\/fgene.2016.00024","article-title":"Analysis of genomic sequence motifs for deciphering transcription factor binding and transcriptional regulation in eukaryotic cells","volume":"7","author":"Boeva","year":"2016","journal-title":"Front Genet"},{"key":"2023080401535859900_btad457-B6","author":"Chen","year":"2023"},{"key":"2023080401535859900_btad457-B7","doi-asserted-by":"crossref","first-page":"613","DOI":"10.1038\/s41588-022-01048-5","article-title":"DeepSTARR predicts enhancer activity from DNA sequence and enables the de novo design of synthetic enhancers","volume":"54","author":"de Almeida","year":"2022","journal-title":"Nat Genet"},{"key":"2023080401535859900_btad457-B8","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1038\/s41587-019-0315-8","article-title":"Deciphering eukaryotic gene-regulatory logic with 100 million random promoters","volume":"38","author":"de Boer","year":"2020","journal-title":"Nat Biotechnol"},{"key":"2023080401535859900_btad457-B9","doi-asserted-by":"crossref","first-page":"1111","DOI":"10.1038\/s41592-020-0958-x","article-title":"Predicting 3d genome folding from DNA sequence with Akita","volume":"17","author":"Fudenberg","year":"2020","journal-title":"Nat Methods"},{"key":"2023080401535859900_btad457-B10","author":"Huang","year":"2018"},{"key":"2023080401535859900_btad457-B11","doi-asserted-by":"crossref","first-page":"316","DOI":"10.1038\/nmeth.4143","article-title":"SMiLE-seq identifies binding motifs of single and dimeric transcription factors","volume":"14","author":"Isakova","year":"2017","journal-title":"Nat Methods"},{"key":"2023080401535859900_btad457-B12","doi-asserted-by":"crossref","first-page":"327","DOI":"10.1016\/j.cell.2012.12.009","article-title":"DNA-binding specificities of human transcription factors","volume":"152","author":"Jolma","year":"2013","journal-title":"Cell"},{"key":"2023080401535859900_btad457-B13","doi-asserted-by":"crossref","first-page":"56","DOI":"10.1186\/s13059-023-02899-9","article-title":"Current sequence-based models capture gene expression determinants in promoters but mostly ignore distal enhancers","volume":"24","author":"Karollus","year":"2023","journal-title":"Genome Biol"},{"key":"2023080401535859900_btad457-B14","doi-asserted-by":"crossref","first-page":"332","DOI":"10.1093\/nar\/30.1.332","article-title":"TRANSCompel: a database on composite regulatory elements in eukaryotic genes","volume":"30","author":"Kel-Margoulis","year":"2002","journal-title":"Nucleic Acids Res"},{"key":"2023080401535859900_btad457-B15","doi-asserted-by":"crossref","first-page":"1083","DOI":"10.1038\/s41592-020-0965-y","article-title":"A systematic evaluation of the design and context dependencies of massively parallel reporter assays","volume":"17","author":"Klein","year":"2020","journal-title":"Nat Methods"},{"key":"2023080401535859900_btad457-B16","author":"Kossaifi","year":"2018"},{"key":"2023080401535859900_btad457-B17","doi-asserted-by":"crossref","first-page":"2113","DOI":"10.1038\/s41467-020-15977-4","article-title":"Model-driven generation of artificial yeast promoters","volume":"11","author":"Kotopka","year":"2020","journal-title":"Nat Commun"},{"key":"2023080401535859900_btad457-B18","doi-asserted-by":"crossref","first-page":"1123","DOI":"10.1126\/science.ade2574","article-title":"Evolutionary-scale prediction of atomic level protein structure with a language model","volume":"379","author":"Lin","year":"2023","journal-title":"Science"},{"key":"2023080401535859900_btad457-B19","doi-asserted-by":"crossref","first-page":"194765","DOI":"10.1016\/j.bbagrm.2021.194765","article-title":"A GO catalogue of human DNA-binding transcription factors","volume":"1864","author":"Lovering","year":"2021","journal-title":"Biochim Biophys Acta Gene Regul Mech"},{"key":"2023080401535859900_btad457-B20","author":"Luvizon","year":"2017"},{"key":"2023080401535859900_btad457-B21","author":"Rafi","year":"2023"},{"key":"2023080401535859900_btad457-B22","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1038\/s41588-021-01009-4","article-title":"Sequence determinants of human gene regulatory elements","volume":"54","author":"Sahu","year":"2022","journal-title":"Nat Genet"},{"key":"2023080401535859900_btad457-B23","author":"Sasse","year":"2023"},{"key":"2023080401535859900_btad457-B24","doi-asserted-by":"crossref","first-page":"53","DOI":"10.3200\/JEXE.71.1.53-70","article-title":"Testing dependent correlations with nonoverlapping variables: a monte carlo simulation","volume":"73","author":"Silver","year":"2004","journal-title":"J Exp Educ"},{"key":"2023080401535859900_btad457-B25","author":"Smith","year":"2017"},{"key":"2023080401535859900_btad457-B26","author":"Tan","year":"2021"},{"key":"2023080401535859900_btad457-B27","doi-asserted-by":"crossref","first-page":"455","DOI":"10.1038\/s41586-022-04506-6","article-title":"The evolution, evolvability and engineering of gene regulatory DNA","volume":"603","author":"Vaishnav","year":"2022","journal-title":"Nature"},{"key":"2023080401535859900_btad457-B28","doi-asserted-by":"crossref","first-page":"6403","DOI":"10.1093\/nar\/gkaa325","article-title":"Synthetic promoter design in escherichia coli based on a deep generative network","volume":"48","author":"Wang","year":"2020","journal-title":"Nucleic Acids Res"},{"key":"2023080401535859900_btad457-B29","doi-asserted-by":"crossref","first-page":"276","DOI":"10.1038\/nrg1315","article-title":"Applied bioinformatics for the identification of regulatory elements","volume":"5","author":"Wasserman","year":"2004","journal-title":"Nat Rev Genet"},{"key":"2023080401535859900_btad457-B30","doi-asserted-by":"crossref","first-page":"5099","DOI":"10.1038\/s41467-022-32818-8","article-title":"Controlling gene expression with deep generative design of regulatory DNA","volume":"13","author":"Zrimec","year":"2022","journal-title":"Nat Commun"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad457\/50959387\/btad457.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/8\/btad457\/51035847\/btad457.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/8\/btad457\/51035847\/btad457.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,3]],"date-time":"2023-08-03T21:54:23Z","timestamp":1691099663000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad457\/7230784"}},"subtitle":[],"editor":[{"given":"Inanc","family":"Birol","sequence":"additional","affiliation":[]}],"short-title":[],"issued":{"date-parts":[[2023,7,25]]},"references-count":30,"journal-issue":{"issue":"8","published-print":{"date-parts":[[2023,8,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad457","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2022.12.22.521582","asserted-by":"object"}]},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2023,8,1]]},"published":{"date-parts":[[2023,7,25]]},"article-number":"btad457"}}