{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,12]],"date-time":"2026-06-12T19:36:13Z","timestamp":1781292973427,"version":"3.54.1"},"reference-count":29,"publisher":"Oxford University Press (OUP)","issue":"5","license":[{"start":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T00:00:00Z","timestamp":1715126400000},"content-version":"vor","delay-in-days":7,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100000266","name":"EPSRC","doi-asserted-by":"publisher","award":["EP\/S021566\/1"],"award-info":[{"award-number":["EP\/S021566\/1"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,5,2]]},"abstract":"<jats:title>Abstract<\/jats:title>\n                  <jats:sec>\n                    <jats:title>Motivation<\/jats:title>\n                    <jats:p>Protein domains are fundamental units of protein structure and play a pivotal role in understanding folding, function, evolution, and design. The advent of accurate structure prediction techniques has resulted in an influx of new structural data, making the partitioning of these structures into domains essential for inferring evolutionary relationships and functional classification.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Results<\/jats:title>\n                    <jats:p>This article presents Chainsaw, a supervised learning approach to domain parsing that achieves accuracy that surpasses current state-of-the-art methods. Chainsaw uses a fully convolutional neural network which is trained to predict the probability that each pair of residues is in the same domain. Domain predictions are then derived from these pairwise predictions using an algorithm that searches for the most likely assignment of residues to domains given the set of pairwise co-membership probabilities. Chainsaw matches CATH domain annotations in 78% of protein domains versus 72% for the next closest method. When predicting on AlphaFold models, expert human evaluators were twice as likely to prefer Chainsaw\u2019s predictions versus the next best method.<\/jats:p>\n                  <\/jats:sec>\n                  <jats:sec>\n                    <jats:title>Availability and implementation<\/jats:title>\n                    <jats:p>github.com\/JudeWells\/Chainsaw.<\/jats:p>\n                  <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btae296","type":"journal-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T14:23:02Z","timestamp":1715178182000},"source":"Crossref","is-referenced-by-count":41,"title":["Chainsaw: protein domain segmentation with fully convolutional neural networks"],"prefix":"10.1093","volume":"40","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5358-2310","authenticated-orcid":false,"given":"Jude","family":"Wells","sequence":"first","affiliation":[{"name":"Centre for Artificial Intelligence, University College London , WC1E 6BT, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alex","family":"Hawkins-Hooker","sequence":"additional","affiliation":[{"name":"Centre for Artificial Intelligence, University College London , WC1E 6BT, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6568-9035","authenticated-orcid":false,"given":"Nicola","family":"Bordin","sequence":"additional","affiliation":[{"name":"Institute of Structural and Molecular Biology, University College London , WC1E 6BT, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1091-9144","authenticated-orcid":false,"given":"Ian","family":"Sillitoe","sequence":"additional","affiliation":[{"name":"Institute of Structural and Molecular Biology, University College London , WC1E 6BT, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Brooks","family":"Paige","sequence":"additional","affiliation":[{"name":"Centre for Artificial Intelligence, University College London , WC1E 6BT, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Christine","family":"Orengo","sequence":"additional","affiliation":[{"name":"Institute of Structural and Molecular Biology, University College London , WC1E 6BT, United Kingdom"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"286","published-online":{"date-parts":[[2024,5,8]]},"reference":[{"key":"2024082905324868400_btae296-B1","doi-asserted-by":"crossref","first-page":"429","DOI":"10.1093\/bioinformatics\/btg006","article-title":"PDP: protein domain parser","volume":"19","author":"Alexandrov","year":"2003","journal-title":"Bioinformatics"},{"key":"2024082905324868400_btae296-B2","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1038\/75556","article-title":"Gene ontology: tool for the unification of biology","volume":"25","author":"Ashburner","year":"2000","journal-title":"Nat Genet"},{"key":"2024082905324868400_btae296-B3","author":"CATH Database Team","year":"2023"},{"key":"2024082905324868400_btae296-B4","doi-asserted-by":"crossref","first-page":"D553","DOI":"10.1093\/nar\/gkab1054","article-title":"SCOPe: improvements to the structural classification of proteins \u2013 extended database to facilitate variant interpretation and machine learning","volume":"50","author":"Chandonia","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2024082905324868400_btae296-B5","doi-asserted-by":"crossref","first-page":"e1003926","DOI":"10.1371\/journal.pcbi.1003926","article-title":"ECOD: an evolutionary classification of protein domains","volume":"10","author":"Cheng","year":"2014","journal-title":"PLoS Comput Biol"},{"key":"2024082905324868400_btae296-B6","doi-asserted-by":"crossref","first-page":"W732","DOI":"10.1093\/nar\/gkac370","article-title":"SWORD2: hierarchical analysis of protein 3D structures","volume":"50","author":"Cretin","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2024082905324868400_btae296-B7","doi-asserted-by":"crossref","first-page":"1740","DOI":"10.1093\/bioinformatics\/btz650","article-title":"Multi-scale structural analysis of proteins by deep semantic segmentation","volume":"36","author":"Eguchi","year":"2020","journal-title":"Bioinformatics"},{"key":"2024082905324868400_btae296-B8","doi-asserted-by":"crossref","first-page":"W500","DOI":"10.1093\/nar\/gkh429","article-title":"STRIDE: a web server for secondary structure assignment from known atomic coordinates of proteins","volume":"32","author":"Heinig","year":"2004","journal-title":"Nucleic Acids Res"},{"key":"2024082905324868400_btae296-B9","doi-asserted-by":"crossref","first-page":"256","DOI":"10.1002\/prot.340190309","article-title":"Parser for protein folding units","volume":"19","author":"Holm","year":"1994","journal-title":"Proteins Struct Funct Bioinformatics"},{"key":"2024082905324868400_btae296-B10","first-page":"66","author":"Jiang","year":"2018"},{"key":"2024082905324868400_btae296-B11","doi-asserted-by":"crossref","first-page":"583","DOI":"10.1038\/s41586-021-03819-2","article-title":"Highly accurate protein structure prediction with AlphaFold","volume":"596","author":"Jumper","year":"2021","journal-title":"Nature"},{"key":"2024082905324868400_btae296-B12","doi-asserted-by":"crossref","first-page":"8445","DOI":"10.1038\/s41467-023-43934-4","article-title":"Merizo: a rapid and accurate protein domain segmentation method using invariant point attention","volume":"14","author":"Lau","year":"2023","journal-title":"Nat Commun"},{"key":"2024082905324868400_btae296-B13","author":"Lau","year":"2024"},{"key":"2024082905324868400_btae296-B14","doi-asserted-by":"crossref","first-page":"283","DOI":"10.1186\/s12859-022-04829-1","article-title":"Multi-head attention-based U-nets for predicting protein domain boundaries using 1d sequence features and 2d distance maps","volume":"23","author":"Mahmud","year":"2022","journal-title":"BMC Bioinformatics"},{"key":"2024082905324868400_btae296-B15","doi-asserted-by":"crossref","first-page":"D412","DOI":"10.1093\/nar\/gkaa913","article-title":"Pfam: the protein families database in 2021","volume":"49","author":"Mistry","year":"2021","journal-title":"Nucleic Acids Res"},{"key":"2024082905324868400_btae296-B16","doi-asserted-by":"crossref","first-page":"536","DOI":"10.1016\/S0022-2836(05)80134-2","article-title":"SCOP: a structural classification of proteins database for the investigation of sequences and structures","volume":"247","author":"Murzin","year":"1995","journal-title":"J Mol Biol"},{"key":"2024082905324868400_btae296-B17","doi-asserted-by":"crossref","first-page":"1093","DOI":"10.1016\/S0969-2126(97)00260-8","article-title":"CATH: a hierarchic classification of protein domain structures","volume":"5","author":"Orengo","year":"1997","journal-title":"Structure"},{"key":"2024082905324868400_btae296-B18","doi-asserted-by":"crossref","first-page":"e1600552","DOI":"10.1126\/sciadv.1600552","article-title":"An ambiguity principle for assigning protein structural domains","volume":"3","author":"Postic","year":"2017","journal-title":"Sci Adv"},{"key":"2024082905324868400_btae296-B19","doi-asserted-by":"crossref","first-page":"e1168","DOI":"10.1371\/journal.pntd.0001168","article-title":"Characterization of the phytochelatin synthase of Schistosoma mansoni","volume":"5","author":"Ray","year":"2011","journal-title":"PLoS Negl Trop Dis"},{"key":"2024082905324868400_btae296-B20","doi-asserted-by":"crossref","first-page":"e232","DOI":"10.1371\/journal.pcbi.0030232","article-title":"CATHEDRAL: a fast and effective algorithm to predict folds and domain boundaries from multidomain protein structures","volume":"3","author":"Redfern","year":"2007","journal-title":"PLoS Comput Biol"},{"key":"2024082905324868400_btae296-B21","doi-asserted-by":"crossref","first-page":"5128","DOI":"10.1093\/bioinformatics\/btz464","article-title":"DNN-Dom: predicting protein domain boundary from sequence alone by deep neural network","volume":"35","author":"Shi","year":"2019","journal-title":"Bioinformatics"},{"key":"2024082905324868400_btae296-B22","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1002\/prot.20736","article-title":"Evaluation of domain prediction in CASP6","volume":"61","author":"Tai","year":"2005","journal-title":"Proteins Struct Funct Bioinformatics"},{"key":"2024082905324868400_btae296-B23","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1002\/prot.21675","article-title":"Assessment of predictions submitted for the CASP7 domain prediction category","volume":"69","author":"Tress","year":"2007","journal-title":"Proteins Struct Funct Bioinformatics"},{"key":"2024082905324868400_btae296-B24","doi-asserted-by":"crossref","first-page":"243","DOI":"10.1038\/s41587-023-01773-0","article-title":"Fast and accurate protein structure search with foldseek","volume":"42","author":"van Kempen","year":"2023","journal-title":"Nat Biotechnol"},{"key":"2024082905324868400_btae296-B25","doi-asserted-by":"crossref","first-page":"D439","DOI":"10.1093\/nar\/gkab1061","article-title":"AlphaFold protein structure database: massively expanding the structural coverage of protein-sequence space with high-accuracy models","volume":"50","author":"Varadi","year":"2022","journal-title":"Nucleic Acids Res"},{"key":"2024082905324868400_btae296-B26","doi-asserted-by":"crossref","first-page":"1496","DOI":"10.1073\/pnas.1914677117","article-title":"Improved protein structure prediction using predicted interresidue orientations","volume":"117","author":"Yang","year":"2020","journal-title":"Proc Natl Acad Sci USA"},{"key":"2024082905324868400_btae296-B27","doi-asserted-by":"crossref","first-page":"e4548","DOI":"10.1002\/pro.4548","article-title":"DPAM: a domain parser for AlphaFold models","volume":"32","author":"Zhang","year":"2023","journal-title":"Protein Sci"},{"key":"2024082905324868400_btae296-B28","doi-asserted-by":"crossref","first-page":"3749","DOI":"10.1093\/bioinformatics\/btaa217","article-title":"FUpred: detecting protein domains through deep-learning-based contact map prediction","volume":"36","author":"Zheng","year":"2020","journal-title":"Bioinformatics"},{"key":"2024082905324868400_btae296-B29","doi-asserted-by":"crossref","first-page":"btad070","DOI":"10.1093\/bioinformatics\/btad070","article-title":"A unified approach to protein domain parsing with inter-residue distance matrix","volume":"39","author":"Zhu","year":"2023","journal-title":"Bioinformatics"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btae296\/57452232\/btae296.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/5\/btae296\/58958105\/btae296.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/40\/5\/btae296\/58958105\/btae296.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,29]],"date-time":"2024-08-29T01:34:31Z","timestamp":1724895271000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btae296\/7667299"}},"subtitle":[],"editor":[{"given":"Arne","family":"Elofsson","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"editor"}]}],"short-title":[],"issued":{"date-parts":[[2024,5,1]]},"references-count":29,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,5,2]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btae296","relation":{"has-preprint":[{"id-type":"doi","id":"10.1101\/2023.07.19.549732","asserted-by":"object"}]},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2024,5,1]]},"published":{"date-parts":[[2024,5,1]]},"article-number":"btae296"}}