{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T07:52:51Z","timestamp":1768031571168,"version":"3.49.0"},"reference-count":14,"publisher":"Oxford University Press (OUP)","issue":"7","license":[{"start":{"date-parts":[[2023,7,7]],"date-time":"2023-07-07T00:00:00Z","timestamp":1688688000000},"content-version":"vor","delay-in-days":6,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/100008475","name":"Brunel University London","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100008475","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,7,1]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:sec>\n                  <jats:title>Motivation<\/jats:title>\n                  <jats:p>Molecular dynamics (MD) simulations have become routine tools for the study of protein dynamics and function. Thanks to faster GPU-based algorithms, atomistic and coarse-grained simulations are being used to explore biological functions over the microsecond timescale, yielding terabytes of data spanning multiple trajectories, thereby extracting relevant protein conformations without losing important information is often challenging.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Results<\/jats:title>\n                  <jats:p>We present MDSubSampler, a Python library and toolkit for a posteriori subsampling of data from multiple trajectories. This toolkit provides access to uniform, random, stratified, weighted sampling, and bootstrapping sampling methods. Sampling can be performed under the constraint of preserving the original distribution of relevant geometrical properties. Possible applications include simulations post-processing, noise reduction, and structures selection for ensemble docking.<\/jats:p>\n               <\/jats:sec>\n               <jats:sec>\n                  <jats:title>Availability and implementation<\/jats:title>\n                  <jats:p>MDSubSampler is freely available at https:\/\/github.com\/alepandini\/MDSubSampler, along with guidance on installation and tutorials on how it can be used.<\/jats:p>\n               <\/jats:sec>","DOI":"10.1093\/bioinformatics\/btad427","type":"journal-article","created":{"date-parts":[[2023,7,7]],"date-time":"2023-07-07T14:48:03Z","timestamp":1688741283000},"source":"Crossref","is-referenced-by-count":3,"title":["MDSubSampler: <i>a posteriori<\/i> sampling of important protein conformations from biomolecular simulations"],"prefix":"10.1093","volume":"39","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-2001-1065","authenticated-orcid":false,"given":"Namir","family":"Oues","sequence":"first","affiliation":[{"name":"Department of Computer Science, Brunel University London , Uxbridge UB8 3PH, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2019-5311","authenticated-orcid":false,"given":"Sarath Chandra","family":"Dantu","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Brunel University London , Uxbridge UB8 3PH, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Riktaben Jigarkumar","family":"Patel","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Brunel University London , Uxbridge UB8 3PH, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4158-233X","authenticated-orcid":false,"given":"Alessandro","family":"Pandini","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Brunel University London , Uxbridge UB8 3PH, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"286","published-online":{"date-parts":[[2023,7,7]]},"reference":[{"key":"2023071423263331400_btad427-B1","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1016\/j.softx.2015.06.001","article-title":"GROMACS: high performance molecular simulations through multi-level parallelism from laptops to supercomputers","volume":"1\u20132","author":"Abraham","year":"2015","journal-title":"SoftwareX"},{"key":"2023071423263331400_btad427-B2","doi-asserted-by":"crossref","first-page":"1575","DOI":"10.1093\/bioinformatics\/btr168","article-title":"ProDy: protein dynamics inferred from theory and experiments","volume":"27","author":"Bakan","year":"2011","journal-title":"Bioinformatics"},{"key":"2023071423263331400_btad427-B3","first-page":"401","article-title":"On a measure of divergence between two multinomial populations","volume":"7","author":"Bhattacharyya","year":"1933","journal-title":"Indian J Stat"},{"key":"2023071423263331400_btad427-B4","author":"Branden","year":"1999"},{"key":"2023071423263331400_btad427-B5","doi-asserted-by":"crossref","first-page":"964","DOI":"10.1038\/nature06522","article-title":"Dynamic personalities of proteins","volume":"450","author":"Henzler-Wildman","year":"2007","journal-title":"Nature"},{"key":"2023071423263331400_btad427-B6","doi-asserted-by":"crossref","first-page":"1129","DOI":"10.1016\/j.neuron.2018.08.011","article-title":"Molecular dynamics simulation for all","volume":"99","author":"Hollingsworth","year":"2018","journal-title":"Neuron"},{"key":"2023071423263331400_btad427-B7","first-page":"1","author":"Kaptan","year":"2022"},{"key":"2023071423263331400_btad427-B8","doi-asserted-by":"crossref","first-page":"1528","DOI":"10.1016\/j.bpj.2015.08.015","article-title":"MDTraj: a modern open library for the analysis of molecular dynamics trajectories","volume":"109","author":"McGibbon","year":"2015","journal-title":"Biophys J"},{"key":"2023071423263331400_btad427-B9","doi-asserted-by":"crossref","first-page":"2319","DOI":"10.1002\/jcc.21787","article-title":"MDAnalysis: a toolkit for the analysis of molecular dynamics simulations","volume":"32","author":"Michaud-Agrawal","year":"2011","journal-title":"J Comput Chem"},{"key":"2023071423263331400_btad427-B10","doi-asserted-by":"crossref","first-page":"117","DOI":"10.3389\/fmolb.2019.00117","article-title":"Large-scale conformational changes and protein function: breaking the in silico barrier","volume":"6","author":"Orellana","year":"2019","journal-title":"Front Mol Biosci"},{"key":"2023071423263331400_btad427-B11","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1155\/2013\/628536","article-title":"Molecular dynamics studies on the conformational transitions of adenylate kinase: a computational evidence for the conformational selection mechanism","volume":"2013","author":"Ping","year":"2013","journal-title":"Biomed Res Int"},{"key":"2023071423263331400_btad427-B12","author":"R Core Team","year":"2022"},{"key":"2023071423263331400_btad427-B13","author":"Schr\u00f6dinger LLC","year":"2015"},{"key":"2023071423263331400_btad427-B14","doi-asserted-by":"crossref","first-page":"070902","DOI":"10.1063\/1.5109531","article-title":"Enhanced sampling in molecular dynamics","volume":"151","author":"Yang","year":"2019","journal-title":"J Chem Phys"}],"container-title":["Bioinformatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/academic.oup.com\/bioinformatics\/advance-article-pdf\/doi\/10.1093\/bioinformatics\/btad427\/50838254\/btad427.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/7\/btad427\/50886168\/btad427.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article-pdf\/39\/7\/btad427\/50886168\/btad427.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,14]],"date-time":"2023-07-14T23:26:49Z","timestamp":1689377209000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/bioinformatics\/article\/doi\/10.1093\/bioinformatics\/btad427\/7221036"}},"subtitle":[],"editor":[{"given":"Arne","family":"Elofsson","sequence":"additional","affiliation":[],"role":[{"role":"editor","vocabulary":"crossref"}]}],"short-title":[],"issued":{"date-parts":[[2023,7,1]]},"references-count":14,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2023,7,1]]}},"URL":"https:\/\/doi.org\/10.1093\/bioinformatics\/btad427","relation":{},"ISSN":["1367-4811"],"issn-type":[{"value":"1367-4811","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2023,7,1]]},"published":{"date-parts":[[2023,7,1]]},"article-number":"btad427"}}