{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,12]],"date-time":"2026-02-12T09:33:16Z","timestamp":1770888796938,"version":"3.50.1"},"reference-count":99,"publisher":"American Chemical Society (ACS)","issue":"3","license":[{"start":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T00:00:00Z","timestamp":1769472000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T00:00:00Z","timestamp":1769472000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T00:00:00Z","timestamp":1769472000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-045"}],"funder":[{"DOI":"10.13039\/501100001804","name":"Canada Research Chairs","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001804","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000057","name":"National Institute of General Medical Sciences","doi-asserted-by":"publisher","award":["R35GM150620"],"award-info":[{"award-number":["R35GM150620"]}],"id":[{"id":"10.13039\/100000057","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Banting Postdoctoral Fellowship"},{"name":"Center for Advanced Computing (CAC), Queen\u2019s University."},{"DOI":"10.13039\/501100021202","name":"Digital Research Alliance of Canada","doi-asserted-by":"crossref","id":[{"id":"10.13039\/501100021202","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Alliance\u2019s DRI (Digital Research Infrastructure) EDIA (Equity, Diversity, Inclusion and Accessibility) Champions Pilot 
Program"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["J. Chem. Inf. Model."],"published-print":{"date-parts":[[2026,2,9]]},"DOI":"10.1021\/acs.jcim.5c01499","type":"journal-article","created":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T16:47:52Z","timestamp":1769532472000},"page":"1275-1285","source":"Crossref","is-referenced-by-count":1,"title":["<tt>Selector<\/tt>\n                    : A General Python Library for Diverse Subset Selection"],"prefix":"10.1021","volume":"66","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2886-7012","authenticated-orcid":true,"given":"Fanwang","family":"Meng","sequence":"first","affiliation":[{"name":"Department of Chemistry, Queen\u2019s University, 90 Bader Lane, Kingston, Ontario K7L 3N6, Canada"},{"name":"Department of Chemistry and Chemical Biology, McMaster University, 1280 Main Street West, Hamilton, Ontario L8S 4L8, Canada"}]},{"given":"Marco","family":"Mart\u00ednez Gonz\u00e1lez","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Chemical Biology, McMaster University, 1280 Main Street West, Hamilton, Ontario L8S 4L8, Canada"}]},{"given":"Valerii","family":"Chuiko","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Chemical Biology, McMaster University, 1280 Main Street West, Hamilton, Ontario L8S 4L8, Canada"}]},{"given":"Alireza","family":"Tehrani","sequence":"additional","affiliation":[{"name":"Department of Chemistry, Queen\u2019s University, 90 Bader Lane, Kingston, Ontario K7L 3N6, Canada"}]},{"given":"Abdul Rahman","family":"Al Nabulsi","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Chemical Biology, McMaster University, 1280 Main Street West, Hamilton, Ontario L8S 4L8, Canada"}]},{"given":"Abigail","family":"Broscius","sequence":"additional","affiliation":[{"name":"Department of Chemistry, Queen\u2019s University, 90 Bader Lane, Kingston, Ontario K7L 3N6, 
Canada"}]},{"given":"Hasan","family":"Khaleel","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Chemical Biology, McMaster University, 1280 Main Street West, Hamilton, Ontario L8S 4L8, Canada"}]},{"given":"Kenneth","family":"L\u00f3pez-P\u00e9rez","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Quantum Theory Project","place":["Gainesville, United States"]},{"name":"University of Florida","place":["Gainesville, United States"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2121-4449","authenticated-orcid":true,"given":"Ram\u00f3n Alain","family":"Miranda-Quintana","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Quantum Theory Project","place":["Gainesville, United States"]},{"name":"University of Florida","place":["Gainesville, United States"]}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2605-3883","authenticated-orcid":true,"given":"Paul W.","family":"Ayers","sequence":"additional","affiliation":[{"name":"Department of Chemistry and Chemical Biology, McMaster University, 1280 Main Street West, Hamilton, Ontario L8S 4L8, Canada"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2069-050X","authenticated-orcid":true,"given":"Farnaz","family":"Heidar-Zadeh","sequence":"additional","affiliation":[{"name":"Department of Chemistry, Queen\u2019s University, 90 Bader Lane, Kingston, Ontario K7L 3N6, 
Canada"}]}],"member":"316","published-online":{"date-parts":[[2026,1,27]]},"reference":[{"key":"ref1\/cit1","doi-asserted-by":"publisher","DOI":"10.1007\/BF00031692"},{"key":"ref2\/cit2","doi-asserted-by":"publisher","DOI":"10.1007\/s10531-004-0224-z"},{"key":"ref3\/cit3","doi-asserted-by":"publisher","DOI":"10.1890\/04-0576"},{"key":"ref4\/cit4","doi-asserted-by":"publisher","DOI":"10.1002\/wcms.33"},{"key":"ref5\/cit5","doi-asserted-by":"publisher","DOI":"10.1021\/ci9700337"},{"key":"ref6\/cit6","doi-asserted-by":"publisher","DOI":"10.1039\/D4DD00073K"},{"key":"ref7\/cit7","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2111786119"},{"key":"ref8\/cit8","doi-asserted-by":"publisher","DOI":"10.1021\/ci025662h"},{"key":"ref9\/cit9","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.3c01674"},{"key":"ref10\/cit10","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.3c01790"},{"key":"ref11\/cit11","doi-asserted-by":"publisher","DOI":"10.1016\/j.carj.2019.06.002"},{"key":"ref12\/cit12","unstructured":"Killamsetty, K.; Durga, S.; Ramakrishnan, G.; De, A.; Iyer, R.Grad-match: Gradient matching based data subset selection for efficient deep model training. In\n                      International Conference on Machine Learning\n                      2021; pp 5464\u20135474."},{"key":"ref13\/cit13","doi-asserted-by":"crossref","unstructured":"Ro, Y.; Xu, C.; Ciborowska, A.; Bhattacharya, S.; Li, F.; Foltin, M.Dataset Efficient Training with Model Ensembling. In\n                      Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition\n                      2023; pp 4700\u20134704.","DOI":"10.1109\/CVPRW59228.2023.00497"},{"key":"ref14\/cit14","unstructured":"Tian, J.; Nurminen, J.; Kiss, I.Optimal subset selection from text databases. In\n                      Proceedings.(ICASSP\u201905). 
IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005\n                      , 2005; p I-305."},{"key":"ref15\/cit15","doi-asserted-by":"crossref","unstructured":"Saranathan, G.; Xu, C.; Alam, M. P.; Kumar, T.; Foltin, M.; Wong, S. Y.; Bhattacharya, S.SubLIME: Subset Selection via Rank Correlation Prediction for Data-Efficient LLM Evaluation. In\n                      Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)\n                      2025; pp 30572\u201330593.","DOI":"10.18653\/v1\/2025.acl-long.1477"},{"key":"ref16\/cit16","doi-asserted-by":"publisher","DOI":"10.1140\/epjb\/s10051-021-00220-w"},{"key":"ref17\/cit17","unstructured":"Liberty, E.; Lang, K.; Shmakov, K.Stratified Sampling Meets Machine Learning. In\n                      Proceedings of The 33rd International Conference on Machine Learning\n                      , 2016; Vol. 48, pp 2320\u20132329."},{"key":"ref18\/cit18","doi-asserted-by":"crossref","unstructured":"Pearlman, R. S.; Smith, K. M.\n                      3D QSAR in Drug Design\n                      ; Kubinyi, H.; Folkers, G.; Martin, Y. C., Eds. Springer, 2002; Vol. 2, pp 339\u2013353.","DOI":"10.1007\/0-306-46857-3_18"},{"key":"ref19\/cit19","doi-asserted-by":"publisher","DOI":"10.1093\/nar\/gkad380"},{"key":"ref20\/cit20","doi-asserted-by":"crossref","unstructured":"Naveja-Romero, J.; Saldivar-Gonzalez, F. I.; Prado-Romero, D. L.; Ruiz-Moreno, A. J.; Velasco-Velazquez, M.; Miranda-Quintana, R. A.; Medina-Franco, J. 
L.\n                      ViSAS for Entering Chemical Space: Virtual Screening of Analog Series and Related Advances\n                      , 2022.","DOI":"10.26434\/chemrxiv-2022-9t2rj"},{"key":"ref21\/cit21","doi-asserted-by":"publisher","DOI":"10.1021\/cc0000388"},{"key":"ref22\/cit22","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jpcc.8b08063"},{"key":"ref23\/cit23","doi-asserted-by":"publisher","DOI":"10.6339\/21-JDS999"},{"key":"ref24\/cit24","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0048476"},{"key":"ref25\/cit25","doi-asserted-by":"publisher","DOI":"10.1126\/science.aat2663"},{"key":"ref26\/cit26","doi-asserted-by":"publisher","DOI":"10.1002\/minf.202300056"},{"key":"ref27\/cit27","doi-asserted-by":"publisher","DOI":"10.1021\/ci060117s"},{"key":"ref28\/cit28","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c01073"},{"key":"ref29\/cit29","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c00433"},{"key":"ref30\/cit30","doi-asserted-by":"publisher","DOI":"10.1063\/5.0166172"},{"key":"ref31\/cit31","doi-asserted-by":"publisher","DOI":"10.1038\/nmat4717"},{"key":"ref32\/cit32","doi-asserted-by":"crossref","unstructured":"Schwalbe-Koda, R.; Gomez-Bombarelli, R.\n                      Machine Learning Meets Quantum\n                      ; Schutt, K.; Chmiela, S.; Von Lilienfeld, O. A.; Tkatchenko, A.; Tsuda, A.; Muller, K.R., Eds. 
Springer, 2020; pp 445\u2013467.","DOI":"10.1007\/978-3-030-40245-7_21"},{"key":"ref33\/cit33","doi-asserted-by":"publisher","DOI":"10.1021\/acscentsci.7b00572"},{"key":"ref34\/cit34","doi-asserted-by":"publisher","DOI":"10.1039\/D1SC00231G"},{"key":"ref35\/cit35","doi-asserted-by":"publisher","DOI":"10.1016\/S1367-5931(99)80051-9"},{"key":"ref36\/cit36","doi-asserted-by":"publisher","DOI":"10.1016\/S1093-3263(00)00072-3"},{"key":"ref37\/cit37","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-021-00498-z"},{"key":"ref38\/cit38","doi-asserted-by":"publisher","DOI":"10.1093\/bib\/bbae174"},{"key":"ref39\/cit39","doi-asserted-by":"publisher","DOI":"10.1039\/D2SC00821A"},{"key":"ref40\/cit40","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/ac7ddc"},{"key":"ref41\/cit41","doi-asserted-by":"publisher","DOI":"10.1016\/S1093-3263(98)80008-9"},{"key":"ref42\/cit42","doi-asserted-by":"publisher","DOI":"10.1016\/S1093-3263(98)00008-4"},{"key":"ref43\/cit43","doi-asserted-by":"publisher","DOI":"10.3390\/70800657"},{"key":"ref44\/cit44","doi-asserted-by":"crossref","first-page":"1","DOI":"10.18637\/jss.v028.i05","volume":"28","author":"Kuhn","year":"2008","journal-title":"J. Stat. 
Software"},{"key":"ref45\/cit45","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2015.08.010"},{"key":"ref46\/cit46","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-04967-9"},{"key":"ref47\/cit47","doi-asserted-by":"publisher","DOI":"10.21105\/joss.05996"},{"key":"ref48\/cit48","doi-asserted-by":"publisher","DOI":"10.1007\/s00044-014-1193-8"},{"key":"ref49\/cit49","doi-asserted-by":"publisher","DOI":"10.1080\/00401706.1969.10490666"},{"key":"ref50\/cit50","doi-asserted-by":"publisher","DOI":"10.1021\/ci970282v"},{"key":"ref51\/cit51","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-019-0398-8"},{"issue":"31","key":"ref52\/cit52","first-page":"5281","volume":"8","author":"Landrum G.","year":"2013","journal-title":"Greg Landrum"},{"key":"ref53\/cit53","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-022-10366-3"},{"key":"ref54\/cit54","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-021-00554-8"},{"key":"ref55\/cit55","doi-asserted-by":"publisher","DOI":"10.1063\/5.0196638"},{"key":"ref56\/cit56","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.26468"},{"key":"ref57\/cit57","doi-asserted-by":"publisher","DOI":"10.1063\/5.0202240"},{"key":"ref58\/cit58","doi-asserted-by":"publisher","DOI":"10.1063\/5.0216776"},{"key":"ref59\/cit59","doi-asserted-by":"crossref","unstructured":"Pujal, L.; Tehrani, A.; Heidar-Zadeh, F.\n                      Conceptual Density Functional Theory\n                      ; John Wiley & Sons, Ltd, 2022; Chapter 32, pp 
649\u2013661.","DOI":"10.1002\/9783527829941.ch32"},{"key":"ref60\/cit60","doi-asserted-by":"publisher","DOI":"10.1016\/j.cplett.2016.07.039"},{"key":"ref61\/cit61","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.27034"},{"key":"ref62\/cit62","doi-asserted-by":"publisher","DOI":"10.1063\/5.0216781"},{"key":"ref63\/cit63","doi-asserted-by":"publisher","DOI":"10.1063\/5.0219015"},{"key":"ref64\/cit64","doi-asserted-by":"publisher","DOI":"10.1063\/5.0219010"},{"key":"ref65\/cit65","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jpca.4c07353"},{"key":"ref66\/cit66","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.27170"},{"key":"ref67\/cit67","doi-asserted-by":"publisher","DOI":"10.1016\/j.cpc.2022.108334"},{"key":"ref68\/cit68","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pcbi.1006561"},{"key":"ref69\/cit69","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-020-2649-2"},{"key":"ref70\/cit70","unstructured":"Krekel, H.; Oliveira, B.; Pfannschmidt, R.; Bruynooghe, F.; Laugher, B.; Bruhin, F. 
pytest 8.4.0 2004 https:\/\/github.com\/pytest-dev\/pytest, Version 8.4.0 Contributors include Holger Krekel, Bruno Oliveira, Ronny Pfannschmidt, Floris Bruynooghe, Brianna Laugher, Florian Bruhin, and others."},{"key":"ref71\/cit71","doi-asserted-by":"publisher","DOI":"10.1089\/106652799318382"},{"key":"ref72\/cit72","doi-asserted-by":"publisher","DOI":"10.1007\/0-306-46873-5_6"},{"key":"ref73\/cit73","doi-asserted-by":"publisher","DOI":"10.1021\/ci025554v"},{"key":"ref74\/cit74","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-021-00505-3"},{"key":"ref75\/cit75","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-021-00504-4"},{"key":"ref76\/cit76","doi-asserted-by":"publisher","DOI":"10.1039\/D4DD00041B"},{"key":"ref77\/cit77","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jctc.4c00362"},{"key":"ref78\/cit78","doi-asserted-by":"publisher","DOI":"10.1186\/s13321-015-0069-3"},{"key":"ref79\/cit79","doi-asserted-by":"publisher","DOI":"10.1198\/004017002317375064"},{"key":"ref80\/cit80","doi-asserted-by":"publisher","DOI":"10.1038\/s41592-019-0686-2"},{"key":"ref81\/cit81","first-page":"2825","volume":"12","author":"Pedregosa F.","year":"2011","journal-title":"J. Mach. Learn. 
Res."},{"key":"ref82\/cit82","doi-asserted-by":"publisher","DOI":"10.18637\/jss.v102.i10"},{"key":"ref83\/cit83","doi-asserted-by":"publisher","DOI":"10.1021\/ci900159f"},{"key":"ref84\/cit84","doi-asserted-by":"publisher","DOI":"10.1021\/ci060074f"},{"key":"ref85\/cit85","doi-asserted-by":"publisher","DOI":"10.1002\/jcc.24423"},{"key":"ref86\/cit86","doi-asserted-by":"publisher","DOI":"10.1063\/5.0006498"},{"key":"ref87\/cit87","doi-asserted-by":"publisher","DOI":"10.1016\/j.chempr.2020.05.014"},{"key":"ref88\/cit88","doi-asserted-by":"publisher","DOI":"10.1063\/5.0038301"},{"key":"ref89\/cit89","doi-asserted-by":"publisher","DOI":"10.1039\/D2DD00008C"},{"key":"ref90\/cit90","author":"Anstine D.","year":"2024","journal-title":"ChemRxiv"},{"key":"ref91\/cit91","doi-asserted-by":"publisher","DOI":"10.1038\/s43588-023-00561-9"},{"key":"ref92\/cit92","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jpca.8b12005"},{"key":"ref93\/cit93","doi-asserted-by":"publisher","DOI":"10.1063\/5.0031199"},{"key":"ref94\/cit94","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-022-01330-5"},{"key":"ref95\/cit95","doi-asserted-by":"publisher","DOI":"10.1080\/00268970500460390"},{"key":"ref96\/cit96","volume-title":"Quantum Biochemistry","author":"Liu Y. L.","year":"2010"},{"key":"ref97\/cit97","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.2c01073"},{"key":"ref98\/cit98","unstructured":"Wang, P.; Shen, Y.; Guo, Z.; Stallone, M.; Kim, Y.; Golland, P.; Panda, R. Diversity Measurement and Subset Selection for Instruction Tuning Datasets, 2024, arXiv:2402.02318. 
arXiv.org e-Print archive https:\/\/arxiv.org\/abs\/2402.02318."},{"key":"ref99\/cit99","doi-asserted-by":"publisher","DOI":"10.1021\/acs.jcim.5c00497"}],"container-title":["Journal of Chemical Information and Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.5c01499","content-type":"application\/pdf","content-version":"vor","intended-application":"unspecified"},{"URL":"https:\/\/pubs.acs.org\/doi\/pdf\/10.1021\/acs.jcim.5c01499","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T09:12:40Z","timestamp":1770628360000},"score":1,"resource":{"primary":{"URL":"https:\/\/pubs.acs.org\/doi\/10.1021\/acs.jcim.5c01499"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,27]]},"references-count":99,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2026,2,9]]}},"alternative-id":["10.1021\/acs.jcim.5c01499"],"URL":"https:\/\/doi.org\/10.1021\/acs.jcim.5c01499","relation":{},"ISSN":["1549-9596","1549-960X"],"issn-type":[{"value":"1549-9596","type":"print"},{"value":"1549-960X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,1,27]]}}}