{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T12:54:04Z","timestamp":1765371244560,"version":"3.46.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T00:00:00Z","timestamp":1763424000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T00:00:00Z","timestamp":1763424000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"DARPA Synergistic Discovery and Design (SD2) program","award":["FA8750-17-C-005"],"award-info":[{"award-number":["FA8750-17-C-005"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Appl. and Comput. Topology"],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s41468-025-00225-9","type":"journal-article","created":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T17:16:11Z","timestamp":1763486171000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A pipeline for data-driven learning of topological features with applications to protein stability prediction"],"prefix":"10.1007","volume":"9","author":[{"given":"Amish","family":"Mishra","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francis C.","family":"Motta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,18]]},"reference":[{"key":"225_CR1","unstructured":"Adams, H., Emerson, T., Kirby, M., et\u00a0al.: Persistence images: A stable vector representation of persistent homology. Journal of Machine Learning Research 18 (2017)"},{"issue":"6","key":"225_CR2","doi-asserted-by":"publisher","first-page":"3031","DOI":"10.1021\/acs.jctc.7b00125","volume":"13","author":"RF Alford","year":"2017","unstructured":"Alford, R.F., Leaver-Fay, A., Jeliazkov, J.R., et al.: The rosetta all-atom energy function for macromolecular modeling and design. J. Chem. Theory Comput. 13(6), 3031\u20133048 (2017)","journal-title":"J. Chem. Theory Comput."},{"issue":"6","key":"225_CR3","doi-asserted-by":"publisher","first-page":"2644","DOI":"10.1109\/TAES.2016.160405","volume":"52","author":"P Bendich","year":"2016","unstructured":"Bendich, P., Chin, S.P., Clark, J., et al.: Topological and statistical behavior classifiers for tracking applications. IEEE Trans. Aerosp. Electron. Syst. 52(6), 2644\u20132661 (2016). https:\/\/doi.org\/10.1109\/TAES.2016.160405","journal-title":"IEEE Trans. Aerosp. Electron. Syst."},{"key":"225_CR4","doi-asserted-by":"publisher","unstructured":"Berman, H.M., Westbrook, J., Feng, Z., et al.: The Protein Data Bank. Nucleic Acids Res. 28(1), 235\u2013242 (2000). https:\/\/doi.org\/10.1093\/nar\/28.1.235. https:\/\/academic.oup.com\/nar\/article-pdf\/28\/1\/235\/9895144\/280235.pdf","DOI":"10.1093\/nar\/28.1.235"},{"key":"225_CR5","doi-asserted-by":"publisher","unstructured":"Beygelzimer, A., Kakade, S., Langford, J.: Cover trees for nearest neighbor. In: Proceedings of the 23rd International Conference on Machine Learning. Association for Computing Machinery, New York, NY, USA, ICML \u201906, p 97\u2013104 (2006). https:\/\/doi.org\/10.1145\/1143844.1143857","DOI":"10.1145\/1143844.1143857"},{"key":"225_CR6","doi-asserted-by":"publisher","unstructured":"Blaabjerg, LM., Kassem, MM., Good, LL., et\u00a0al.: Rapid protein stability prediction using deep learning representations. eLife 12:e82593 (2023). https:\/\/doi.org\/10.7554\/eLife.82593","DOI":"10.7554\/eLife.82593"},{"key":"225_CR7","doi-asserted-by":"crossref","unstructured":"Brand, A., Allen, L., Altman, M., et\u00a0al.: Beyond authorship: Attribution, contribution, collaboration, and credit. Learned Publishing 28(2) (2015)","DOI":"10.1087\/20150211"},{"issue":"1","key":"225_CR8","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1023\/a:1010933404324","volume":"45","author":"L Breiman","year":"2001","unstructured":"Breiman, L.: Random forests. Mach. Learn. 45(1), 5\u201332 (2001). https:\/\/doi.org\/10.1023\/a:1010933404324","journal-title":"Mach. Learn."},{"issue":"1","key":"225_CR9","first-page":"77","volume":"16","author":"P Bubenik","year":"2015","unstructured":"Bubenik, P.: Statistical topological data analysis using persistence landscapes. J. Mach. Learn. Res. 16(1), 77\u2013102 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"225_CR10","doi-asserted-by":"publisher","unstructured":"Cang, Z., Mu, L., Wu, K., et\u00a0al.: A topological approach for protein classification. Computational and Mathematical Biophysics 3(1).(2015) https:\/\/doi.org\/10.1515\/mlbmb-2015-0009","DOI":"10.1515\/mlbmb-2015-0009"},{"key":"225_CR11","doi-asserted-by":"publisher","first-page":"255","DOI":"10.1090\/S0273-0979-09-01249-X","volume":"46","author":"G Carlsson","year":"2009","unstructured":"Carlsson, G.: Topology and data. Bulletin of The American Mathematical Society - BULL AMER MATH SOC 46, 255\u2013308 (2009). https:\/\/doi.org\/10.1090\/S0273-0979-09-01249-X","journal-title":"Bulletin of The American Mathematical Society - BULL AMER MATH SOC"},{"key":"225_CR12","doi-asserted-by":"crossref","unstructured":"Chavent, M., Lacaille, J., Mourer, A., et\u00a0al.: Handling Correlations in Random Forests: which Impacts on Variable Importance and Model Interpretability? In: ESANN 2021 - European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning, Bruges, Belgium, (2021) https:\/\/hal.science\/hal-03483385","DOI":"10.14428\/esann\/2021.ES2021-155"},{"key":"225_CR13","doi-asserted-by":"publisher","unstructured":"Chung, Y.M., Hu, C.S., Lo, Y.L., et al.: A persistent homology approach to heart rate variability analysis with an application to sleep-wake classification. Front. Physiol. 12,(2021). https:\/\/doi.org\/10.3389\/fphys.2021.637684. https:\/\/www.frontiersin.org\/article\/10.3389\/fphys.2021.637684","DOI":"10.3389\/fphys.2021.637684"},{"key":"225_CR14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-33259-6_7","author":"H Edelsbrunner","year":"2010","unstructured":"Edelsbrunner, H., Harer, J.: Computational Topology: An Introduction. (2010). https:\/\/doi.org\/10.1007\/978-3-540-33259-6_7","journal-title":"Computational Topology: An Introduction."},{"key":"225_CR15","doi-asserted-by":"crossref","unstructured":"Edelsbrunner, H., Letscher, D., Zomorodian, A.: Topological persistence and simplification. Discrete & Computational Geometry 28:511\u2013533 (2002). https:\/\/api.semanticscholar.org\/CorpusID:9191014","DOI":"10.1007\/s00454-002-2885-2"},{"issue":"10","key":"225_CR16","doi-asserted-by":"publisher","first-page":"7112","DOI":"10.1109\/TPAMI.2021.3095381","volume":"44","author":"A Elnaggar","year":"2021","unstructured":"Elnaggar, A., Heinzinger, M., Dallago, C., et al.: Prottrans: Toward understanding the language of life through self-supervised learning. IEEE Trans. Pattern Anal. Mach. Intell. 44(10), 7112\u20137127 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"225_CR17","doi-asserted-by":"publisher","unstructured":"Fang, J.: A critical review of five machine learning-based algorithms for predicting protein stability changes upon mutation. Brief. Bioinform. 21(4), 1285\u20131292 (2019). https:\/\/doi.org\/10.1093\/bib\/bbz071. https:\/\/arxiv.org\/abs\/academic.oup.com\/bib\/article-pdf\/21\/4\/1285\/33584078\/bbz071.pdf","DOI":"10.1093\/bib\/bbz071"},{"key":"225_CR18","doi-asserted-by":"publisher","first-page":"659","DOI":"10.1007\/s11222-016-9646-1","volume":"27","author":"B Gregorutti","year":"2017","unstructured":"Gregorutti, B., Michel, B., Saint-Pierre, P.: Correlation and variable importance in random forests. Stat. Comput. 27, 659\u2013678 (2017)","journal-title":"Stat. Comput."},{"key":"225_CR19","unstructured":"Hatcher, A.: Algebraic Topology. Algebraic Topology, Cambridge University Press (2002). https:\/\/books.google.com\/books?id=BjKs86kosqgC"},{"key":"225_CR20","unstructured":"Jakubowski, H., Flatt, P.: Secondary Structural Motifs and Domains. [Online; accessed 2024-07-02] (2023)"},{"key":"225_CR21","doi-asserted-by":"publisher","unstructured":"Jiang, P., Lugo-Martinez, J.: Combined topological data analysis and geometric deep learning reveal niches by the quantification of protein binding pockets. bioRxiv (2023). https:\/\/doi.org\/10.1101\/2023.08.25.554762. https:\/\/www.biorxiv.org\/content\/early\/2023\/08\/27\/2023.08.25.554762","DOI":"10.1101\/2023.08.25.554762"},{"issue":"11","key":"225_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pcbi.1008291","volume":"16","author":"B Li","year":"2020","unstructured":"Li, B., Yang, Y.T., Capra, J.A., et al.: Predicting changes in protein thermodynamic stability upon point mutation with deep 3d convolutional neural networks. PLoS Comput. Biol. 16(11), 1\u201324 (2020). https:\/\/doi.org\/10.1371\/journal.pcbi.1008291","journal-title":"PLoS Comput. Biol."},{"key":"225_CR23","doi-asserted-by":"publisher","unstructured":"Li, L., Thompson, C., Henselman-Petrusek, G., et al.: Minimal cycle representatives in persistent homology using linear programming: An empirical study with user\u2019s guide. Frontiers in Artificial Intelligence 4,(2021). https:\/\/doi.org\/10.3389\/frai.2021.681117. https:\/\/www.frontiersin.org\/journals\/artificial-intelligence\/articles\/10.3389\/frai.2021.681117","DOI":"10.3389\/frai.2021.681117"},{"key":"225_CR24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-44199-2_28","author":"C Maria","year":"2014","unstructured":"Maria, C., Boissonnat, J.D., Glisse, M., et al.: The gudhi library: Simplicial complexes and persistent homology. (2014). https:\/\/doi.org\/10.1007\/978-3-662-44199-2_28","journal-title":"The gudhi library: Simplicial complexes and persistent homology."},{"key":"225_CR25","unstructured":"Mishra, A.: Learning topological features for protein stability prediction (2023a). https:\/\/github.com\/amish-mishra\/CDER-protein-classifier"},{"key":"225_CR26","unstructured":"Mishra, A.: Topological data analysis for data science: The delaunay-rips complex, triangulation stabilities, and protein stability predictions. Phd thesis, Florida Atlantic University, Boca Raton, FL, available at (2023b). https:\/\/fau.digital.flvc.org\/islandora\/object\/fau%3A99072"},{"key":"225_CR27","doi-asserted-by":"publisher","unstructured":"Mishra, A., Motta, F.C.: Stability and machine learning applications of persistent homology using the delaunay-rips complex. Frontiers in Applied Mathematics and Statistics 9,(2023). https:\/\/doi.org\/10.3389\/fams.2023.1179301. https:\/\/www.frontiersin.org\/articles\/10.3389\/fams.2023.1179301","DOI":"10.3389\/fams.2023.1179301"},{"key":"225_CR28","doi-asserted-by":"publisher","unstructured":"Motta, F., Tralie, C., Bedini, R., et\u00a0al.: Hyperparameter optimization of topological features for machine learning applications. In: 2019 18th IEEE International Conference On Machine Learning And Applications (ICMLA), pp 1107\u20131114 (2019). https:\/\/doi.org\/10.1109\/ICMLA.2019.00185","DOI":"10.1109\/ICMLA.2019.00185"},{"key":"225_CR29","doi-asserted-by":"publisher","unstructured":"Nielsen, SV., Schenstr\u00f8m, SM., Christensen, CE., et\u00a0al.: Chapter 6 - protein destabilization and degradation as a mechanism for hereditary disease. In: Pey AL (ed) Protein Homeostasis Diseases. Academic Press, p 111\u2013125,(2020). https:\/\/doi.org\/10.1016\/B978-0-12-819132-3.00006-3. https:\/\/www.sciencedirect.com\/science\/article\/pii\/B9780128191323000063","DOI":"10.1016\/B978-0-12-819132-3.00006-3"},{"key":"225_CR30","doi-asserted-by":"publisher","unstructured":"Otter, N., Porter, MA., Tillmann, U., et\u00a0al.: A roadmap for the computation of persistent homology. EPJ Data Science 6(17)(2017). https:\/\/doi.org\/10.1140\/epjds\/s13688-017-0109-5","DOI":"10.1140\/epjds\/s13688-017-0109-5"},{"key":"225_CR31","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F., Varoquaux, G., Gramfort, A., et al.: Scikit-learn: Machine learning in Python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011)","journal-title":"J. Mach. Learn. Res."},{"issue":"4","key":"225_CR32","doi-asserted-by":"publisher","first-page":"1215","DOI":"10.1007\/s10208-022-09567-7","volume":"23","author":"JA Perea","year":"2023","unstructured":"Perea, J.A., Munch, E., Khasawneh, F.A.: Approximating continuous functions on persistence diagrams using template functions. Found. Comput. Math. 23(4), 1215\u20131272 (2023)","journal-title":"Found. Comput. Math."},{"key":"225_CR33","doi-asserted-by":"publisher","unstructured":"Polanco, L., Perea, JA.: Adaptive template systems: Data-driven feature selection for learning with persistence diagrams. In: 2019 18th IEEE International Conference On Machine Learning And Applications (ICMLA), pp 1115\u20131121, (2019). https:\/\/doi.org\/10.1109\/ICMLA.2019.00186","DOI":"10.1109\/ICMLA.2019.00186"},{"key":"225_CR34","doi-asserted-by":"crossref","unstructured":"Pun, CS., Xia, K., Lee, SX.: Persistent-homology-based machine learning and its applications \u2013 a survey. arXiv: Algebraic Topology (2018)","DOI":"10.2139\/ssrn.3275996"},{"key":"225_CR35","doi-asserted-by":"publisher","unstructured":"Reininghaus, J., Huber, S., Bauer, U., et\u00a0al.: A stable multi-scale kernel for topological machine learning. In: 2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp 4741\u20134748, (2015). https:\/\/doi.org\/10.1109\/CVPR.2015.7299106","DOI":"10.1109\/CVPR.2015.7299106"},{"key":"225_CR36","doi-asserted-by":"publisher","unstructured":"Rocklin, G.J., Chidyausiku, T.M., Goreshnik, I., et al.: Global analysis of protein folding using massively parallel design, synthesis, and testing. Science 357(6347), 168\u2013175 (2017). https:\/\/doi.org\/10.1126\/science.aan0693. https:\/\/www.science.org\/doi\/abs\/10.1126\/science.aan0693","DOI":"10.1126\/science.aan0693"},{"key":"225_CR37","unstructured":"Rouvreau, V.: Alpha complex. In: GUDHI User and Reference Manual, 3rd edn. GUDHI Editorial Board, (2023) https:\/\/gudhi.inria.fr\/doc\/3.7.1\/group__alpha__complex.html"},{"key":"225_CR38","doi-asserted-by":"publisher","unstructured":"Singer, J., Novotney, S., Strickland, D., et\u00a0al.: Large-scale design and refinement of stable proteins using sequence-only models. PloS one 17(3 March) (2022). https:\/\/doi.org\/10.1371\/journal.pone.0265020","DOI":"10.1371\/journal.pone.0265020"},{"key":"225_CR39","unstructured":"Smith, A., Bendich, P., Harer, J., et\u00a0al.: Supervised learning of labeled pointcloud differences via cover-tree entropy reduction. (2017). arXiv preprint arXiv:1702.07959"},{"issue":"4","key":"225_CR40","doi-asserted-by":"publisher","first-page":"649","DOI":"10.1042\/EBC20190042","volume":"64","author":"EJ Stollar","year":"2020","unstructured":"Stollar, E.J., Smith, D.P.: Uncovering protein structure. Essays Biochem. 64(4), 649\u2013680 (2020)","journal-title":"Essays Biochem."},{"key":"225_CR41","unstructured":"Swenson, N., Krishnapriyan, AS., Buluc, A., et\u00a0al.: Persgnn: applying topological data analysis and geometric deep learning to structure-based protein function prediction (2020). arXiv preprint arXiv:2010.16027"},{"key":"225_CR42","doi-asserted-by":"publisher","unstructured":"Wang, Z., Li, Q., Li, G., et\u00a0al.: Polynomial representation for persistence diagram. In: 2019 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp 6116\u20136125, (2019). https:\/\/doi.org\/10.1109\/CVPR.2019.00628","DOI":"10.1109\/CVPR.2019.00628"},{"key":"225_CR43","unstructured":"Zomorodian, A.: Computational Topology, 2nd edn., Chapman & Hall\/CRC, p\u00a03 (2010)"},{"key":"225_CR44","doi-asserted-by":"publisher","first-page":"249","DOI":"10.1007\/s00454-004-1146-y","volume":"33","author":"A Zomorodian","year":"2005","unstructured":"Zomorodian, A., Carlsson, G.: Computing persistent homology. Discret. Comput. Geom. 33, 249\u2013274 (2005). https:\/\/doi.org\/10.1007\/s00454-004-1146-y","journal-title":"Discret. Comput. Geom."}],"container-title":["Journal of Applied and Computational Topology"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41468-025-00225-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s41468-025-00225-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s41468-025-00225-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:52:33Z","timestamp":1765360353000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s41468-025-00225-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,18]]},"references-count":44,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["225"],"URL":"https:\/\/doi.org\/10.1007\/s41468-025-00225-9","relation":{},"ISSN":["2367-1726","2367-1734"],"issn-type":[{"type":"print","value":"2367-1726"},{"type":"electronic","value":"2367-1734"}],"subject":[],"published":{"date-parts":[[2025,11,18]]},"assertion":[{"value":"20 January 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 November 2025","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 November 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing Interests"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval and consent to participate"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Materials availability"}}],"article-number":"27"}}