{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:40:11Z","timestamp":1755884411981,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T00:00:00Z","timestamp":1731542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,11,14]]},"DOI":"10.1145\/3677052.3698616","type":"proceedings-article","created":{"date-parts":[[2024,11,14]],"date-time":"2024-11-14T06:38:06Z","timestamp":1731566286000},"page":"213-221","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Can an unsupervised clustering algorithm reproduce a categorization system?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7471-4422","authenticated-orcid":false,"given":"Nathalia M","family":"Castellanos","sequence":"first","affiliation":[{"name":"BlackRock, Inc., United States"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-5455-3851","authenticated-orcid":false,"given":"Sebastian","family":"Frank","sequence":"additional","affiliation":[{"name":"BlackRock, Inc., United States"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7728-0081","authenticated-orcid":false,"given":"Dhruv","family":"Desai","sequence":"additional","affiliation":[{"name":"BlackRock, Inc., United States"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-8005-3207","authenticated-orcid":false,"given":"Stefano","family":"Pasquali","sequence":"additional","affiliation":[{"name":"BlackRock, Inc., United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1040-9032","authenticated-orcid":false,"given":"Dhagash","family":"Mehta","sequence":"additional","affiliation":[{"name":"BlackRock, Inc., United States"}]}],"member":"320","published-online":{"date-parts":[[2024,11,14]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/312129.312279"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1111\/1475-679X.00054"},{"key":"e_1_3_2_1_3_1","volume-title":"Random forests. Machine learning 45","author":"Breiman Leo","year":"2001","unstructured":"Leo Breiman. 2001. Random forests. Machine learning 45 (2001), 5\u201332."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1080\/03610927408827101"},{"volume-title":"Investment valuation: Tools and techniques for determining the value of any asset. Vol.\u00a0666","author":"Damodaran Aswath","key":"e_1_3_2_1_5_1","unstructured":"Aswath Damodaran. 2012. Investment valuation: Tools and techniques for determining the value of any asset. Vol.\u00a0666. John Wiley & Sons."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.1979.4766909"},{"key":"e_1_3_2_1_7_1","first-page":"1","article-title":"metric-learn: Metric learning algorithms in python","volume":"21","author":"De\u00a0Vazelhes William","year":"2020","unstructured":"William De\u00a0Vazelhes, CJ Carey, Yuan Tang, Nathalie Vauquier, and Aur\u00e9lien Bellet. 2020. metric-learn: Metric learning algorithms in python. Journal of Machine Learning Research 21, 138 (2020), 1\u20136.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3604237.3626878"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3905\/jfds.2021.3.4.130"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1111\/1540-6261.00545"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2022.104743"},{"key":"e_1_3_2_1_12_1","volume-title":"Open Set Recognition for Random Forest. arXiv preprint arXiv:2408.02684","author":"Feng Guanchao","year":"2024","unstructured":"Guanchao Feng, Dhruv Desai, Stefano Pasquali, and Dhagash Mehta. 2024. Open Set Recognition for Random Forest. arXiv preprint arXiv:2408.02684 (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1983.10478008"},{"key":"e_1_3_2_1_14_1","first-page":"32142","article-title":"Adbench: Anomaly detection benchmark","volume":"35","author":"Han Songqiao","year":"2022","unstructured":"Songqiao Han, Xiyang Hu, Hailiang Huang, Minqi Jiang, and Yue Zhao. 2022. Adbench: Anomaly detection benchmark. Advances in Neural Information Processing Systems 35 (2022), 32142\u201332159.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.3905\/joi.2001.319455"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01908075"},{"volume-title":"An introduction to statistical learning. Vol.\u00a0112","author":"James Gareth","key":"e_1_3_2_1_17_1","unstructured":"Gareth James, Daniela Witten, Trevor Hastie, Robert Tibshirani, 2013. An introduction to statistical learning. Vol.\u00a0112. Springer."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3533271.3561736"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.physa.2016.06.094"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPEICES.2016.7853264"},{"key":"e_1_3_2_1_21_1","first-page":"38","article-title":"Performance evaluation of distance metrics in the clustering algorithms","volume":"13","author":"Kumar Vijay","year":"2014","unstructured":"Vijay Kumar, Jitender\u00a0Kumar Chhabra, and Dinesh Kumar. 2014. Performance evaluation of distance metrics in the clustering algorithms. INFOCOMP Journal of Computer Science 13, 1 (2014), 38\u201352.","journal-title":"INFOCOMP Journal of Computer Science"},{"key":"e_1_3_2_1_22_1","volume-title":"Quantile Regression using Random Forest Proximities. arXiv preprint arXiv:2408.02355","author":"Li Mingshu","year":"2024","unstructured":"Mingshu Li, Bhaskarjit Sarmah, Dhruv Desai, Joshua Rosaler, Snigdha Bhagat, Philip Sommer, and Dhagash Mehta. 2024. Quantile Regression using Random Forest Proximities. arXiv preprint arXiv:2408.02355 (2024)."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3606274.3606277"},{"key":"e_1_3_2_1_24_1","series-title":"Series A (2008-) 80","volume-title":"On the generalized distance in statistics. Sankhy\u0101: The Indian Journal of Statistics","author":"Mahalanobis Prasanta\u00a0Chandra","year":"2018","unstructured":"Prasanta\u00a0Chandra Mahalanobis. 2018. On the generalized distance in statistics. Sankhy\u0101: The Indian Journal of Statistics, Series A (2008-) 80 (2018), S1\u2013S7."},{"key":"e_1_3_2_1_25_1","volume-title":"Categorizing mutual funds using clusters. Advances in Quantitative analysis of Finance and Accounting 7, 1","author":"Marathe Achla","year":"1999","unstructured":"Achla Marathe and Hany\u00a0A Shawky. 1999. Categorizing mutual funds using clusters. Advances in Quantitative analysis of Finance and Accounting 7, 1 (1999), 199\u2013204."},{"key":"e_1_3_2_1_26_1","unstructured":"Kolby\u00a0Nottingham Markelle\u00a0Kelly Rachel\u00a0Longjohn. 2017. The UCI Machine Learning Repository. https:\/\/archive.ics.uci.edu"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383455.3422555"},{"key":"e_1_3_2_1_28_1","volume-title":"Matthew\u00a0J Hirn","author":"Moon R","year":"2019","unstructured":"Kevin\u00a0R Moon, David Van\u00a0Dijk, Zheng Wang, Scott Gigante, Daniel\u00a0B Burkhardt, William\u00a0S Chen, Kristina Yim, Antonia van\u00a0den Elzen, Matthew\u00a0J Hirn, Ronald\u00a0R Coifman, 2019. Visualizing structure and transitions in high-dimensional biological data. Nature biotechnology 37, 12 (2019), 1482\u20131492."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ejor.2004.12.018"},{"key":"e_1_3_2_1_30_1","unstructured":"Morningstar. 2018. \"Morningstar Categorization.\". https:\/\/www.morningstar.com\/content\/dam\/marketing\/shared\/research\/methodology\/860250-GlobalCategoryClassifications.pdf"},{"key":"e_1_3_2_1_31_1","volume-title":"Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12","author":"Pedregosa Fabian","year":"2011","unstructured":"Fabian Pedregosa, Ga\u00ebl Varoquaux, Alexandre Gramfort, Vincent Michel, Bertrand Thirion, Olivier Grisel, Mathieu Blondel, Peter Prettenhofer, Ron Weiss, Vincent Dubourg, 2011. Scikit-learn: Machine learning in Python. the Journal of machine Learning research 12 (2011), 2825\u20132830."},{"key":"e_1_3_2_1_32_1","unstructured":"Refinitiv. 2016. Rifinitive. \"Lipper Fund Research.\". https:\/\/lipperalpha.refinitiv.com\/wp-content\/uploads\/2016\/01\/Lipper-U.S.-Mutual-Fund-Classifications-Definitions-Document-version-1.2-August-15-2020.pdf"},{"key":"e_1_3_2_1_33_1","unstructured":"Jake\u00a0S. Rhodes Adele Cutler and Kevin\u00a0R. Moon. 2023. Geometry- and Accuracy-Preserving Random Forest Proximities. arxiv:2201.12682\u00a0[stat.ML]"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/SSP49050.2021.9513749"},{"key":"e_1_3_2_1_35_1","volume-title":"Towards Enhanced Local Explainability of Random Forests: A Proximity-Based Approach. arXiv preprint arXiv:2310.12428","author":"Rosaler Joshua","year":"2023","unstructured":"Joshua Rosaler, Dhruv Desai, Bhaskarjit Sarmah, Dimitrios Vamvourellis, Deran Onay, Dhagash Mehta, and Stefano Pasquali. 2023. Towards Enhanced Local Explainability of Random Forests: A Proximity-Based Approach. arXiv preprint arXiv:2310.12428 (2023)."},{"volume-title":"Jason Eisner (Ed.)","author":"Rosenberg Andrew","key":"e_1_3_2_1_36_1","unstructured":"Andrew Rosenberg and Julia Hirschberg. 2007. V-Measure: A Conditional Entropy-Based External Cluster Evaluation Measure, Jason Eisner (Ed.). Association for Computational Linguistics, 410\u2013420. https:\/\/aclanthology.org\/D07-1043"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1016\/0377-0427(87)90125-7"},{"key":"e_1_3_2_1_38_1","volume-title":"Machine Learning-based Relative Valuation of Municipal Bonds. arXiv preprint arXiv:2408.02273","author":"Saha Preetha","year":"2024","unstructured":"Preetha Saha, Jingrao Lyu, Dhruv Desai, Rishab Chauhan, Jerinsh Jeyapaulraj, Philip Sommer, and Dhagash Mehta. 2024. Machine Learning-based Relative Valuation of Municipal Bonds. arXiv preprint arXiv:2408.02273 (2024)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2015.08.251"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.08.017"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1111\/1467-9868.00293"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/1756006.1953024"},{"volume-title":"Distance metric learning, with application to clustering with side-information(NIPS\u201902)","author":"Xing P.","key":"e_1_3_2_1_43_1","unstructured":"Eric\u00a0P. Xing, Andrew\u00a0Y. Ng, Michael\u00a0I. Jordan, and Stuart Russell. 2002. Distance metric learning, with application to clustering with side-information(NIPS\u201902). MIT Press, Cambridge, MA, USA, 521\u2013528."},{"key":"e_1_3_2_1_44_1","volume-title":"Critics, Counter-factuals and Semi-factuals. arXiv preprint arXiv:2408.06679","author":"Yampolsky Gregory","year":"2024","unstructured":"Gregory Yampolsky, Dhruv Desai, Mingshu Li, Stefano Pasquali, and Dhagash Mehta. 2024. Case-based Explainability for Random Forest: Prototypes, Critics, Counter-factuals and Semi-factuals. arXiv preprint arXiv:2408.06679 (2024)."}],"event":{"name":"ICAIF '24: 5th ACM International Conference on AI in Finance","acronym":"ICAIF '24","location":"Brooklyn NY USA"},"container-title":["Proceedings of the 5th ACM International Conference on AI in Finance"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698616","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677052.3698616","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:11:27Z","timestamp":1755882687000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677052.3698616"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,14]]},"references-count":44,"alternative-id":["10.1145\/3677052.3698616","10.1145\/3677052"],"URL":"https:\/\/doi.org\/10.1145\/3677052.3698616","relation":{},"subject":[],"published":{"date-parts":[[2024,11,14]]},"assertion":[{"value":"2024-11-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}