{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:53:05Z","timestamp":1775159585987,"version":"3.50.1"},"reference-count":159,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,8,1]],"date-time":"2026-08-01T00:00:00Z","timestamp":1785542400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Science Review"],"published-print":{"date-parts":[[2026,8]]},"DOI":"10.1016\/j.cosrev.2026.100974","type":"journal-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T17:35:10Z","timestamp":1775151310000},"page":"100974","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["From embeddings to interpretations: A comprehensive review of language models in clustering"],"prefix":"10.1016","volume":"61","author":[{"given":"Nazila","family":"Pourhaji Aghayengejeh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5898-0871","authenticated-orcid":false,"given":"M.A.","family":"Balafar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jafar","family":"Tanha","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Narjes","family":"Nikzad Khasmakhi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"issue":"9","key":"10.1016\/j.cosrev.2026.100974_bib0005","doi-asserted-by":"crossref","first-page":"5047","DOI":"10.1007\/s10115-024-02120-8","article-title":"An analysis of large language models: their impact and potential applications","volume":"66","author":"Bharathi Mohan","year":"2024","journal-title":"Knowl. Inf. Syst."},{"issue":"2","key":"10.1016\/j.cosrev.2026.100974_bib0010","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3605943","article-title":"Recent advances in natural language processing via large pre-trained language models: a survey","volume":"56","author":"Min","year":"2023","journal-title":"ACM Comput. Surv."},{"issue":"8","key":"10.1016\/j.cosrev.2026.100974_bib0015","doi-asserted-by":"crossref","first-page":"4557","DOI":"10.1007\/s11831-024-10115-5","article-title":"Demystifying Chatgpt: an in-depth survey of openai\u2019s robust large language models","volume":"31","author":"Bhattacharya","year":"2024","journal-title":"Arch. Comput. Methods Eng."},{"key":"10.1016\/j.cosrev.2026.100974_bib0020","doi-asserted-by":"crossref","DOI":"10.1109\/ACCESS.2025.3600739","article-title":"From Transformers to Chatgpt: an analysis of large language models research","volume":"13","author":"Cotfas","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.cosrev.2026.100974_bib0025","series-title":"Mining Text Data","first-page":"77","article-title":"A survey of text clustering algorithms","author":"Aggarwal","year":"2012"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100974_bib0030","doi-asserted-by":"crossref","first-page":"95","DOI":"10.1007\/s10791-025-09590-6","article-title":"Llm-based embeddings for clustering and predicting integrated reporting quality levels of companies","volume":"28","author":"Sarioglu","year":"2025","journal-title":"Discover Comput."},{"key":"10.1016\/j.cosrev.2026.100974_bib0035","doi-asserted-by":"crossref","DOI":"10.1016\/j.cosrev.2025.100792","article-title":"Revolutionizing textual data insights: a comprehensive review of the dual relationship between transformers and clustering in textual data analysis","volume":"58","author":"Aghayengejeh","year":"2025","journal-title":"Comput. Sci. Rev."},{"issue":"4","key":"10.1016\/j.cosrev.2026.100974_bib0040","first-page":"2187","article-title":"An integrated clustering and BERT framework for improved topic modeling","volume":"15","author":"George","year":"2023","journal-title":"Int. J. Inf. Technol."},{"key":"10.1016\/j.cosrev.2026.100974_bib0045","doi-asserted-by":"crossref","first-page":"192","DOI":"10.1016\/j.eswa.2019.05.030","article-title":"Text document clustering using spectral clustering algorithm with particle swarm optimization","volume":"134","author":"Janani","year":"2019","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.cosrev.2026.100974_bib0050","series-title":"Proceedings of the IEEE International Conference on Intelligent Systems and Knowledge Engineering","article-title":"A clustering-based approach on sentiment analysis","author":"Li","year":"2010"},{"key":"10.1016\/j.cosrev.2026.100974_bib0055","series-title":"Proceedings of the 2018 Federated Conference on Computer Science and Information Systems (FedCSIS)","article-title":"What was the question? A systematization of information retrieval and NLP problems","author":"D\u00f6rpinghaus","year":"2018"},{"key":"10.1016\/j.cosrev.2026.100974_bib0060","author":"Wang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0065","series-title":"Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing","article-title":"Llmedgerefine: enhancing text clustering with llm-based boundary point refinement","author":"Feng","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0070","series-title":"Proceedings of the 2025 5th International Conference on Advanced Research in Computing (ICARC)","article-title":"Intelligent document clustering for sinhala text: combining llm-based NER with fuzzy clustering techniques","author":"Rathnayake","year":"2025"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100974_bib0075","doi-asserted-by":"crossref","DOI":"10.1098\/rsos.241692","article-title":"Human-interpretable clustering of short text using large language models","volume":"12","author":"Miller","year":"2025","journal-title":"R. Soc. Open Sci."},{"key":"10.1016\/j.cosrev.2026.100974_bib0080","series-title":"Recent Advances in Universal Text Embeddings: A Comprehensive Review of Top-Performing Methods on the Mteb Benchmark","author":"Cao","year":"2023"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100974_bib0085","doi-asserted-by":"crossref","first-page":"342","DOI":"10.3390\/app13010342","article-title":"Short text clustering algorithms, application and challenges: a survey","volume":"13","author":"Ahmed","year":"2022","journal-title":"Appl. Sci."},{"issue":"3","key":"10.1016\/j.cosrev.2026.100974_bib0090","doi-asserted-by":"crossref","first-page":"239","DOI":"10.1007\/s41060-024-00540-x","article-title":"A comprehensive and analytical review of text clustering techniques","volume":"18","author":"Mehta","year":"2024","journal-title":"Int. J. Data Sci. Anal."},{"key":"10.1016\/j.cosrev.2026.100974_bib0095","series-title":"Cambridge Forum on AI: Law and Governance","article-title":"The architecture of language: understanding the mechanics behind llms","author":"Ferraris","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0100","series-title":"First Conference on Language Modeling","article-title":"On limitations of the transformer architecture","author":"Peng","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0105","doi-asserted-by":"crossref","first-page":"101630","DOI":"10.1109\/ACCESS.2023.3313420","article-title":"Prostate segmentation in MRI using transformer encoder and decoder framework","volume":"11","author":"Ren","year":"2023","journal-title":"IEEE Access"},{"issue":"5","key":"10.1016\/j.cosrev.2026.100974_bib0110","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3744746","article-title":"A comprehensive overview of large language models","volume":"16","author":"Naveed","year":"2025","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"10.1016\/j.cosrev.2026.100974_bib0115","series-title":"Advances in Neural Information Processing Systems","article-title":"Attention is all you need","volume":"vol. 30","author":"Vaswani","year":"2017"},{"key":"10.1016\/j.cosrev.2026.100974_bib0120","author":"Gao"},{"key":"10.1016\/j.cosrev.2026.100974_bib0125","series-title":"International Conference on Database Theory","first-page":"420","article-title":"On the surprising behavior of distance metrics in high dimensional space","author":"Aggarwal","year":"2001"},{"key":"10.1016\/j.cosrev.2026.100974_bib0130","series-title":"Introduction to Information Retrieval","author":"Manning","year":"2008"},{"key":"10.1016\/j.cosrev.2026.100974_bib0135","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.127046","article-title":"Joint u-nets with hierarchical graph structure and sparse transformer for hyperspectral image classification","volume":"275","author":"Zhu","year":"2025","journal-title":"Expert Syst. Appl."},{"issue":"17","key":"10.1016\/j.cosrev.2026.100974_bib0140","doi-asserted-by":"crossref","first-page":"1598","DOI":"10.1007\/s11227-025-08098-6","article-title":"Hybrid Cnn-transformer network with multi-scale attention for enhanced image compressive sensing","volume":"81","author":"Ma","year":"2025","journal-title":"J. Supercomput."},{"key":"10.1016\/j.cosrev.2026.100974_bib0145","doi-asserted-by":"crossref","DOI":"10.1016\/j.optlastec.2025.114182","article-title":"Channel-wise transformer with spectral-spatial gated self-attention for hyperspectral image classification","volume":"193","author":"Zhu","year":"2026","journal-title":"Opt. Laser Technol."},{"key":"10.1016\/j.cosrev.2026.100974_bib0150","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.107504","article-title":"Interpretable unsupervised neural network structure for data clustering via differentiable reconstruction of onmf and sparse autoencoder","author":"Gai","year":"2025","journal-title":"Neural Netw."},{"key":"10.1016\/j.cosrev.2026.100974_bib0155","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.119814","article-title":"Noise-tolerant clustering via joint doubly stochastic matrix regularization and dual sparse coding","volume":"222","author":"Shi","year":"2023","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.cosrev.2026.100974_bib0160","doi-asserted-by":"crossref","DOI":"10.1016\/j.chaos.2024.114670","article-title":"Neighborhood information based semi-supervised fuzzy c-means employing feature-weight and cluster-weight learning","volume":"181","author":"Jasim","year":"2024","journal-title":"Chaos Solitons Fractals"},{"issue":"4","key":"10.1016\/j.cosrev.2026.100974_bib0165","doi-asserted-by":"crossref","first-page":"571","DOI":"10.1016\/j.jss.2006.07.009","article-title":"Lessons from applying the systematic literature review process within the software engineering domain","volume":"80","author":"Brereton","year":"2007","journal-title":"J. Syst. Softw."},{"key":"10.1016\/j.cosrev.2026.100974_bib0170","doi-asserted-by":"crossref","first-page":"90109","DOI":"10.1109\/ACCESS.2025.3571518","article-title":"Lise: a logic-based interactive similarity explainer for clusters of RDF data","author":"Colucci","year":"2025","journal-title":"IEEE Access"},{"key":"10.1016\/j.cosrev.2026.100974_bib0175","series-title":"Proceedings of the International Symposium on Intelligent Data Analysis","article-title":"Beyond words: a comparative analysis of LLM embeddings for effective clustering","author":"Keraghel","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0180","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Llmemb: large language model can be a good embedding generator for sequential recommendation","author":"Liu","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0185","series-title":"Proceedings of the 2024 IEEE Pacific Rim Conference on Communications, Computers and Signal Processing (PACRIM)","article-title":"Exploiting LLM embeddings for content-based IOT anomaly detection","author":"Wang","year":"2024"},{"issue":"5","key":"10.1016\/j.cosrev.2026.100974_bib0190","doi-asserted-by":"crossref","DOI":"10.1111\/exsy.70041","article-title":"A bert-based multi-embedding fusion method using review text for recommendation","volume":"42","author":"Lim","year":"2025","journal-title":"Expert Syst."},{"key":"10.1016\/j.cosrev.2026.100974_bib0195","doi-asserted-by":"crossref","DOI":"10.1016\/j.mex.2025.103656","article-title":"Integrating commonsense knowledge with GPT embeddings for emotion classification","author":"Yadav","year":"2025","journal-title":"MethodsX"},{"key":"10.1016\/j.cosrev.2026.100974_bib0200","series-title":"Improving Language Understanding by Generative Pre-Training","author":"Radford","year":"2018"},{"issue":"21","key":"10.1016\/j.cosrev.2026.100974_bib0205","doi-asserted-by":"crossref","DOI":"10.3390\/app122111220","article-title":"Identification and visualization of key topics in scientific publications with transformer-based language models and document clustering methods","volume":"12","author":"Weng","year":"2022","journal-title":"Appl. Sci."},{"key":"10.1016\/j.cosrev.2026.100974_bib0210","first-page":"100","article-title":"Text clustering with large language model embeddings","volume":"6","author":"Petukhova","year":"2025","journal-title":"Int. J. Cogn. Comput. Eng."},{"key":"10.1016\/j.cosrev.2026.100974_bib0215","series-title":"Advances in Neural Information Processing Systems","article-title":"Xlnet: generalized autoregressive pretraining for language understanding","volume":"vol. 32","author":"Yang","year":"2019"},{"key":"10.1016\/j.cosrev.2026.100974_bib0220","series-title":"Proceedings of the 2021 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","article-title":"News2mapping: a news events correlation model for news videos","author":"Zhou","year":"2021"},{"key":"10.1016\/j.cosrev.2026.100974_bib0225","author":"Wang"},{"issue":"8","key":"10.1016\/j.cosrev.2026.100974_bib0230","doi-asserted-by":"crossref","first-page":"5178","DOI":"10.3390\/app13085178","article-title":"On the use of transformer-based models for intent detection using clustering algorithms","volume":"13","author":"Moura","year":"2023","journal-title":"Appl. Sci."},{"key":"10.1016\/j.cosrev.2026.100974_bib0235","series-title":"Proceedings of NAACL-HLT","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Kenton","year":"2019"},{"key":"10.1016\/j.cosrev.2026.100974_bib0240","series-title":"Proceedings of the 2021 2nd Information Communication Technologies Conference (ICTC)","article-title":"Improved k-means text clustering algorithm based on BERT and density peak","author":"Hu","year":"2021"},{"key":"10.1016\/j.cosrev.2026.100974_bib0245","series-title":"Proceedings of the 2022 2nd International Conference on Advance Computing and Innovative Technologies in Engineering (ICACITE)","article-title":"Framework for topic modeling using BERT, LDA and k-means","author":"Sethia","year":"2022"},{"key":"10.1016\/j.cosrev.2026.100974_bib0250","series-title":"Proceedings of the 2020 5th International Conference on Computing, Communication and Security (ICCCS)","article-title":"Extractive text summarization using dynamic clustering and co-reference on BERT","author":"Srikanth","year":"2020"},{"key":"10.1016\/j.cosrev.2026.100974_bib0255","series-title":"Proceedings of the 2024 International Conference on Computer, Control, Informatics and Its Applications (IC3INA)","article-title":"Bert-based deep embedded clustering for topic modeling","author":"Cahyadi","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0260","series-title":"Proceedings of the 2024 International Joint Conference on Neural Networks (IJCNN)","article-title":"A hybrid clustering and BERT model for Chinese threads in social media","author":"Wu","year":"2024"},{"issue":"2","key":"10.1016\/j.cosrev.2026.100974_bib0265","doi-asserted-by":"crossref","first-page":"1","DOI":"10.58898\/sij.v2i2.01-13","article-title":"Text mining: clustering using BERT and probabilistic topic modeling","volume":"2","author":"Datchanamoorthy","year":"2023","journal-title":"Soc. Inform. J."},{"key":"10.1016\/j.cosrev.2026.100974_bib0270","author":"He"},{"key":"10.1016\/j.cosrev.2026.100974_bib0275","series-title":"Proceedings of the 5th Workshop on Challenges and Applications of Automated Extraction of Socio-Political Events from Text (CASE)","article-title":"Arc-nlp at case 2022 task 1: ensemble learning for multilingual protest event detection","author":"Sahin","year":"2022"},{"key":"10.1016\/j.cosrev.2026.100974_bib0280","series-title":"Proceedings of the CCF International Conference on Natural Language Processing and Chinese Computing","article-title":"Chinese metaphor recognition using a multi-stage prompting large language model","author":"Wang","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0285","doi-asserted-by":"crossref","first-page":"21517","DOI":"10.1109\/ACCESS.2022.3152828","article-title":"Roberta-lstm: a hybrid model for sentiment analysis with transformer and recurrent neural network","volume":"10","author":"Tan","year":"2022","journal-title":"IEEE Access"},{"issue":"11","key":"10.1016\/j.cosrev.2026.100974_bib0290","doi-asserted-by":"crossref","DOI":"10.1111\/exsy.13701","article-title":"Exploring transformer models for sentiment classification: a comparison of BERT, Roberta, Albert, Distilbert, and Xlnet","volume":"41","author":"Areshey","year":"2024","journal-title":"Expert Systems"},{"key":"10.1016\/j.cosrev.2026.100974_bib0295","series-title":"Proceedings of the 2021 20th IEEE International Conference on Machine Learning and Applications (ICMLA)","article-title":"Automating questions and answers of goods and services tax system using clustering and embeddings of queries","author":"Dikshit","year":"2021"},{"key":"10.1016\/j.cosrev.2026.100974_bib0300","series-title":"Proceedings of the 30th ACM International Conference on Information and Knowledge Management (CIKM)","article-title":"How to leverage a multi-layered transformer language model for text clustering: an ensemble approach","author":"Ait-Saada","year":"2021"},{"key":"10.1016\/j.cosrev.2026.100974_bib0305","series-title":"Proceedings of the Workshop on Natural Language Processing for Indigenous Languages of the Americas (AmericasNLP)","article-title":"Fine-tuning sentence-roberta to construct word embeddings for low-resource languages from bilingual dictionaries","author":"Bear","year":"2023"},{"key":"10.1016\/j.cosrev.2026.100974_bib0310","author":"Wehrli"},{"key":"10.1016\/j.cosrev.2026.100974_bib0315","author":"Sanh"},{"key":"10.1016\/j.cosrev.2026.100974_bib0320","series-title":"Proceedings of the 2022 4th International Conference on Advances in Computing, Communication Control and Networking (ICAC3N)","article-title":"Tweet summarization using clustering mechanisms","author":"Ampili","year":"2022"},{"issue":"8","key":"10.1016\/j.cosrev.2026.100974_bib0325","doi-asserted-by":"crossref","first-page":"950","DOI":"10.1007\/s11227-025-07414-4","article-title":"Optimizing Sbert for long text clustering: two novel approaches with empirical insights","volume":"81","author":"Ortakci","year":"2025","journal-title":"J. Supercomput."},{"key":"10.1016\/j.cosrev.2026.100974_bib0330","author":"Zhang"},{"issue":"140","key":"10.1016\/j.cosrev.2026.100974_bib0335","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel","year":"2020","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.cosrev.2026.100974_bib0340","author":"Zafarani-Moattar"},{"key":"10.1016\/j.cosrev.2026.100974_bib0345","author":"Su"},{"key":"10.1016\/j.cosrev.2026.100974_bib0350","doi-asserted-by":"crossref","first-page":"321","DOI":"10.1162\/tacl_a_00648","article-title":"Large language models enable few-shot clustering","volume":"12","author":"Viswanathan","year":"2024","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"10.1016\/j.cosrev.2026.100974_bib0355","series-title":"European Conference on Machine Learning","first-page":"674","article-title":"K-means with large and noisy constraint sets","author":"Pelleg","year":"2007"},{"key":"10.1016\/j.cosrev.2026.100974_bib0360","series-title":"Constrained Clustering: Advances in Algorithms, Theory, and Applications","author":"Basu","year":"2008"},{"key":"10.1016\/j.cosrev.2026.100974_bib0365","series-title":"Proceedings of the 2005 SIAM International Conference on Data Mining","first-page":"138","article-title":"Clustering with constraints: feasibility issues and the k-means algorithm","author":"Davidson","year":"2005"},{"key":"10.1016\/j.cosrev.2026.100974_bib0370","author":"Awasthi"},{"key":"10.1016\/j.cosrev.2026.100974_bib0375","author":"Zhang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0380","series-title":"International Conference on Machine Learning","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"Finn","year":"2017"},{"key":"10.1016\/j.cosrev.2026.100974_bib0385","series-title":"Advances in Neural Information Processing Systems","first-page":"1877","article-title":"Language models are few-shot learners","volume":"vol. 33","author":"Brown","year":"2020"},{"key":"10.1016\/j.cosrev.2026.100974_bib0390","author":"Aky\u00fcrek"},{"key":"10.1016\/j.cosrev.2026.100974_bib0395","series-title":"International Conference on Machine Learning","first-page":"35151","article-title":"Transformers learn in-context by gradient descent","author":"Von Oswald","year":"2023"},{"key":"10.1016\/j.cosrev.2026.100974_bib0400","series-title":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","article-title":"Tnt-Llm: text mining at scale with large language models","author":"Wan","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0405","author":"Huang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0410","series-title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","article-title":"Llm-guided semantic-aware clustering for topic modeling","author":"Liu","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0415","author":"Diaz-Rodriguez"},{"key":"10.1016\/j.cosrev.2026.100974_bib0420","author":"De Raedt"},{"key":"10.1016\/j.cosrev.2026.100974_bib0425","author":"Wang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0430","series-title":"Proceedings of the 2025 International Conference on Industrial Engineering, Applications and Manufacturing (ICIEAM)","article-title":"Automatic interpreting of network attacks using hybrid architecture based on catboost and LLM","author":"Tokarev","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0435","article-title":"Llm-in-the-loop: replicating human insight with llms for better machine learning applications","author":"Hong","year":"2025","journal-title":"Authorea Preprints"},{"key":"10.1016\/j.cosrev.2026.100974_bib0440","author":"Wei"},{"key":"10.1016\/j.cosrev.2026.100974_bib0445","first-page":"1","article-title":"Clustering algorithms and RAG enhancing semi-supervised text classification with large llms","author":"Zhong","year":"2025","journal-title":"Int. J. Data Sci. Anal."},{"key":"10.1016\/j.cosrev.2026.100974_bib0450","series-title":"Proceedings of the 2025 IEEE Symposium on Trustworthy, Explainable and Responsible Computational Intelligence (CITREx Companion)","article-title":"Improving clustering explainability and automated cluster naming with approximate inverse model explanations and large language models","author":"Nakanishi","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0455","author":"Pei"},{"key":"10.1016\/j.cosrev.2026.100974_bib0460","author":"Petnehazi"},{"key":"10.1016\/j.cosrev.2026.100974_bib0465","series-title":"Preliminary Study in Using Large Language Models as Interpretation Assistants in Cluster Analysis for Customer Segmentation","author":"Panjkota","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0470","doi-asserted-by":"crossref","first-page":"73603","DOI":"10.1109\/ACCESS.2025.3562560","article-title":"Llm-assisted ontology restriction verification with clustering-based description generation","author":"Kim","year":"2025","journal-title":"IEEE Access"},{"issue":"11","key":"10.1016\/j.cosrev.2026.100974_bib0475","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s10994-025-06867-1","article-title":"Llm-based feature generation from text for interpretable machine learning","volume":"114","author":"Balek","year":"2025","journal-title":"Mach. Learn."},{"key":"10.1016\/j.cosrev.2026.100974_bib0480","series-title":"Proceedings of the 2025 IEEE\/ACIS 23rd International Conference on Software Engineering Research, Management and Applications (SERA)","article-title":"Interpretable news clustering and topic attribution for the Us presidential election: an AIME and LLM approach","author":"Nakanishi","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0485","series-title":"Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies","first-page":"1536","article-title":"Automatic labelling of Topic models","author":"Lau","year":"2011"},{"key":"10.1016\/j.cosrev.2026.100974_bib0490","series-title":"Elements of Information Theory","author":"Cover","year":"1999"},{"key":"10.1016\/j.cosrev.2026.100974_bib0495","series-title":"Proceedings of the 14th Conference of the European Chapter of the Association for Computational Linguistics","first-page":"530","article-title":"Machine reading tea leaves: automatically evaluating topic coherence and topic model quality","author":"Lau","year":"2014"},{"key":"10.1016\/j.cosrev.2026.100974_bib0500","article-title":"Comparing human-perceived cluster characteristics through the lens of Ciphe: measuring coherence beyond keywords","volume":"NLP4DH","author":"Eklund","year":"2025","journal-title":"J. Data Min. Digit. Humanit."},{"key":"10.1016\/j.cosrev.2026.100974_bib0505","article-title":"On the (in)fidelity and sensitivity of explanations","volume":"32","author":"Yeh","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.cosrev.2026.100974_bib0510","author":"Hong"},{"key":"10.1016\/j.cosrev.2026.100974_bib0515","series-title":"Proceedings of the 2024 4th International Conference on Advanced Research in Computing (ICARC)","article-title":"Enhancing customer segmentation using large language models (llms) and deterministic, independent-of-corpus embeddings (dice)","author":"Tissera","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0520","doi-asserted-by":"crossref","DOI":"10.1016\/j.jretconser.2024.104078","article-title":"Consumer segmentation with large language models","volume":"82","author":"Li","year":"2025","journal-title":"J. Retail. Consum. Serv."},{"key":"10.1016\/j.cosrev.2026.100974_bib0525","author":"Larson"},{"key":"10.1016\/j.cosrev.2026.100974_bib0530","author":"Casanueva"},{"key":"10.1016\/j.cosrev.2026.100974_bib0535","series-title":"Proceedings of the 2016 IEEE 32nd International Conference on Data Engineering (ICDE)","article-title":"A model-based approach for text clustering with outlier detection","author":"Yin","year":"2016"},{"key":"10.1016\/j.cosrev.2026.100974_bib0540","series-title":"Twenty Newsgroups","author":"Mitchell","year":"1999"},{"key":"10.1016\/j.cosrev.2026.100974_bib0545","series-title":"CSTR VCTK Corpus: English Multi-Speaker Corpus for CSTR Voice Cloning Toolkit (Version 0.92)","author":"Yamagishi","year":"2019"},{"key":"10.1016\/j.cosrev.2026.100974_bib0550","series-title":"Proceedings of the 17th International Conference on World Wide Web","article-title":"Learning to classify short and sparse text & web with hidden topics from large-scale data collections","author":"Phan","year":"2008"},{"key":"10.1016\/j.cosrev.2026.100974_bib0555","first-page":"1","article-title":"Classification of multi-labeled text articles with Reuters dataset using SVM","author":"Al Hasan","year":"2022","journal-title":"Int. Conf. Sci. Technol."},{"key":"10.1016\/j.cosrev.2026.100974_bib0560","doi-asserted-by":"crossref","first-page":"22","DOI":"10.1016\/j.neunet.2016.12.008","article-title":"Self-taught convolutional neural networks for short text clustering","volume":"88","author":"Xu","year":"2017","journal-title":"Neural Netw."},{"issue":"17","key":"10.1016\/j.cosrev.2026.100974_bib0565","first-page":"18","article-title":"Analysis of the influence of preprocessing techniques on text classification accuracy: an investigation with the naive Bayes model and the Reuters-21578 dataset","volume":"11","author":"Acosta","year":"2021","journal-title":"Essence"},{"key":"10.1016\/j.cosrev.2026.100974_bib0570","series-title":"Proceedings of the 20th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining","article-title":"A Dirichlet multinomial mixture model-based approach for short text clustering","author":"Yin","year":"2014"},{"key":"10.1016\/j.cosrev.2026.100974_bib0575","series-title":"Proceedings of the 23rd International Conference on Machine Learning","article-title":"Practical solutions to the problem of diagonal dominance in kernel document clustering","author":"Greene","year":"2006"},{"key":"10.1016\/j.cosrev.2026.100974_bib0580","author":"Dernoncourt"},{"key":"10.1016\/j.cosrev.2026.100974_bib0585","first-page":"361","article-title":"Rcv1: a new benchmark collection for text categorization research","volume":"5","author":"Lewis","year":"2004","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.cosrev.2026.100974_bib0590","author":"Wang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0595","series-title":"2018 Second International Conference on Electronics, Communication and Aerospace Technology (ICECA)","article-title":"Survey on sentiment analysis using Twitter dataset","author":"Wagh","year":"2018"},{"key":"10.1016\/j.cosrev.2026.100974_bib0600","first-page":"1","article-title":"A distilbertopic model for short text documents","author":"Rashid","year":"2022","journal-title":"Proc. 20th Annu. Workshop Australas. Lang. Technol. Assoc."},{"key":"10.1016\/j.cosrev.2026.100974_bib0605","doi-asserted-by":"crossref","DOI":"10.1016\/j.is.2022.102131","article-title":"Topic modeling algorithms and applications: a survey","volume":"112","author":"Abdelrazek","year":"2023","journal-title":"Inf. Syst."},{"key":"10.1016\/j.cosrev.2026.100974_bib0610","series-title":"Advances in Neural Information Processing Systems","article-title":"Teaching machines to read and comprehend","volume":"vol. 28","author":"Hermann","year":"2015"},{"key":"10.1016\/j.cosrev.2026.100974_bib0615","series-title":"Proceedings of the 18th ACM Conference on Computer Supported Cooperative Work and Social Computing","article-title":"What to expect when the unexpected happens: social media communications across crises","author":"Olteanu","year":"2015"},{"key":"10.1016\/j.cosrev.2026.100974_bib0620","author":"Ofli"},{"key":"10.1016\/j.cosrev.2026.100974_bib0625","series-title":"Proceedings of the International AAAI Conference on Web and Social Media","article-title":"Crisislex: a lexicon for collecting and filtering microblogged communications in crises","author":"Olteanu","year":"2014"},{"key":"10.1016\/j.cosrev.2026.100974_bib0630","first-page":"1","article-title":"Pre-training meets clustering: a hybrid extractive multi-document summarization model","author":"Karotia","year":"2022","journal-title":"Int. Conf. Hybrid Intell. Syst."},{"key":"10.1016\/j.cosrev.2026.100974_bib0635","doi-asserted-by":"crossref","first-page":"549","DOI":"10.1007\/s10579-014-9274-3","article-title":"Creating language resources for under-resourced languages: methodologies, and experiments with Arabic","volume":"49","author":"El-Haj","year":"2015","journal-title":"Lang. Resour. Eval."},{"key":"10.1016\/j.cosrev.2026.100974_bib0640","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2020.113679","article-title":"Automatic text summarization: a comprehensive survey","volume":"165","author":"El-Kassas","year":"2021","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.cosrev.2026.100974_bib0645","author":"Gliwa"},{"key":"10.1016\/j.cosrev.2026.100974_bib0650","first-page":"2565","article-title":"Abstractive summarization: a survey of the state of the art","volume":"33","author":"Lin","year":"2019","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.cosrev.2026.100974_bib0655","author":"Ganesan"},{"key":"10.1016\/j.cosrev.2026.100974_bib0660","author":"Lu"},{"key":"10.1016\/j.cosrev.2026.100974_bib0665","author":"Hu"},{"key":"10.1016\/j.cosrev.2026.100974_bib0670","author":"Pang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0675","series-title":"Proceedings of the 11th ACM Symposium on Document Engineering","article-title":"Contributions to the study of SMS SPAM filtering: new collection and results","author":"Almeida","year":"2011"},{"key":"10.1016\/j.cosrev.2026.100974_bib0680","author":"Pang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0685","series-title":"Advances in Neural Information Processing Systems","article-title":"Character-level convolutional networks for text classification","volume":"vol. 28","author":"Zhang","year":"2015"},{"issue":"2","key":"10.1016\/j.cosrev.2026.100974_bib0690","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1177\/0022022114557479","article-title":"Universality versus cultural specificity of three emotion domains: some evidence based on the cascading model of emotional intelligence","volume":"46","author":"Shao","year":"2015","journal-title":"J. Cross-Cult. Psychol."},{"key":"10.1016\/j.cosrev.2026.100974_bib0695","author":"Rahman"},{"key":"10.1016\/j.cosrev.2026.100974_bib0700","series-title":"Proceedings of the 21st International Conference on Natural Language Processing (ICON)","article-title":"Improving few-shot prompting using cluster-based sample retrieval for medical NER in clinical text","author":"Mohan","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0705","series-title":"Proceedings of the 2024 IEEE International Conference on Big Data (BigData)","article-title":"Advancing personalized medicine: a scalable llm-based recommender system for patient matching","author":"Berger","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0710","series-title":"Proceedings of the 2025 International Conference on Artificial Intelligence, Computer, Data Sciences and Applications (ACDSA)","article-title":"Embedding-enhanced patient clustering for customized medical report summarization using llms","author":"Torabizadeh","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0715","author":"Liang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0720","author":"Liu"},{"key":"10.1016\/j.cosrev.2026.100974_bib0725","series-title":"Transactions of ADIA Lab: Interdisciplinary Advances in Data and Computational Science","first-page":"359","article-title":"Toward automating causal discovery in financial markets and beyond","author":"Sokolov","year":"2025"},{"key":"10.1016\/j.cosrev.2026.100974_bib0730","series-title":"Proceedings of the 18th ACM Conference on Recommender Systems","article-title":"Llms for user interest exploration in large-scale recommendation systems","author":"Wang","year":"2024"},{"key":"10.1016\/j.cosrev.2026.100974_bib0735","author":"John"},{"issue":"1","key":"10.1016\/j.cosrev.2026.100974_bib0740","doi-asserted-by":"crossref","first-page":"42","DOI":"10.23919\/ICN.2024.0004","article-title":"Recbert: semantic recommendation engine with large language model enhanced query segmentation for k-nearest neighbors ranking retrieval","volume":"5","author":"Wu","year":"2024","journal-title":"Intell. Converg. Netw."},{"key":"10.1016\/j.cosrev.2026.100974_bib0745","article-title":"Personalized recommendation with clustering via prompt-tuning","author":"Wu","year":"2025","journal-title":"Intell. Data Anal."},{"issue":"9","key":"10.1016\/j.cosrev.2026.100974_bib0750","doi-asserted-by":"crossref","first-page":"763","DOI":"10.1093\/bioinformatics\/17.9.763","article-title":"Details of the adjusted RAND index and clustering algorithms: supplement to the paper \u201can empirical study on principal component analysis for clustering gene expression data\u201d","volume":"17","author":"Yeung","year":"2001","journal-title":"Bioinformatics"},{"issue":"8","key":"10.1016\/j.cosrev.2026.100974_bib0755","first-page":"207","article-title":"Clustering result evaluation method based on mutual information and contour coefficient","volume":"41","author":"Shizhuang","year":"2020","journal-title":"J. Ord. Equip. Eng."},{"key":"10.1016\/j.cosrev.2026.100974_bib0760","author":"Wang"},{"key":"10.1016\/j.cosrev.2026.100974_bib0765","series-title":"Proceedings of the 24th International Conference on Neural Information Processing (ICONIP 2017), Guangzhou, China","article-title":"Deep clustering with convolutional autoencoders","author":"Guo","year":"2017"},{"issue":"8","key":"10.1016\/j.cosrev.2026.100974_bib0770","doi-asserted-by":"crossref","first-page":"3669","DOI":"10.1109\/TKDE.2020.3028943","article-title":"Deep feature-based text clustering and its explanation","volume":"34","author":"Guan","year":"2020","journal-title":"IEEE Trans. Knowl. Data Eng."},{"key":"10.1016\/j.cosrev.2026.100974_bib0775","series-title":"Introduction to Information Retrieval","author":"Christopher","year":"2008"},{"issue":"9","key":"10.1016\/j.cosrev.2026.100974_bib0780","doi-asserted-by":"crossref","first-page":"763","DOI":"10.1093\/bioinformatics\/17.9.763","article-title":"Principal component analysis for clustering gene expression data","volume":"17","author":"Yeung","year":"2001","journal-title":"Bioinformatics"},{"key":"10.1016\/j.cosrev.2026.100974_bib0785","series-title":"Big Data","first-page":"357","article-title":"A case study in big data analytics: exploring Twitter sentiment analysis and the weather","author":"Sinnott","year":"2016"},{"key":"10.1016\/j.cosrev.2026.100974_bib0790","series-title":"Finding Groups in Data: an Introduction to Cluster Analysis","author":"Kaufman","year":"1990"},{"key":"10.1016\/j.cosrev.2026.100974_bib0795","series-title":"Introduction to Information Retrieval","author":"Manning","year":"2008"}],"container-title":["Computer Science Review"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1574013726000821?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1574013726000821?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:07:28Z","timestamp":1775156848000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1574013726000821"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,8]]},"references-count":159,"alternative-id":["S1574013726000821"],"URL":"https:\/\/doi.org\/10.1016\/j.cosrev.2026.100974","relation":{},"ISSN":["1574-0137"],"issn-type":[{"value":"1574-0137","type":"print"}],"subject":[],"published":{"date-parts":[[2026,8]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"From embeddings to interpretations: A comprehensive review of language models in clustering","name":"articletitle","label":"Article Title"},{"value":"Computer Science Review","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.cosrev.2026.100974","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Inc. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"100974"}}