{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T08:43:48Z","timestamp":1770713028481,"version":"3.49.0"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,2,10]],"date-time":"2025-02-10T00:00:00Z","timestamp":1739145600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,2,10]],"date-time":"2025-02-10T00:00:00Z","timestamp":1739145600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"STI2030-Major Projects","award":["2021ZD0200200"],"award-info":[{"award-number":["2021ZD0200200"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62202446"],"award-info":[{"award-number":["62202446"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Vis"],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s12650-025-01043-y","type":"journal-article","created":{"date-parts":[[2025,2,10]],"date-time":"2025-02-10T07:11:45Z","timestamp":1739171505000},"page":"625-643","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["TextLens: large language models-powered visual analytics enhancing text clustering"],"prefix":"10.1007","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-1707-8941","authenticated-orcid":false,"given":"Ruixiao","family":"Peng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Dong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guan","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dong","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guihua","family":"Shan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,2,10]]},"reference":[{"key":"1043_CR1","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2018) Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805"},{"key":"1043_CR2","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S, et al (2023) Gpt-4 technical report. arXiv preprint arXiv:2303.08774"},{"key":"1043_CR3","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Polosukhin I (2017) Attention is all you need. Advances in neural information processing systems, 30"},{"key":"1043_CR4","unstructured":"MacQueen J. Classification and analysis of multivariate observations. In: 5th Berkeley Symp. Math. Statist. Probability, pp. 281\u2013297"},{"key":"1043_CR5","doi-asserted-by":"crossref","unstructured":"Zhang D, Nan F, Wei X, Li S, Zhu H, McKeown K, Nallapati R, Arnold A, Xiang B (2021) Supporting clustering with contrastive learning. arXiv preprint arXiv:2103.12953","DOI":"10.18653\/v1\/2021.naacl-main.427"},{"key":"1043_CR6","doi-asserted-by":"crossref","unstructured":"Muennighoff N, Tazi N, Magne L, Reimers N (2022) Mteb: Massive text embedding benchmark. arXiv preprint arXiv:2210.07316","DOI":"10.18653\/v1\/2023.eacl-main.148"},{"key":"1043_CR7","unstructured":"Wang L, Yang N, Huang X, Jiao B, Yang L, Jiang D, Majumder R, Wei F (2022) Text embeddings by weakly-supervised contrastive pre-training. arXiv preprint arXiv:2212.03533"},{"key":"1043_CR8","doi-asserted-by":"crossref","unstructured":"Su H, Shi W, Kasai J, Wang Y, Hu Y, Ostendorf M, Yih W-t, Smith NA, Zettlemoyer L, Yu T (2022) One embedder, any task: Instruction-finetuned text embeddings. arXiv preprint arXiv:2212.09741","DOI":"10.18653\/v1\/2023.findings-acl.71"},{"key":"1043_CR9","doi-asserted-by":"crossref","unstructured":"Zhang Y, Wang Z, Shang J (2023) Clusterllm: Large language models as a guide for text clustering. arXiv preprint arXiv:2305.14871","DOI":"10.18653\/v1\/2023.emnlp-main.858"},{"key":"1043_CR10","doi-asserted-by":"crossref","unstructured":"Viswanathan V, Gashteovski K, Lawrence C, Wu T, Neubig G (2023) Large language models enable few-shot clustering. arXiv preprint arXiv:2307.00524","DOI":"10.1162\/tacl_a_00648"},{"key":"1043_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2022.102131","volume":"112","author":"A Abdelrazek","year":"2023","unstructured":"Abdelrazek A, Eid Y, Gawish E, Medhat W, Hassan A (2023) Topic modeling algorithms and applications: a survey. Inf Syst 112:102131","journal-title":"Inf Syst"},{"key":"1043_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2020.101582","volume":"94","author":"I Vayansky","year":"2020","unstructured":"Vayansky I, Kumar SA (2020) A review of topic modeling methods. Inf Syst 94:101582","journal-title":"Inf Syst"},{"issue":"6","key":"1043_CR13","doi-asserted-by":"publisher","first-page":"391","DOI":"10.1002\/(SICI)1097-4571(199009)41:6<391::AID-ASI1>3.0.CO;2-9","volume":"41","author":"S Deerwester","year":"1990","unstructured":"Deerwester S, Dumais ST, Furnas GW, Landauer TK, Harshman R (1990) Indexing by latent semantic analysis. J Am Soc Inf Sci 41(6):391\u2013407","journal-title":"J Am Soc Inf Sci"},{"issue":"Jan","key":"1043_CR14","first-page":"993","volume":"3","author":"DM Blei","year":"2003","unstructured":"Blei DM, Ng AY, Jordan MI (2003) Latent dirichlet allocation. J Mach Learn Res 3(Jan):993\u20131022","journal-title":"J Mach Learn Res"},{"key":"1043_CR15","unstructured":"Lee DD, Pham P, Largman Y, Ng A (2009) Advances in neural information processing systems 22. Tech Rep"},{"key":"1043_CR16","doi-asserted-by":"crossref","unstructured":"Blei DM, Lafferty JD (2006) Dynamic topic models. In: Proceedings of the 23rd International Conference on Machine Learning, pp. 113\u2013120","DOI":"10.1145\/1143844.1143859"},{"key":"1043_CR17","unstructured":"Grootendorst M (2022) Bertopic: Neural topic modeling with a class-based tf-idf procedure. arXiv preprint arXiv:2203.05794"},{"key":"1043_CR18","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1162\/tacl_a_00325","volume":"8","author":"AB Dieng","year":"2020","unstructured":"Dieng AB, Ruiz FJ, Blei DM (2020) Topic modeling in embedding spaces. Trans Assoc Comput Linguist 8:439\u2013453","journal-title":"Trans Assoc Comput Linguist"},{"key":"1043_CR19","unstructured":"Srivastava A, Sutton C (2017) Autoencoding variational inference for topic models. arXiv preprint arXiv:1703.01488"},{"key":"1043_CR20","unstructured":"Miao Y, Grefenstette E, Blunsom P (2017) Discovering discrete latent topics with neural variational inference. In: International Conference on Machine Learning, pp. 2410\u20132419. PMLR"},{"key":"1043_CR21","doi-asserted-by":"crossref","unstructured":"Bianchi F, Terragni S, Hovy D (2020) Pre-training is a hot topic: contextualized document embeddings improve topic coherence. arXiv preprint arXiv:2004.03974","DOI":"10.18653\/v1\/2021.acl-short.96"},{"issue":"2","key":"1043_CR22","doi-asserted-by":"publisher","first-page":"403","DOI":"10.1007\/s12650-022-00882-3","volume":"26","author":"D Yu","year":"2023","unstructured":"Yu D, Ian O, Jie L, Xiaoru Y, Vinh NQ (2023) User-centered visual explorer of in-process comparison in spatiotemporal space. J Visual 26(2):403\u2013421","journal-title":"J Visual"},{"issue":"1","key":"1043_CR23","doi-asserted-by":"publisher","first-page":"161","DOI":"10.1007\/s41095-023-0353-5","volume":"10","author":"Y Dong","year":"2024","unstructured":"Dong Y, Liang CJ, Chen Y, Hua J (2024) A visual modeling method for spatiotemporal and multidimensional features in epidemiological analysis: applied covid-19 aggregated datasets. Comput Visual Media 10(1):161\u2013186","journal-title":"Comput Visual Media"},{"key":"1043_CR24","doi-asserted-by":"publisher","first-page":"381","DOI":"10.1007\/s12650-020-00703-5","volume":"24","author":"K Chen","year":"2021","unstructured":"Chen K, Wang Y, Yu M, Shen H-W, Yu X, Shan G (2021) Confvisexplorer: a literature-based visual analysis system for conference comparison. J Visual 24:381\u2013395","journal-title":"J Visual"},{"issue":"6","key":"1043_CR25","doi-asserted-by":"publisher","first-page":"1445","DOI":"10.1007\/s12650-023-00941-3","volume":"26","author":"M Tian","year":"2023","unstructured":"Tian M, Li G, Yuan X (2023) Litvis: a visual analytics approach for managing and exploring literature. J Visual 26(6):1445\u20131458","journal-title":"J Visual"},{"key":"1043_CR26","unstructured":"Petukhova A, Matos-Carvalho JP, Fachada N (2024) Text clustering with llm embeddings. arXiv preprint arXiv:2403.15112"},{"key":"1043_CR27","unstructured":"Learning S-S (2006) Semi-supervised learning. CSZ2006. html, 5"},{"key":"1043_CR28","doi-asserted-by":"crossref","unstructured":"Basu S, Banerjee A, Mooney RJ (2004) Active semi-supervision for pairwise constrained clustering. In: Proceedings of the 2004 SIAM International Conference on Data Mining, pp. 333\u2013344. SIAM","DOI":"10.1137\/1.9781611972740.31"},{"key":"1043_CR29","doi-asserted-by":"crossref","unstructured":"Ni J, Abrego GH, Constant N, Ma J, Hall KB, Cer D, Yang Y (2021) Sentence-t5: Scalable sentence encoders from pre-trained text-to-text models. arXiv preprint arXiv:2108.08877","DOI":"10.18653\/v1\/2022.findings-acl.146"},{"key":"1043_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/9780470689646.ch1","volume-title":"Text Mining: Applications and Theory","author":"S Rose","year":"2010","unstructured":"Rose S, Engel D, Cramer N, Cowley W (2010) Automatic Keyword Extraction from Individual Documents. In: Berry MW, Kogan J (eds) Text Mining: Applications and Theory. Wiley, pp 1\u201320. https:\/\/doi.org\/10.1002\/9780470689646.ch1"},{"key":"1043_CR31","unstructured":"Mihalcea R, Tarau P (2004) Textrank: Bringing order into text"},{"issue":"1\u20132","key":"1043_CR32","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1002\/nav.3800020109","volume":"2","author":"HW Kuhn","year":"1955","unstructured":"Kuhn HW (1955) The hungarian method for the assignment problem. Naval Res Logist Quart 2(1\u20132):83\u201397","journal-title":"Naval Res Logist Quart"},{"issue":"3","key":"1043_CR33","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1111\/cgf.13995","volume":"39","author":"C Ma","year":"2020","unstructured":"Ma C, Zhao Y, AL-Dohuki S, Yang J, Ye X, Kamw F, Amiruzzaman Md (2020) GTMapLens: interactive lens for geo-text data browsing on map. Comput Graph Forum 39(3):469\u2013481. https:\/\/doi.org\/10.1111\/cgf.13995","journal-title":"Comput Graph Forum"},{"key":"1043_CR34","unstructured":"Dong Y, Liang J, Cao L, Catchpoole D (2023) Cliniclens: Visual analytics for exploring and optimizing the testing capacity of clinics given uncertainty. arXiv preprint arXiv:2303.13558"},{"key":"1043_CR35","doi-asserted-by":"crossref","unstructured":"Larson S, Mahendran A, Peper JJ, Clarke C, Lee A, Hill P, Kummerfeld JK, Leach K, Laurenzano MA, Tang L, et al (2019) An evaluation dataset for intent classification and out-of-scope prediction. arXiv preprint arXiv:1909.02027","DOI":"10.18653\/v1\/D19-1131"},{"key":"1043_CR36","doi-asserted-by":"crossref","unstructured":"Casanueva I, Tem\u010dinas T, Gerz D, Henderson M, Vuli\u0107 I (2020) Efficient intent detection with dual sentence encoders. arXiv preprint arXiv:2003.04807","DOI":"10.18653\/v1\/2020.nlp4convai-1.5"}],"updated-by":[{"DOI":"10.1007\/s12650-025-01089-y","type":"correction","label":"Correction","source":"publisher","updated":{"date-parts":[[2025,11,13]],"date-time":"2025-11-13T00:00:00Z","timestamp":1762992000000}}],"container-title":["Journal of Visualization"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12650-025-01043-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s12650-025-01043-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s12650-025-01043-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,14]],"date-time":"2025-11-14T12:32:47Z","timestamp":1763123567000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s12650-025-01043-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,2,10]]},"references-count":36,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["1043"],"URL":"https:\/\/doi.org\/10.1007\/s12650-025-01043-y","relation":{},"ISSN":["1343-8875","1875-8975"],"issn-type":[{"value":"1343-8875","type":"print"},{"value":"1875-8975","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,2,10]]},"assertion":[{"value":"7 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 January 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 February 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2025","order":5,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Update","order":6,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The original online version of this article was revised due to change in title and removal of incorrect citation \u201c[4]\u201d from introduction.","order":7,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 November 2025","order":8,"name":"change_date","label":"Change Date","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"Correction","order":9,"name":"change_type","label":"Change Type","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"A Correction to this paper has been published:","order":10,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"https:\/\/doi.org\/10.1007\/s12650-025-01089-y","URL":"https:\/\/doi.org\/10.1007\/s12650-025-01089-y","order":11,"name":"change_details","label":"Change Details","group":{"name":"ArticleHistory","label":"Article History"}}]}}