{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,22]],"date-time":"2026-07-22T13:37:35Z","timestamp":1784727455155,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":33,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,8,24]],"date-time":"2024-08-24T00:00:00Z","timestamp":1724457600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,8,25]]},"DOI":"10.1145\/3637528.3671647","type":"proceedings-article","created":{"date-parts":[[2024,8,25]],"date-time":"2024-08-25T04:55:12Z","timestamp":1724561712000},"page":"5836-5847","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":55,"title":["TnT-LLM: Text Mining at Scale with Large Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5298-1221","authenticated-orcid":false,"given":"Mengting","family":"Wan","sequence":"first","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3553-4331","authenticated-orcid":false,"given":"Tara","family":"Safavi","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9239-6211","authenticated-orcid":false,"given":"Sujay Kumar","family":"Jauhar","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0649-7471","authenticated-orcid":false,"given":"Yujin","family":"Kim","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1507-5200","authenticated-orcid":false,"given":"Scott","family":"Counts","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1157-018X","authenticated-orcid":false,"given":"Jennifer","family":"Neville","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1318-8140","authenticated-orcid":false,"given":"Siddharth","family":"Suri","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3797-4293","authenticated-orcid":false,"given":"Chirag","family":"Shah","sequence":"additional","affiliation":[{"name":"University of Washington, Seattle, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0265-4249","authenticated-orcid":false,"given":"Ryen W.","family":"White","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6615-8615","authenticated-orcid":false,"given":"Longqi","family":"Yang","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4878-6781","authenticated-orcid":false,"given":"Reid","family":"Andersen","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4044-3756","authenticated-orcid":false,"given":"Georg","family":"Buscher","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6144-1721","authenticated-orcid":false,"given":"Dhruv","family":"Joshi","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3231-2863","authenticated-orcid":false,"given":"Nagu","family":"Rangan","sequence":"additional","affiliation":[{"name":"Microsoft Corporation, Redmond, WA, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,8,24]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"A survey of text clustering algorithms. Mining text data","author":"Aggarwal Charu C","year":"2012","unstructured":"Charu C Aggarwal and ChengXiang Zhai. 2012. A survey of text clustering algorithms. Mining text data (2012), 77--128."},{"key":"e_1_3_2_2_2_1","first-page":"1027","article-title":"k-means: The advantages of careful seeding","volume":"7","author":"Arthur David","year":"2007","unstructured":"David Arthur, Sergei Vassilvitskii, et al. 2007. k-means: The advantages of careful seeding. In Soda, Vol. 7. 1027--1035.","journal-title":"Soda"},{"key":"e_1_3_2_2_3_1","volume-title":"Online algorithms and stochastic approximations. Online learning in neural networks","author":"Bottou L\u00e9on","year":"1998","unstructured":"L\u00e9on Bottou. 1998. Online algorithms and stochastic approximations. Online learning in neural networks (1998)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3406522.3446027"},{"key":"e_1_3_2_2_5_1","volume-title":"Reading tea leaves: How humans interpret topic models. Advances in neural information processing systems","author":"Chang Jonathan","year":"2009","unstructured":"Jonathan Chang, Sean Gerrish, Chong Wang, Jordan Boyd-Graber, and David Blei. 2009. Reading tea leaves: How humans interpret topic models. Advances in neural information processing systems, Vol. 22 (2009)."},{"key":"e_1_3_2_2_6_1","volume-title":"A coefficient of agreement for nominal scales. Educational and psychological measurement","author":"Cohen Jacob","year":"1960","unstructured":"Jacob Cohen. 1960. A coefficient of agreement for nominal scales. Educational and psychological measurement, Vol. 20, 1 (1960), 37--46."},{"key":"e_1_3_2_2_7_1","volume-title":"The equivalence of weighted kappa and the intraclass correlation coefficient as measures of reliability. Educational and psychological measurement","author":"Fleiss Joseph L","year":"1973","unstructured":"Joseph L Fleiss and Jacob Cohen. 1973. The equivalence of weighted kappa and the intraclass correlation coefficient as measures of reliability. Educational and psychological measurement, Vol. 33, 3 (1973), 613--619."},{"key":"e_1_3_2_2_8_1","volume-title":"Chatgpt outperforms crowd-workers for text-annotation tasks. arXiv preprint arXiv:2303.15056","author":"Gilardi Fabrizio","year":"2023","unstructured":"Fabrizio Gilardi, Meysam Alizadeh, and Ma\u00ebl Kubli. 2023. Chatgpt outperforms crowd-workers for text-annotation tasks. arXiv preprint arXiv:2303.15056 (2023)."},{"key":"e_1_3_2_2_9_1","volume-title":"Neural networks: a comprehensive foundation","author":"Haykin Simon","unstructured":"Simon Haykin. 1998. Neural networks: a comprehensive foundation. Prentice Hall PTR."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.21248\/jlcl.20.2005.68"},{"key":"e_1_3_2_2_11_1","volume-title":"Compressing text classification models. arXiv preprint arXiv:1612.03651","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Edouard Grave, Piotr Bojanowski, Matthijs Douze, H\u00e9rve J\u00e9gou, and Tomas Mikolov. 2016. FastText.zip: Compressing text classification models. arXiv preprint arXiv:1612.03651 (2016)."},{"key":"e_1_3_2_2_12_1","volume-title":"Bag of Tricks for Efficient Text Classification. arXiv preprint arXiv:1607.01759","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Edouard Grave, Piotr Bojanowski, and Tomas Mikolov. 2016. Bag of Tricks for Efficient Text Classification. arXiv preprint arXiv:1607.01759 (2016)."},{"key":"e_1_3_2_2_13_1","volume-title":"Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems","author":"Ke Guolin","year":"2017","unstructured":"Guolin Ke, Qi Meng, Thomas Finley, Taifeng Wang, Wei Chen, Weidong Ma, Qiwei Ye, and Tie-Yan Liu. 2017. Lightgbm: A highly efficient gradient boosting decision tree. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.948"},{"key":"e_1_3_2_2_15_1","volume-title":"Lost in the middle: How language models use long contexts. arXiv preprint arXiv:2307.03172","author":"Liu Nelson F","year":"2023","unstructured":"Nelson F Liu, Kevin Lin, John Hewitt, Ashwin Paranjape, Michele Bevilacqua, Fabio Petroni, and Percy Liang. 2023. Lost in the middle: How language models use long contexts. arXiv preprint arXiv:2307.03172 (2023)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1002\/0471721182"},{"key":"e_1_3_2_2_17_1","volume-title":"TopicGPT: A Prompt-based Topic Modeling Framework. arXiv preprint arXiv:2311.01449","author":"Pham Chau Minh","year":"2023","unstructured":"Chau Minh Pham, Alexander Hoyle, Simeng Sun, and Mohit Iyyer. 2023. TopicGPT: A Prompt-based Topic Modeling Framework. arXiv preprint arXiv:2311.01449 (2023)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/988672.988675"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/0377-0427(87)90125-7"},{"key":"e_1_3_2_2_21_1","first-page":"e26752","article-title":"The new york times annotated corpus","volume":"6","author":"Sandhaus Evan","year":"2008","unstructured":"Evan Sandhaus. 2008. The new york times annotated corpus. Linguistic Data Consortium, Philadelphia, Vol. 6, 12 (2008), e26752.","journal-title":"Linguistic Data Consortium, Philadelphia"},{"key":"e_1_3_2_2_22_1","volume-title":"Ali Montazer, Sathish Manivannan, Jennifer Neville, Xiaochuan Ni, et al.","author":"Shah Chirag","year":"2023","unstructured":"Chirag Shah, Ryen W White, Reid Andersen, Georg Buscher, Scott Counts, Sarkar Snigdha Sarathi Das, Ali Montazer, Sathish Manivannan, Jennifer Neville, Xiaochuan Ni, et al. 2023. Using large language models to generate, validate, and apply user intent taxonomies. arXiv preprint arXiv:2309.13063 (2023)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380259"},{"key":"e_1_3_2_2_24_1","volume-title":"Proceedings of the 2013 conference on empirical methods in natural language processing. 1631--1642","author":"Socher Richard","year":"2013","unstructured":"Richard Socher, Alex Perelygin, Jean Wu, Jason Chuang, Christopher D Manning, Andrew Y Ng, and Christopher Potts. 2013. Recursive deep models for semantic compositionality over a sentiment treebank. In Proceedings of the 2013 conference on empirical methods in natural language processing. 1631--1642."},{"key":"e_1_3_2_2_25_1","volume-title":"One Embedder","author":"Su Hongjin","unstructured":"Hongjin Su, Weijia Shi, Jungo Kasai, Yizhong Wang, Yushi Hu, Mari Ostendorf, Wen-tau Yih, Noah A. Smith, Luke Zettlemoyer, and Tao Yu. 2022. One Embedder, Any Task: Instruction-Finetuned Text Embeddings. https:\/\/arxiv.org\/abs\/2212.09741"},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the PAKDD 1999 workshop on knowledge discovery from advanced databases","volume":"8","author":"Ah-Hwee","unstructured":"Ah-Hwee Tan et al. 1999. Text mining: The state of the art and the challenges. In Proceedings of the PAKDD 1999 workshop on knowledge discovery from advanced databases, Vol. 8. 65--70."},{"key":"e_1_3_2_2_27_1","volume-title":"Large language models can accurately predict searcher preferences. arXiv preprint arXiv:2309.10621","author":"Thomas Paul","year":"2023","unstructured":"Paul Thomas, Seth Spielman, Nick Craswell, and Bhaskar Mitra. 2023. Large language models can accurately predict searcher preferences. arXiv preprint arXiv:2309.10621 (2023)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.is.2020.101582"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.657"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.coling-main.429"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467308"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3220064"},{"key":"e_1_3_2_2_33_1","volume-title":"Can Large Language Models Transform Computational Social Science? arXiv preprint arXiv:2305.03514","author":"Ziems Caleb","year":"2023","unstructured":"Caleb Ziems, William Held, Omar Shaikh, Jiaao Chen, Zhehao Zhang, and Diyi Yang. 2023. Can Large Language Models Transform Computational Social Science? arXiv preprint arXiv:2305.03514 (2023)."}],"event":{"name":"KDD '24: The 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Barcelona Spain","acronym":"KDD '24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671647","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3637528.3671647","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:06:00Z","timestamp":1750291560000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3637528.3671647"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,24]]},"references-count":33,"alternative-id":["10.1145\/3637528.3671647","10.1145\/3637528"],"URL":"https:\/\/doi.org\/10.1145\/3637528.3671647","relation":{},"subject":[],"published":{"date-parts":[[2024,8,24]]},"assertion":[{"value":"2024-08-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}