{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T16:48:23Z","timestamp":1755794903873,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Key Technology Research and Development Program of China"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709198","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"2124-2134","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["HRSTORY: Historical News Review Based Online Story Discovery"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0701-7318","authenticated-orcid":false,"given":"Renjie","family":"Zhou","sequence":"first","affiliation":[{"name":"Key Laboratory of Complex Systems Modeling and Simulation of the Ministry of Education, Hangzhou Dianzi University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1687-3634","authenticated-orcid":false,"given":"Haoran","family":"Ye","sequence":"additional","affiliation":[{"name":"Key Laboratory of Complex Systems Modeling and Simulation of the Ministry of Education, Hangzhou Dianzi University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9882-3029","authenticated-orcid":false,"given":"Jian","family":"Wan","sequence":"additional","affiliation":[{"name":"Zhejiang University of Science and Technology, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6403-0557","authenticated-orcid":false,"given":"Yong","family":"Liao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, Anhui, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"On clustering massive text and categorical data streams. Knowledge and information systems 24","author":"Aggarwal Charu C","year":"2010","unstructured":"Charu C Aggarwal and Philip S Yu. 2010. On clustering massive text and categorical data streams. Knowledge and information systems 24 (2010), 171--196."},{"key":"e_1_3_2_2_2_1","volume-title":"COLING 1998","author":"Bagga Amit","year":"1998","unstructured":"Amit Bagga and Breck Baldwin. 1998. Entity-based cross-document coreferencing using the vector space model. In COLING 1998 Volume 1: The 17th International Conference on Computational Linguistics."},{"key":"e_1_3_2_2_3_1","volume-title":"Scalable k-means. arXiv preprint arXiv:1203.6402","author":"Bahmani Bahman","year":"2012","unstructured":"Bahman Bahmani, Benjamin Moseley, Andrea Vattani, Ravi Kumar, and Sergei Vassilvitskii. 2012. Scalable k-means. arXiv preprint arXiv:1203.6402 (2012)."},{"key":"e_1_3_2_2_4_1","volume-title":"Make every example count: On the stability and utility of self-influence for learning from noisy NLP datasets. arXiv preprint arXiv:2302.13959","author":"Bejan Irina","year":"2023","unstructured":"Irina Bejan, Artem Sokolov, and Katja Filippova. 2023. Make every example count: On the stability and utility of self-influence for learning from noisy NLP datasets. arXiv preprint arXiv:2302.13959 (2023)."},{"key":"e_1_3_2_2_5_1","volume-title":"Understanding and mitigating the label noise in pre-training on downstream tasks. arXiv preprint arXiv:2309.17002","author":"Chen Hao","year":"2023","unstructured":"Hao Chen, Jindong Wang, Ankit Shah, Ran Tao, Hongxin Wei, Xing Xie, Masashi Sugiyama, and Bhiksha Raj. 2023. Understanding and mitigating the label noise in pre-training on downstream tasks. arXiv preprint arXiv:2309.17002 (2023)."},{"key":"e_1_3_2_2_6_1","volume-title":"Mixtext: Linguistically-informed interpolation of hidden space for semi-supervised text classification. arXiv preprint arXiv:2004.12239","author":"Chen Jiaao","year":"2020","unstructured":"Jiaao Chen, Zichao Yang, and Diyi Yang. 2020. Mixtext: Linguistically-informed interpolation of hidden space for semi-supervised text classification. arXiv preprint arXiv:2004.12239 (2020)."},{"key":"e_1_3_2_2_7_1","volume-title":"Write Summary Step-by-Step: A Pilot Study of Stepwise Summarization","author":"Chen Xiuying","year":"2024","unstructured":"Xiuying Chen, Shen Gao, Mingzhe Li, Qingqing Zhu, Xin Gao, and Xiangliang Zhang. 2024. Write Summary Step-by-Step: A Pilot Study of Stepwise Summarization. IEEE\/ACM Transactions on Audio, Speech, and Language Processing (2024)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615272"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3655103.3655110"},{"key":"e_1_3_2_2_10_1","first-page":"688","article-title":"Using large language models in psychology","volume":"2","author":"Demszky Dorottya","year":"2023","unstructured":"Dorottya Demszky, Diyi Yang, David S Yeager, Christopher J Bryan, Margarett Clapper, Susannah Chandhok, Johannes C Eichstaedt, Cameron Hecht, Jeremy Jamieson, Meghann Johnson, et al. 2023. Using large language models in psychology. Nature Reviews Psychology 2, 11 (2023), 688--701.","journal-title":"Nature Reviews Psychology"},{"key":"e_1_3_2_2_11_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_12_1","volume-title":"John Glover, and Georgiana Ifrim.","author":"Ghalandari Demian Gholipour","year":"2020","unstructured":"Demian Gholipour Ghalandari, Chris Hokamp, Nghia The Pham, John Glover, and Georgiana Ifrim. 2020. A large-scale multi-document summarization dataset from the Wikipedia current events portal. arXiv preprint arXiv:2005.10070 (2020)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-24797-2"},{"key":"e_1_3_2_2_14_1","volume-title":"Supervised contrastive learning for pre-trained language model fine-tuning. arXiv preprint arXiv:2011.01403","author":"Gunel Beliz","year":"2020","unstructured":"Beliz Gunel, Jingfei Du, Alexis Conneau, and Ves Stoyanov. 2020. Supervised contrastive learning for pre-trained language model fine-tuning. arXiv preprint arXiv:2011.01403 (2020)."},{"key":"e_1_3_2_2_15_1","volume-title":"Augmenting data with mixup for sentence classification: An empirical study. arXiv preprint arXiv:1905.08941","author":"Guo Hongyu","year":"2019","unstructured":"Hongyu Guo, Yongyi Mao, and Richong Zhang. 2019. Augmenting data with mixup for sentence classification: An empirical study. arXiv preprint arXiv:1905.08941 (2019)."},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1007\/BF01908075"},{"key":"e_1_3_2_2_17_1","volume-title":"Stanley Jungkyu Choi, and Minjoon Seo","author":"Jang Joel","year":"2021","unstructured":"Joel Jang, Seonghyeon Ye, Sohee Yang, Joongbo Shin, Janghoon Han, Gyeonghun Kim, Stanley Jungkyu Choi, and Minjoon Seo. 2021. Towards continual knowledge learning of language models. arXiv preprint arXiv:2110.03215 (2021)."},{"key":"e_1_3_2_2_18_1","volume-title":"Lifelong pretraining: Continually adapting language models to emerging corpora. arXiv preprint arXiv:2110.08534","author":"Jin Xisen","year":"2021","unstructured":"Xisen Jin, Dejiao Zhang, Henghui Zhu, Wei Xiao, Shang-Wen Li, Xiaokai Wei, Andrew Arnold, and Xiang Ren. 2021. Lifelong pretraining: Continually adapting language models to emerging corpora. arXiv preprint arXiv:2110.08534 (2021)."},{"key":"e_1_3_2_2_19_1","volume-title":"A survey of GPT-3 family large language models including ChatGPT and GPT-4. Natural Language Processing Journal","author":"Kalyan Katikapalli Subramanyam","year":"2023","unstructured":"Katikapalli Subramanyam Kalyan. 2023. A survey of GPT-3 family large language models including ChatGPT and GPT-4. Natural Language Processing Journal (2023), 100048."},{"key":"e_1_3_2_2_20_1","volume-title":"Noisy text data: Achilles' heel of BERT. arXiv preprint arXiv:2003.12932","author":"Kumar Ankit","year":"2020","unstructured":"Ankit Kumar, Piyush Makhija, and Anuj Gupta. 2020. Noisy text data: Achilles' heel of BERT. arXiv preprint arXiv:2003.12932 (2020)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-2701"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512002"},{"key":"e_1_3_2_2_23_1","volume-title":"Topic Taxonomy Expansion via Hierarchy-Aware Topic Phrase Generation. arXiv preprint arXiv:2211.01981","author":"Lee Dongha","year":"2022","unstructured":"Dongha Lee, Jiaming Shen, Seonghyeon Lee, Susik Yoon, Hwanjo Yu, and Jiawei Han. 2022. Topic Taxonomy Expansion via Hierarchy-Aware Topic Phrase Generation. arXiv preprint arXiv:2211.01981 (2022)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"e_1_3_2_2_25_1","volume-title":"DIGAT: modeling news recommendation with dual-graph interaction. arXiv preprint arXiv:2210.05196","author":"Mao Zhiming","year":"2022","unstructured":"Zhiming Mao, Jian Li, Hongru Wang, Xingshan Zeng, and Kam-Fai Wong. 2022. DIGAT: modeling news recommendation with dual-graph interaction. arXiv preprint arXiv:2210.05196 (2022)."},{"key":"e_1_3_2_2_26_1","volume-title":"Multilingual clustering of streaming news. arXiv preprint arXiv:1809.00540","author":"Miranda Sebasti\u00e3o","year":"2018","unstructured":"Sebasti\u00e3o Miranda, Shay B Cohen, and Guntis Barzdins. 2018. Multilingual clustering of streaming news. arXiv preprint arXiv:1809.00540 (2018)."},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.274"},{"key":"e_1_3_2_2_28_1","volume-title":"Noah Constant, Ji Ma, Keith B Hall, Daniel Cer, and Yinfei Yang.","author":"Ni Jianmo","year":"2021","unstructured":"Jianmo Ni, Gustavo Hernandez Abrego, Noah Constant, Ji Ma, Keith B Hall, Daniel Cer, and Yinfei Yang. 2021. Sentence-t5: Scalable sentence encoders from pre-trained text-to-text models. arXiv preprint arXiv:2108.08877 (2021)."},{"key":"e_1_3_2_2_29_1","volume-title":"Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084","author":"Reimers Nils","year":"2019","unstructured":"Nils Reimers and Iryna Gurevych. 2019. Sentence-bert: Sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)."},{"key":"e_1_3_2_2_30_1","volume-title":"Simplifying Multilingual News Clustering Through Projection From a Shared Space. arXiv preprint arXiv:2204.13418","author":"Santos Jo\u00e3o","year":"2022","unstructured":"Jo\u00e3o Santos, Afonso Mendes, and Sebasti\u00e3o Miranda. 2022. Simplifying Multilingual News Clustering Through Projection From a Shared Space. arXiv preprint arXiv:2204.13418 (2022)."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11227-023-05423-9"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.1037"},{"key":"e_1_3_2_2_33_1","volume-title":"Preslav Nakov, et al.","author":"Staykovski Todor","year":"2019","unstructured":"Todor Staykovski, Alberto Barr\u00f3n-Cedeno, Giovanni Da San Martino, Preslav Nakov, et al. 2019. Dense vs. sparse representations for news stream clustering. In CEUR WORKSHOP PROCEEDINGS, Vol. 2342. CEUR-WS, 47--52."},{"key":"e_1_3_2_2_34_1","volume-title":"Mixup-transformer: dynamic data augmentation for nlp tasks. arXiv preprint arXiv:2010.02394","author":"Sun Lichao","year":"2020","unstructured":"Lichao Sun, Congying Xia, Wenpeng Yin, Tingting Liang, Philip S Yu, and Lifang He. 2020. Mixup-transformer: dynamic data augmentation for nlp tasks. arXiv preprint arXiv:2010.02394 (2020)."},{"key":"e_1_3_2_2_35_1","volume-title":"Vassilis N Ioannidis, Changhe Yuan, and Chandan K Reddy.","author":"Tipirneni Sindhu","year":"2024","unstructured":"Sindhu Tipirneni, Ravinarayana Adkathimar, Nurendra Choudhary, Gaurush Hiranandani, Rana Ali Amjad, Vassilis N Ioannidis, Changhe Yuan, and Chandan K Reddy. 2024. Context-Aware Clustering using Large Language Models. arXiv preprint arXiv:2405.00988 (2024)."},{"key":"e_1_3_2_2_36_1","volume-title":"Proceedings of 15th Multiconference on Information Society. 221--224","author":"Trampu\u0161 Mitja","year":"2012","unstructured":"Mitja Trampu\u0161 and Blaz Novak. 2012. Internals of an aggregated web news feed. In Proceedings of 15th Multiconference on Information Society. 221--224."},{"key":"e_1_3_2_2_37_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553511"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3530257"},{"key":"e_1_3_2_2_40_1","volume-title":"Oolong: Investigating what makes crosslingual transfer hard with controlled studies. arXiv preprint arXiv:2202.12312","author":"Wu Zhengxuan","year":"2022","unstructured":"Zhengxuan Wu, Isabel Papadimitriou, and Alex Tamkin. 2022. Oolong: Investigating what makes crosslingual transfer hard with controlled studies. arXiv preprint arXiv:2202.12312 (2022)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2024.103753"},{"key":"e_1_3_2_2_42_1","volume-title":"Harnessing the power of llms in practice: A survey on chatgpt and beyond. ACM Transactions on Knowledge Discovery from Data 18, 6","author":"Yang Jingfeng","year":"2024","unstructured":"Jingfeng Yang, Hongye Jin, Ruixiang Tang, Xiaotian Han, Qizhang Feng, Haoming Jiang, Shaochen Zhong, Bing Yin, and Xia Hu. 2024. Harnessing the power of llms in practice: A survey on chatgpt and beyond. ACM Transactions on Knowledge Discovery from Data 18, 6 (2024), 1--32."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583371"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591782"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583507"},{"key":"e_1_3_2_2_46_1","volume-title":"Revisiting few-sample BERT fine-tuning. arXiv preprint arXiv:2006.05987","author":"Zhang Tianyi","year":"2020","unstructured":"Tianyi Zhang, Felix Wu, Arzoo Katiyar, Kilian Q Weinberger, and Yoav Artzi. 2020. Revisiting few-sample BERT fine-tuning. arXiv preprint arXiv:2006.05987 (2020)."},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539395"},{"key":"e_1_3_2_2_48_1","unstructured":"Wayne Xin Zhao Kun Zhou Junyi Li Tianyi Tang Xiaolei Wang Yupeng Hou Yingqian Min Beichen Zhang Junjie Zhang Zican Dong et al. 2023. A survey of large language models. arXiv preprint arXiv:2303.18223 (2023)."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2023.119355"},{"key":"e_1_3_2_2_50_1","volume-title":"Large language models for information retrieval: A survey. arXiv preprint arXiv:2308.07107","author":"Zhu Yutao","year":"2023","unstructured":"Yutao Zhu, Huaying Yuan, Shuting Wang, Jiongnan Liu, Wenhan Liu, Chenlong Deng, Zhicheng Dou, and Ji-Rong Wen. 2023. Large language models for information retrieval: A survey. arXiv preprint arXiv:2308.07107 (2023)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"],"location":"Toronto ON Canada","acronym":"KDD '25"},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709198","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709198","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:33:08Z","timestamp":1755358388000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709198"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":50,"alternative-id":["10.1145\/3690624.3709198","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709198","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}