{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:03Z","timestamp":1750309503448,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"National Natural Science Foundation of China under Grant No.62222211.","award":["62222211"],"award-info":[{"award-number":["62222211"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681695","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"10630-10639","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["VoiceTuner: Self-Supervised Pre-training and Efficient Fine-tuning For Voice Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1695-9000","authenticated-orcid":false,"given":"Rongjie","family":"Huang","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4695-3440","authenticated-orcid":false,"given":"Yongqi","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1723-6778","authenticated-orcid":false,"given":"Ruofan","family":"Hu","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4363-392X","authenticated-orcid":false,"given":"Xiaoshan","family":"Xu","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3682-4290","authenticated-orcid":false,"given":"Zhiqing","family":"Hong","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0879-4047","authenticated-orcid":false,"given":"Dongchao","family":"Yang","sequence":"additional","affiliation":[{"name":"The Chinese University of Hong Kong, Hong Kong, Hong Kong"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9708-3225","authenticated-orcid":false,"given":"Xize","family":"Cheng","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6426-3749","authenticated-orcid":false,"given":"Zehan","family":"Wang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1358-8098","authenticated-orcid":false,"given":"Ziyue","family":"Jiang","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7105-014X","authenticated-orcid":false,"given":"Zhenhui","family":"Ye","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2424-3560","authenticated-orcid":false,"given":"Luping","family":"Liu","sequence":"additional","affiliation":[{"name":"The University of Hong Kong, Hangzhou, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4467-8505","authenticated-orcid":false,"given":"Siqi","family":"Zheng","sequence":"additional","affiliation":[{"name":"Alibaba Group, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6121-0384","authenticated-orcid":false,"given":"Zhou","family":"Zhao","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Musiclm: Generating music from text. arXiv preprint arXiv:2301.11325","author":"Agostinelli Andrea","year":"2023","unstructured":"Andrea Agostinelli, Timo I Denk, Zal\u00e1n Borsos, Jesse Engel, Mauro Verzetti, Antoine Caillon, Qingqing Huang, Aren Jansen, Adam Roberts, Marco Tagliasacchi, et al. 2023. Musiclm: Generating music from text. arXiv preprint arXiv:2301.11325 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33","author":"Baevski Alexei","year":"2020","unstructured":"Alexei Baevski, Yuhao Zhou, Abdelrahman Mohamed, and Michael Auli. 2020. wav2vec 2.0: A framework for self-supervised learning of speech representations. Advances in neural information processing systems 33 (2020), 12449--12460."},{"key":"e_1_3_2_1_3_1","volume-title":"Audiolm: a language modeling approach to audio generation. arXiv preprint arXiv:2209.03143","author":"Borsos Zal\u00e1n","year":"2022","unstructured":"Zal\u00e1n Borsos, Rapha\u00ebl Marinier, Damien Vincent, Eugene Kharitonov, Olivier Pietquin, Matt Sharifi, Olivier Teboul, David Grangier, Marco Tagliasacchi, and Neil Zeghidour. 2022. Audiolm: a language modeling approach to audio generation. arXiv preprint arXiv:2209.03143 (2022)."},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning. PMLR, 2709--2720","author":"Casanova Edresson","year":"2022","unstructured":"Edresson Casanova, JulianWeber, Christopher D Shulby, Arnaldo Candido Junior, Eren G\u00f6lge, and Moacir A Ponti. 2022. Yourtts: Towards zero-shot multi-speaker tts and zero-shot voice conversion for everyone. In International Conference on Machine Learning. PMLR, 2709--2720."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"e_1_3_2_1_6_1","volume-title":"High fidelity neural audio compression. arXiv preprint arXiv:2210.13438","author":"D\u00e9fossez Alexandre","year":"2022","unstructured":"Alexandre D\u00e9fossez, Jade Copet, Gabriel Synnaeve, and Yossi Adi. 2022. High fidelity neural audio compression. arXiv preprint arXiv:2210.13438 (2022)."},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096285"},{"key":"e_1_3_2_1_9_1","volume-title":"The curious case of neural text degeneration. arXiv preprint arXiv:1904.09751","author":"Holtzman Ari","year":"2019","unstructured":"Ari Holtzman, Jan Buys, Li Du, Maxwell Forbes, and Yejin Choi. 2019. The curious case of neural text degeneration. 
arXiv preprint arXiv:1904.09751 (2019)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"e_1_3_2_1_11_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_1_12_1","first-page":"28708","article-title":"Masked autoencoders that listen","volume":"35","author":"Huang Po-Yao","year":"2022","unstructured":"Po-Yao Huang, Hu Xu, Juncheng Li, Alexei Baevski, Michael Auli, Wojciech Galuba, Florian Metze, and Christoph Feichtenhofer. 2022. Masked autoencoders that listen. Advances in Neural Information Processing Systems 35 (2022), 28708--28720.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475437"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547854"},{"key":"e_1_3_2_1_15_1","unstructured":"Ziyue Jiang Yi Ren Zhenhui Ye Jinglin Liu Chen Zhang Qian Yang Shengpeng Ji Rongjie Huang Chunfeng Wang Xiang Yin et al. 2023. Mega-TTS: Zero-Shot Text-to-Speech at Scale with Intrinsic Inductive Bias. arXiv preprint arXiv:2306.03509 (2023)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052942"},{"key":"e_1_3_2_1_17_1","volume-title":"Read and Prompt: High-Fidelity Text-to-Speech with Minimal Supervision. arXiv preprint arXiv:2302.03540","author":"Kharitonov Eugene","year":"2023","unstructured":"Eugene Kharitonov, Damien Vincent, Zal\u00e1n Borsos, Rapha\u00ebl Marinier, Sertan Girgin, Olivier Pietquin, Matt Sharifi, Marco Tagliasacchi, and Neil Zeghidour. 2023. Speak, Read and Prompt: High-Fidelity Text-to-Speech with Minimal Supervision. arXiv preprint arXiv:2302.03540 (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"International Conference on Machine Learning. PMLR, 5530--5540","author":"Kim Jaehyeon","year":"2021","unstructured":"Jaehyeon Kim, Jungil Kong, and Juhee Son. 2021. Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech. In International Conference on Machine Learning. PMLR, 5530--5540."},{"key":"e_1_3_2_1_19_1","volume-title":"Proc. of NeurIPS","author":"Kong Jungil","year":"2020","unstructured":"Jungil Kong, Jaehyeon Kim, and Jaekyoung Bae. 2020. HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis. Proc. of NeurIPS (2020)."},{"key":"e_1_3_2_1_20_1","volume-title":"Audiogen: Textually guided audio generation. arXiv preprint arXiv:2209.15352","author":"Kreuk Felix","year":"2022","unstructured":"Felix Kreuk, Gabriel Synnaeve, Adam Polyak, Uriel Singer, Alexandre D\u00e9fossez, Jade Copet, Devi Parikh, Yaniv Taigman, and Yossi Adi. 2022. Audiogen: Textually guided audio generation. arXiv preprint arXiv:2209.15352 (2022)."},{"key":"e_1_3_2_1_21_1","first-page":"1336","article-title":"On generative spoken language modeling from raw audio","volume":"9","author":"Lakhotia Kushal","year":"2021","unstructured":"Kushal Lakhotia, Eugene Kharitonov, Wei-Ning Hsu, Yossi Adi, Adam Polyak, Benjamin Bolte, Tu-Anh Nguyen, Jade Copet, Alexei Baevski, Abdelrahman Mohamed, et al. 2021. On generative spoken language modeling from raw audio. 
Transactions of the Association for Computational Linguistics 9 (2021), 1336--1354.","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"e_1_3_2_1_22_1","volume-title":"Bigvgan: A universal neural vocoder with large-scale training. arXiv preprint arXiv:2206.04658","author":"Lee Sang-gil","year":"2022","unstructured":"Sang-gil Lee, Wei Ping, Boris Ginsburg, Bryan Catanzaro, and Sungroh Yoon. 2022. Bigvgan: A universal neural vocoder with large-scale training. arXiv preprint arXiv:2206.04658 (2022)."},{"key":"e_1_3_2_1_23_1","volume-title":"Simple and effective unsupervised speech synthesis. arXiv preprint arXiv:2204.02524","author":"Liu Alexander H","year":"2022","unstructured":"Alexander H Liu, Cheng-I Jeff Lai, Wei-Ning Hsu, Michael Auli, Alexei Baevski, and James Glass. 2022. Simple and effective unsupervised speech synthesis. arXiv preprint arXiv:2204.02524 (2022)."},{"key":"e_1_3_2_1_24_1","volume-title":"Generative Pre-training for Speech with Flow Matching. arXiv preprint arXiv:2310.16338","author":"Liu Alexander H","year":"2023","unstructured":"Alexander H Liu, Matt Le, Apoorv Vyas, Bowen Shi, Andros Tjandra, and Wei-Ning Hsu. 2023. Generative Pre-training for Speech with Flow Matching. arXiv preprint arXiv:2310.16338 (2023)."},{"key":"e_1_3_2_1_25_1","volume-title":"AudioLDM 2: Learning holistic audio generation with self-supervised pretraining. arXiv preprint arXiv:2308.05734","author":"Liu Haohe","year":"2023","unstructured":"Haohe Liu, Qiao Tian, Yi Yuan, Xubo Liu, Xinhao Mei, Qiuqiang Kong, Yuping Wang, Wenwu Wang, Yuxuan Wang, and Mark D Plumbley. 2023. AudioLDM 2: Learning holistic audio generation with self-supervised pretraining. arXiv preprint arXiv:2308.05734 (2023)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i10.21350"},{"key":"e_1_3_2_1_27_1","volume-title":"fairseq: A fast, extensible toolkit for sequence modeling. arXiv preprint arXiv:1904.01038","author":"Ott Myle","year":"2019","unstructured":"Myle Ott, Sergey Edunov, Alexei Baevski, Angela Fan, Sam Gross, Nathan Ng, David Grangier, and Michael Auli. 2019. fairseq: A fast, extensible toolkit for sequence modeling. arXiv preprint arXiv:1904.01038 (2019)."},{"key":"e_1_3_2_1_28_1","volume-title":"International Conference on Machine Learning. PMLR, 7836--7846","author":"Qian Kaizhi","year":"2020","unstructured":"Kaizhi Qian, Yang Zhang, Shiyu Chang, Mark Hasegawa-Johnson, and David Cox. 2020. Unsupervised speech decomposition via triple information bottleneck. In International Conference on Machine Learning. PMLR, 7836--7846."},{"key":"e_1_3_2_1_29_1","volume-title":"International Conference on Machine Learning. PMLR, 28492--28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International Conference on Machine Learning. PMLR, 28492--28518."},{"key":"e_1_3_2_1_30_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J Liu, et al. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 140 (2020), 1--67.","journal-title":"J. Mach. Learn. 
Res."},{"key":"e_1_3_2_1_31_1","volume-title":"Fastspeech: Fast, robust and controllable text to speech. Advances in Neural Information Processing Systems 32","author":"Ren Yi","year":"2019","unstructured":"Yi Ren, Yangjun Ruan, Xu Tan, Tao Qin, Sheng Zhao, Zhou Zhao, and Tie-Yan Liu. 2019. Fastspeech: Fast, robust and controllable text to speech. Advances in Neural Information Processing Systems 32 (2019)."},{"key":"e_1_3_2_1_32_1","volume-title":"The Eleventh International Conference on Learning Representations.","author":"Ren Yi","year":"2022","unstructured":"Yi Ren, Chen Zhang, and YAN Shuicheng. 2022. Bag of tricks for unsupervised text-to-speech. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_33_1","volume-title":"Token-level ensemble distillation for grapheme-to-phoneme conversion. arXiv preprint arXiv:1904.03446","author":"Sun Hao","year":"2019","unstructured":"Hao Sun, Xu Tan, Jun-Wei Gan, Hongzhi Liu, Sheng Zhao, Tao Qin, and Tie-Yan Liu. 2019. Token-level ensemble distillation for grapheme-to-phoneme conversion. arXiv preprint arXiv:1904.03446 (2019)."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/MP.2006.1664069"},{"key":"e_1_3_2_1_35_1","first-page":"15","article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit. University of Edinburgh","volume":"6","author":"Veaux Christophe","year":"2017","unstructured":"Christophe Veaux, Junichi Yamagishi, Kirsten MacDonald, et al. 2017. CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit. University of Edinburgh. The Centre for Speech Technology Research (CSTR) 6 (2017), 15.","journal-title":"The Centre for Speech Technology Research (CSTR)"},{"key":"e_1_3_2_1_36_1","volume-title":"Audiobox: Unified Audio Generation with Natural Language Prompts. arXiv preprint arXiv:2312.15821","author":"Vyas Apoorv","year":"2023","unstructured":"Apoorv Vyas, Bowen Shi, Matthew Le, Andros Tjandra, Yi-Chiao Wu, Baishan Guo, Jiemin Zhang, Xinyue Zhang, Robert Adkins, William Ngan, et al. 2023. Audiobox: Unified Audio Generation with Natural Language Prompts. arXiv preprint arXiv:2312.15821 (2023)."},{"key":"e_1_3_2_1_37_1","unstructured":"Chengyi Wang Sanyuan Chen Yu Wu Ziqiang Zhang Long Zhou Shujie Liu Zhuo Chen Yanqing Liu Huaming Wang Jinyu Li et al. 2023. Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers. arXiv preprint arXiv:2301.02111 (2023)."},{"key":"e_1_3_2_1_38_1","volume-title":"Tacotron: Towards end-to-end speech synthesis. arXiv preprint arXiv:1703.10135","author":"Wang Yuxuan","year":"2017","unstructured":"Yuxuan Wang, RJ Skerry-Ryan, Daisy Stanton, Yonghui Wu, Ron J Weiss, Navdeep Jaitly, Zongheng Yang, Ying Xiao, Zhifeng Chen, Samy Bengio, et al. 2017. Tacotron: Towards end-to-end speech synthesis. arXiv preprint arXiv:1703.10135 (2017)."},{"key":"e_1_3_2_1_39_1","volume-title":"Opencpop: A high-quality open source chinese popular song corpus for singing voice synthesis. arXiv preprint arXiv:2201.07429","author":"Wang Yu","year":"2022","unstructured":"Yu Wang, Xinsheng Wang, Pengcheng Zhu, Jie Wu, Hanzhao Li, Heyang Xue, Yongmao Zhang, Lei Xie, and Mengxiao Bi. 2022. Opencpop: A high-quality open source chinese popular song corpus for singing voice synthesis. arXiv preprint arXiv:2201.07429 (2022)."},{"key":"e_1_3_2_1_40_1","unstructured":"Dongchao Yang Jinchuan Tian Xu Tan Rongjie Huang Songxiang Liu Xuankai Chang Jiatong Shi Sheng Zhao Jiang Bian Xixin Wu et al. 2023. 
UniAudio: An Audio Foundation Model Toward Universal Audio Generation. arXiv preprint arXiv:2310.00704 (2023)."},{"key":"e_1_3_2_1_41_1","volume-title":"Megabyte: Predicting million-byte sequences with multiscale transformers. arXiv preprint arXiv:2305.07185","author":"Yu Lili","year":"2023","unstructured":"Lili Yu, D\u00e1niel Simig, Colin Flaherty, Armen Aghajanyan, Luke Zettlemoyer, and Mike Lewis. 2023. Megabyte: Predicting million-byte sequences with multiscale transformers. arXiv preprint arXiv:2305.07185 (2023)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3129994"},{"key":"e_1_3_2_1_43_1","volume-title":"LibriTTS: A corpus derived from LibriSpeech for text-to-speech. arXiv preprint arXiv:1904.02882","author":"Zen Heiga","year":"2019","unstructured":"Heiga Zen, Viet Dang, Rob Clark, Yu Zhang, Ron J Weiss, Ye Jia, Zhifeng Chen, and Yonghui Wu. 2019. LibriTTS: A corpus derived from LibriSpeech for text-to-speech. arXiv preprint arXiv:1904.02882 (2019)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746682"},{"key":"e_1_3_2_1_45_1","first-page":"6914","article-title":"M4Singer: A Multi-Style, Multi-Singer and Musical Score Provided Mandarin Singing Corpus","volume":"35","author":"Zhang Lichao","year":"2022","unstructured":"Lichao Zhang, Ruiqi Li, Shoutong Wang, Liqun Deng, Jinglin Liu, Yi Ren, Jinzheng He, Rongjie Huang, Jieming Zhu, Xiao Chen, et al. 2022. M4Singer: A Multi-Style, Multi-Singer and Musical Score Provided Mandarin Singing Corpus. Advances in Neural Information Processing Systems 35 (2022), 6914--6926.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_46_1","volume-title":"Llama-adapter: Efficient fine-tuning of language models with zero-init attention. arXiv preprint arXiv:2303.16199","author":"Zhang Renrui","year":"2023","unstructured":"Renrui Zhang, Jiaming Han, Aojun Zhou, Xiangfei Hu, Shilin Yan, Pan Lu, Hongsheng Li, Peng Gao, and Yu Qiao. 2023. Llama-adapter: Efficient fine-tuning of language models with zero-init attention. arXiv preprint arXiv:2303.16199 (2023)."},{"key":"e_1_3_2_1_47_1","unstructured":"Ziqiang Zhang Long Zhou Chengyi Wang Sanyuan Chen Yu Wu Shujie Liu Zhuo Chen Yanqing Liu Huaming Wang Jinyu Li et al. 2023. Speak foreign languages with your own voice: Cross-lingual neural codec language modeling. 
arXiv preprint arXiv:2303.03926 (2023)."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681695","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681695","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:50Z","timestamp":1750295870000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681695"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":47,"alternative-id":["10.1145\/3664647.3681695","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681695","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
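
The JSON above is a standard Crossref "work" message for this paper. As a minimal sketch of how such a record can be retrieved and read programmatically, the snippet below queries the public Crossref REST API (api.crossref.org, which is outside the record itself and assumed reachable) for the DOI shown above and prints the title, venue, authors, and reference count; the field names mirror exactly those visible in the record.

```python
# Minimal sketch: fetch and inspect a Crossref "work" record like the one above.
# Assumes network access to the public Crossref REST API (api.crossref.org);
# field names (message.title, message.author, message.DOI, ...) match the record shown here.
import json
import urllib.request

DOI = "10.1145/3664647.3681695"  # VoiceTuner, MM '24
url = f"https://api.crossref.org/works/{DOI}"

with urllib.request.urlopen(url, timeout=30) as resp:
    record = json.load(resp)

# The envelope matches the record above: {"status": "ok", "message-type": "work", "message": {...}}
assert record["status"] == "ok" and record["message-type"] == "work"
work = record["message"]

# Title, venue, DOI, and pages resolve directly from the "message" object.
print(work["title"][0])            # "VoiceTuner: Self-Supervised Pre-training and Efficient Fine-tuning For Voice Generation"
print(work["container-title"][0])  # "Proceedings of the 32nd ACM International Conference on Multimedia"
print(work["DOI"], work["page"])   # "10.1145/3664647.3681695" "10630-10639"

# Authors are a list of {given, family, affiliation, ORCID, ...} objects.
for a in work["author"]:
    affiliations = "; ".join(aff["name"] for aff in a.get("affiliation", []))
    print(f'{a["given"]} {a["family"]} ({affiliations})')

# References carry a Crossref key plus either a DOI or an unstructured citation string.
print(work["references-count"], "references")
```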