{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:36:58Z","timestamp":1773247018270,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2025M771515"],"award-info":[{"award-number":["2025M771515"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Anhui Postdoctoral Scientific Research Program Foundation","award":["2025C1166"],"award-info":[{"award-number":["2025C1166"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3762059","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:54:17Z","timestamp":1761375257000},"page":"13822-13829","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["IntentVCNet: Bridging Spatio-Temporal Gaps for Intention-Oriented Controllable Video Captioning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-6793-7628","authenticated-orcid":false,"given":"Tianheng","family":"Qiu","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China and Hefei Institutes of Physical Science, Chinese Academy of Sciences, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-5273-8212","authenticated-orcid":false,"given":"Jingchun","family":"Gao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9561-7550","authenticated-orcid":false,"given":"Jingyu","family":"Li","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence, Hefei Comprehensive National Science Center, Hefei, China and State Key Lab. for Novel Software Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7274-0811","authenticated-orcid":false,"given":"Huiyi","family":"Leong","sequence":"additional","affiliation":[{"name":"University of Chicago, Chicago, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5747-3341","authenticated-orcid":false,"given":"Xuan","family":"Huang","sequence":"additional","affiliation":[{"name":"Hefei Institutes of Physical Science, Chinese Academy of Sciences, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7668-3965","authenticated-orcid":false,"given":"Xi","family":"Wang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-4864-2008","authenticated-orcid":false,"given":"Xiaocheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology, Harbin, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5997-5169","authenticated-orcid":false,"given":"Kele","family":"Xu","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1004-8588","authenticated-orcid":false,"given":"Lan","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Shuai Bai Keqin Chen Xuejing Liu Jialin Wang Wenbin Ge Sibo Song Kai Dang Peng Wang Shijie Wang Jun Tang et al. 2025. Qwen2. 5-vl technical report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_1_2_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65-72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65-72."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3731715.3733347"},{"key":"e_1_3_2_1_4_1","volume-title":"Videollm: Modeling video sequence with large language models. arXiv preprint arXiv:2305.13292","author":"Chen Guo","year":"2023","unstructured":"Guo Chen, Yin-Dong Zheng, Jiahao Wang, Jilan Xu, Yifei Huang, Junting Pan, Yi Wang, Yali Wang, Yu Qiao, Tong Lu, et al., 2023c. Videollm: Modeling video sequence with large language models. arXiv preprint arXiv:2305.13292 (2023)."},{"key":"e_1_3_2_1_5_1","volume-title":"Shikra: Unleashing multimodal llm's referential dialogue magic. arXiv preprint arXiv:2306.15195","author":"Chen Keqin","year":"2023","unstructured":"Keqin Chen, Zhao Zhang, Weili Zeng, Richong Zhang, Feng Zhu, and Rui Zhao. 2023b. Shikra: Unleashing multimodal llm's referential dialogue magic. arXiv preprint arXiv:2306.15195 (2023)."},{"key":"e_1_3_2_1_6_1","first-page":"72842","article-title":"Vast: A vision-audio-subtitle-text omni-modality foundation model and dataset","volume":"36","author":"Chen Sihan","year":"2023","unstructured":"Sihan Chen, Handong Li, Qunbo Wang, Zijia Zhao, Mingzhen Sun, Xinxin Zhu, and Jing Liu. 2023a. Vast: A vision-audio-subtitle-text omni-modality foundation model and dataset. Advances in Neural Information Processing Systems, Vol. 36 (2023), 72842-72866.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-54407-6_18"},{"key":"e_1_3_2_1_8_1","unstructured":"Zesen Cheng Sicong Leng Hang Zhang Yifei Xin Xin Li Guanzheng Chen Yongxin Zhu Wenqi Zhang Ziyang Luo Deli Zhao et al. 2024. Videollama 2: Advancing spatial-temporal modeling and audio understanding in video-llms. arXiv preprint arXiv:2406.07476 (2024)."},{"key":"e_1_3_2_1_9_1","volume-title":"Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys.org (accessed","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Ziqing Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E Gonzalez, et al., 2023. Vicuna: An open-source chatbot impressing gpt-4 with 90%* chatgpt quality. See https:\/\/vicuna. lmsys.org (accessed 14 April 2023), Vol. 2, 3 (2023), 6."},{"key":"e_1_3_2_1_10_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00552"},{"key":"e_1_3_2_1_12_1","volume-title":"International Conference on Machine Learning. PMLR, 2790-2799","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-efficient transfer learning for NLP. In International Conference on Machine Learning. PMLR, 2790-2799."},{"key":"e_1_3_2_1_13_1","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, Weizhu Chen, et al., 2022. Lora: Low-rank adaptation of large language models. ICLR, Vol. 1, 2 (2022), 3.","journal-title":"ICLR"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3746027.3762057"},{"key":"e_1_3_2_1_15_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/307"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"e_1_3_2_1_18_1","unstructured":"Yiming Li and Zhao Zhang. 2024. The First Place Solution of WSDM Cup 2024: Leveraging Large Language Models for Conversational Multi-Doc QA. arXiv:2402.18385 [cs.CL]"},{"key":"e_1_3_2_1_19_1","volume-title":"Van Tu Vu, et al","author":"Li Zhaowei","year":"2024","unstructured":"Zhaowei Li, Qi Xu, Dong Zhang, Hang Song, Yiqing Cai, Qi Qi, Ran Zhou, Junting Pan, Zefeng Li, Van Tu Vu, et al., 2024. Groundinggpt: Language enhanced multi-modal grounding model. arXiv preprint arXiv:2401.06071 (2024)."},{"key":"e_1_3_2_1_20_1","volume-title":"Video-llava: Learning united visual representation by alignment before projection. arXiv preprint arXiv:2311.10122","author":"Lin Bin","year":"2023","unstructured":"Bin Lin, Yang Ye, Bin Zhu, Jiaxi Cui, Munan Ning, Peng Jin, and Li Yuan. 2023. Video-llava: Learning united visual representation by alignment before projection. arXiv preprint arXiv:2311.10122 (2023)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01742"},{"key":"e_1_3_2_1_22_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems","author":"Liu Haotian","year":"2023","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2023. Visual instruction tuning. Advances in neural information processing systems, Vol. 36 (2023), 34892-34916."},{"key":"e_1_3_2_1_23_1","volume-title":"Macaw-llm: Multi-modal language modeling with image, audio, video, and text integration. arXiv preprint arXiv:2306.09093","author":"Lyu Chenyang","year":"2023","unstructured":"Chenyang Lyu, Minghao Wu, Longyue Wang, Xinting Huang, Bingshuai Liu, Zefeng Du, Shuming Shi, and Zhaopeng Tu. 2023. Macaw-llm: Multi-modal language modeling with image, audio, video, and text integration. arXiv preprint arXiv:2306.09093 (2023)."},{"key":"e_1_3_2_1_24_1","volume-title":"European Conference on Computer Vision. Springer, 417-435","author":"Ma Chuofan","year":"2024","unstructured":"Chuofan Ma, Yi Jiang, Jiannan Wu, Zehuan Yuan, and Xiaojuan Qi. 2024. Groma: Localized visual tokenization for grounding multimodal large language models. In European Conference on Computer Vision. Springer, 417-435."},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311-318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311-318."},{"key":"e_1_3_2_1_26_1","volume-title":"Reinforced video captioning with entailment rewards. arXiv preprint arXiv:1708.02300","author":"Pasunuru Ramakanth","year":"2017","unstructured":"Ramakanth Pasunuru and Mohit Bansal. 2017. Reinforced video captioning with entailment rewards. arXiv preprint arXiv:1708.02300 (2017)."},{"key":"e_1_3_2_1_27_1","volume-title":"Kosmos-2: Grounding multimodal large language models to the world. arXiv preprint arXiv:2306.14824","author":"Peng Zhiliang","year":"2023","unstructured":"Zhiliang Peng, Wenhui Wang, Li Dong, Yaru Hao, Shaohan Huang, Shuming Ma, and Furu Wei. 2023. Kosmos-2: Grounding multimodal large language models to the world. arXiv preprint arXiv:2306.14824 (2023)."},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of Workshop on Text Summarization of ACL, Spain.","author":"Lin CY","year":"2004","unstructured":"Lin CY ROUGE. 2004. A package for automatic evaluation of summaries. In Proceedings of Workshop on Text Summarization of ACL, Spain."},{"key":"e_1_3_2_1_29_1","volume-title":"Audio-visual llm for video understanding. arXiv preprint arXiv:2312.06720","author":"Shu Fangxun","year":"2023","unstructured":"Fangxun Shu, Lei Zhang, Hao Jiang, and Cihang Xie. 2023. Audio-visual llm for video understanding. arXiv preprint arXiv:2312.06720 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3183402"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2024.3359045"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611726"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00756"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3663976.3664004"},{"key":"e_1_3_2_1_35_1","volume-title":"Video: Fine-grained Object-centric Captioning via Spatiotemporal Multimodal Prompting. arXiv preprint arXiv:2504.05541","author":"Tang Yunlong","year":"2025","unstructured":"Yunlong Tang, Jing Bi, Chao Huang, Susan Liang, Daiki Shimada, Hang Hua, Yunzhong Xiao, Yizhi Song, Pinxin Liu, Mingqian Feng, et al., 2025. Caption Anything in Video: Fine-grained Object-centric Captioning via Spatiotemporal Multimodal Prompting. arXiv preprint arXiv:2504.05541 (2025)."},{"key":"e_1_3_2_1_36_1","volume-title":"Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. Advances in neural information processing systems","author":"Tong Zhan","year":"2022","unstructured":"Zhan Tong, Yibing Song, Jue Wang, and Limin Wang. 2022. Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. Advances in neural information processing systems, Vol. 35 (2022), 10078-10093."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00273"},{"key":"e_1_3_2_1_39_1","volume-title":"European Conference on Computer Vision. Springer, 166-185","author":"Wang Han","year":"2024","unstructured":"Han Wang, Yongjie Ye, Yanjie Wang, Yuxiang Nie, and Can Huang. 2024b. Elysium: Exploring object-level perception in videos via mllm. In European Conference on Computer Vision. Springer, 166-185."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00443"},{"key":"e_1_3_2_1_41_1","volume-title":"European Conference on Computer Vision. Springer, 396-416","author":"Wang Yi","year":"2024","unstructured":"Yi Wang, Kunchang Li, Xinhao Li, Jiashuo Yu, Yinan He, Guo Chen, Baoqi Pei, Rongkun Zheng, Zun Wang, Yansong Shi, et al., 2024a. Internvideo2: Scaling foundation models for multimodal video understanding. In European Conference on Computer Vision. Springer, 396-416."},{"key":"e_1_3_2_1_42_1","unstructured":"Yi Wang Xinhao Li Ziang Yan Yinan He Jiashuo Yu Xiangyu Zeng Chenting Wang Changlian Ma Haian Huang Jianfei Gao et al. 2025. InternVideo2. 5: Empowering Video MLLMs with Long and Rich Context Modeling. arXiv preprint arXiv:2501.12386 (2025)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICME46284.2020.9102967"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01302"},{"key":"e_1_3_2_1_45_1","volume-title":"Slowfast-llava: A strong training-free baseline for video large language models. arXiv preprint arXiv:2407.15841","author":"Xu Mingze","year":"2024","unstructured":"Mingze Xu, Mingfei Gao, Zhe Gan, Hong-You Chen, Zhengfeng Lai, Haiming Gang, Kai Kang, and Afshin Dehghan. 2024. Slowfast-llava: A strong training-free baseline for video large language models. arXiv preprint arXiv:2407.15841 (2024)."},{"key":"e_1_3_2_1_46_1","volume-title":"Clip-vip: Adapting pre-trained image-text model to video-language representation alignment. arXiv preprint arXiv:2209.06430","author":"Xue Hongwei","year":"2022","unstructured":"Hongwei Xue, Yuchong Sun, Bei Liu, Jianlong Fu, Ruihua Song, Houqiang Li, and Jiebo Luo. 2022. Clip-vip: Adapting pre-trained image-text model to video-language representation alignment. arXiv preprint arXiv:2209.06430 (2022)."},{"key":"e_1_3_2_1_47_1","unstructured":"An Yang Anfeng Li Baosong Yang Beichen Zhang Binyuan Hui Bo Zheng Bowen Yu Chang Gao Chengen Huang Chenxu Lv et al. 2025. Qwen3 technical report. arXiv preprint arXiv:2505.09388 (2025)."},{"key":"e_1_3_2_1_48_1","volume-title":"Set-of-mark prompting unleashes extraordinary visual grounding in gpt-4v. arXiv preprint arXiv:2310.11441","author":"Yang Jianwei","year":"2023","unstructured":"Jianwei Yang, Hao Zhang, Feng Li, Xueyan Zou, Chunyuan Li, and Jianfeng Gao. 2023. Set-of-mark prompting unleashes extraordinary visual grounding in gpt-4v. arXiv preprint arXiv:2310.11441 (2023)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680724"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681603"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01413"},{"key":"e_1_3_2_1_52_1","volume-title":"Ferret: Refer and ground anything anywhere at any granularity. arXiv preprint arXiv:2310.07704","author":"You Haoxuan","year":"2023","unstructured":"Haoxuan You, Haotian Zhang, Zhe Gan, Xianzhi Du, Bowen Zhang, Zirui Wang, Liangliang Cao, Shih-Fu Chang, and Yinfei Yang. 2023. Ferret: Refer and ground anything anywhere at any granularity. arXiv preprint arXiv:2310.07704 (2023)."},{"key":"e_1_3_2_1_53_1","unstructured":"Boqiang Zhang Kehan Li Zesen Cheng Zhiqiang Hu Yuqian Yuan Guanzheng Chen Sicong Leng Yuming Jiang Hang Zhang Xin Li et al. 2025a. VideoLLaMA 3: Frontier Multimodal Foundation Models for Image and Video Understanding. arXiv preprint arXiv:2501.13106 (2025)."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00852"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-91813-1_4"},{"key":"e_1_3_2_1_57_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."},{"key":"e_1_3_2_1_58_1","volume-title":"Ovc-net: Object-oriented video captioning with temporal graph and detail enhancement. arXiv preprint arXiv:2003.03715","author":"Zhu Fangyi","year":"2020","unstructured":"Fangyi Zhu, Jenq-Neng Hwang, Zhanyu Ma, Guang Chen, and Jun Guo. 2020. Ovc-net: Object-oriented video captioning with temporal graph and detail enhancement. arXiv preprint arXiv:2003.03715 (2020)."},{"key":"e_1_3_2_1_59_1","unstructured":"Jinguo Zhu Weiyun Wang Zhe Chen Zhaoyang Liu Shenglong Ye Lixin Gu Hao Tian Yuchen Duan Weijie Su Jie Shao et al. 2025. Internvl3: Exploring advanced training and test-time recipes for open-source multimodal models. arXiv preprint arXiv:2504.10479 (2025)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3762059","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:44:01Z","timestamp":1765309441000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3762059"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":59,"alternative-id":["10.1145\/3746027.3762059","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3762059","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}