{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,11]],"date-time":"2026-07-11T17:30:47Z","timestamp":1783791047905,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":87,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.:62276155, No.:62476071, No.:U24A20328, No.:624B2047"],"award-info":[{"award-number":["No.:62276155, No.:62476071, No.:U24A20328, No.:624B2047"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Guangdong Basic and Applied Basic Research Foundation","award":["No.:2025A1515011732"],"award-info":[{"award-number":["No.:2025A1515011732"]}]},{"name":"China National University Student Innovation & Entrepreneurship Development Program","award":["No.:202410422071"],"award-info":[{"award-number":["No.:202410422071"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755445","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:50:47Z","timestamp":1761371447000},"page":"6143-6152","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["HUD: Hierarchical Uncertainty-Aware Disambiguation Network for Composed Video Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-0365-8553","authenticated-orcid":false,"given":"Zhiwei","family":"Chen","sequence":"first","affiliation":[{"name":"School of Software, Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5653-8286","authenticated-orcid":false,"given":"Yupeng","family":"Hu","sequence":"additional","affiliation":[{"name":"School of Software, Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-5136-159X","authenticated-orcid":false,"given":"Zixu","family":"Li","sequence":"additional","affiliation":[{"name":"School of Software, Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-7724-5662","authenticated-orcid":false,"given":"Zhiheng","family":"Fu","sequence":"additional","affiliation":[{"name":"School of Software, Shandong University, Jinan, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0633-3722","authenticated-orcid":false,"given":"Haokun","family":"Wen","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Harbin Institute of Technology (Shenzhen), Shenzhen, China and School of Data Science, City University of Hong Kong, Hong Kong, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5658-5509","authenticated-orcid":false,"given":"Weili","family":"Guan","sequence":"additional","affiliation":[{"name":"School of Information Science and Technology, Harbin Institute of Technology (Shenzhen), Shenzhen, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3662732"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1093\/nsr\/nwad141"},{"key":"e_1_3_2_1_3_1","unstructured":"Xingjian Wu Xiangfei Qiu Hongfan Gao Jilin Hu Bin Yang and Chenjuan Guo. 2025. K^2VAE: A Koopman-Kalman Enhanced Variational AutoEncoder for Probabilistic Time Series Forecasting. In ICML."},{"key":"e_1_3_2_1_4_1","volume-title":"SSFold: Learning to Fold Arbitrary Crumpled Cloth Using Graph Dynamics from Human Demonstration. arXiv preprint arXiv:2411.02608","author":"Zhou Changshi","year":"2024","unstructured":"Changshi Zhou, Haichuan Xu, Jiarui Hu, Feng Luan, Zhipeng Wang, Yanchao Dong, Yanmin Zhou, and Bin He. 2024. SSFold: Learning to Fold Arbitrary Crumpled Cloth Using Graph Dynamics from Human Demonstration. arXiv preprint arXiv:2411.02608 (2024)."},{"key":"e_1_3_2_1_5_1","first-page":"1185","article-title":"DUET","author":"Qiu Xiangfei","year":"2025","unstructured":"Xiangfei Qiu, Xingjian Wu, Yan Lin, Chenjuan Guo, Jilin Hu, and Bin Yang. 2025. DUET: Dual Clustering Enhanced Multivariate Time Series Forecasting. In SIGKDD. 1185-1196.","journal-title":"Dual Clustering Enhanced Multivariate Time Series Forecasting. In SIGKDD."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.1583"},{"key":"e_1_3_2_1_7_1","unstructured":"Zhenlong Yuan Cong Liu Fei Shen Zhaoxin Li Jinguo Luo Tianlu Mao and Zhaoqi Wang. 2024. MSP-MVS : Multi-Granularity Segmentation Prior Guided Multi-View Stereo . showeprint2407.19323"},{"key":"e_1_3_2_1_8_1","volume-title":"Bimcv-r: A landmark dataset for 3d ct text-image retrieval","author":"Chen Yinda","year":"2024","unstructured":"Yinda Chen, Che Liu, Xiaoyu Liu, Rossella Arcucci, and Zhiwei Xiong. 2024. Bimcv-r: A landmark dataset for 3d ct text-image retrieval. In MICCAI. Springer Nature Switzerland, 124-134."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMBC48229.2022.9870824"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.27977"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28334"},{"key":"e_1_3_2_1_12_1","volume-title":"CoVR-2: Automatic Data Construction for Composed Video Retrieval","author":"Ventura Lucas","year":"2024","unstructured":"Lucas Ventura, Antoine Yang, Cordelia Schmid, and G\u00fcl Varol. 2024b. CoVR-2: Automatic Data Construction for Composed Video Retrieval. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02540"},{"key":"e_1_3_2_1_14_1","volume-title":"European Conference on Computer Vision. Springer, 1-17","author":"Hummel Thomas","year":"2024","unstructured":"Thomas Hummel, Shyamgopal Karthik, Mariana-Iuliana Georgescu, and Zeynep Akata. 2024. Egocvr: An egocentric benchmark for fine-grained composed video retrieval. In European Conference on Computer Vision. Springer, 1-17."},{"key":"e_1_3_2_1_15_1","volume-title":"The International Conference on Learning Representations.","author":"Yue WU","year":"2025","unstructured":"WU Yue, Zhaobo Qi, Yiling Wu, Junshu Sun, Yaowei Wang, and Shuhui Wang. 2025. Learning Fine-Grained Representations through Textual Token Disentanglement in Composed Video Retrieval. In The International Conference on Learning Representations."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00317"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3073867"},{"key":"e_1_3_2_1_18_1","unstructured":"Jinhe Bi Danqi Yan Yifan Wang Wenke Huang Haokun Chen Guancheng Wan Mang Ye Xun Xiao Hinrich Schuetze Volker Tresp et al. 2025. CoT-Kinetics: A Theoretical Modeling Assessing LRM Reasoning Process. arXiv preprint arXiv:2505.13408 (2025)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680774"},{"key":"e_1_3_2_1_20_1","volume-title":"Visual Instruction Tuning with 500x Fewer Parameters through Modality Linear Representation-Steering. arXiv preprint arXiv:2412.12359","author":"Bi Jinhe","year":"2024","unstructured":"Jinhe Bi, Yujun Wang, Haokun Chen, Xun Xiao, Artur Hecker, Volker Tresp, and Yunpu Ma. 2024. Visual Instruction Tuning with 500x Fewer Parameters through Modality Linear Representation-Steering. arXiv preprint arXiv:2412.12359 (2024)."},{"key":"e_1_3_2_1_21_1","volume-title":"Cpl-slam: Centralized collaborative multi-robot visual-inertial slam using point-and-line features","author":"Liu Xin","year":"2025","unstructured":"Xin Liu, Shuhuan Wen, Huaping Liu, and F Richard Yu. 2025. Cpl-slam: Centralized collaborative multi-robot visual-inertial slam using point-and-line features. IEEE Internet of Things Journal (2025)."},{"key":"e_1_3_2_1_22_1","volume-title":"Hao Feng, Zhen Zhao, et al.","author":"Tang Jingqun","year":"2024","unstructured":"Jingqun Tang, Qi Liu, Yongjie Ye, Jinghui Lu, Shu Wei, Chunhui Lin, Wanqing Li, Mohamad Fitri Faiz Bin Mahmood, Hao Feng, Zhen Zhao, et al., 2024. MTVQA: Benchmarking Multilingual Text-Centric Visual Question Answering. arXiv preprint arXiv:2405.11985 (2024)."},{"key":"e_1_3_2_1_23_1","unstructured":"Hongfan Gao Wangmeng Shen Xiangfei Qiu Ronghui Xu Bin Yang and Jilin Hu. 2025. SSD-TS: Exploring the potential of linear state space models for diffusion models in time series imputation. In SIGKDD."},{"key":"e_1_3_2_1_24_1","unstructured":"Jingqun Tang Chunhui Lin Zhen Zhao Shu Wei Binghong Wu Qi Liu Hao Feng Yang Li Siqi Wang Lei Liao et al. 2024. TextSquare: Scaling up Text-Centric Visual Instruction Tuning. arXiv preprint arXiv:2404.12803 (2024)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3090521"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2923608"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210003"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240549"},{"key":"e_1_3_2_1_29_1","volume-title":"Hdnet: A hybrid domain network with multi-scale high-frequency information enhancement for infrared small target detection","author":"Xu Mingzhu","year":"2025","unstructured":"Mingzhu Xu, Chenglong Yu, Zexuan Li, Haoyu Tang, Yupeng Hu, and Liqiang Nie. 2025. Hdnet: A hybrid domain network with multi-scale high-frequency information enhancement for infrared small target detection. IEEE Transactions on Geoscience and Remote Sensing (2025)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19815-1_14"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2024.107096"},{"key":"e_1_3_2_1_32_1","volume-title":"CATCH: Channel-Aware multivariate Time Series Anomaly Detection via Frequency Patching. In ICLR.","author":"Wu Xingjian","year":"2025","unstructured":"Xingjian Wu, Xiangfei Qiu, Zhengyu Li, Yihang Wang, Jilin Hu, Chenjuan Guo, Hui Xiong, and Bin Yang. 2025. CATCH: Channel-Aware multivariate Time Series Anomaly Detection via Frequency Patching. In ICLR."},{"key":"e_1_3_2_1_33_1","volume-title":"Simultaneously detecting spatiotemporal changes with penalized Poisson regression models. arXiv preprint arXiv:2405.06613","author":"Zhang Zerui","year":"2024","unstructured":"Zerui Zhang, Xin Wang, Xin Zhang, and Jing Zhang. 2024. Simultaneously detecting spatiotemporal changes with penalized Poisson regression models. arXiv preprint arXiv:2405.06613 (2024)."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the European conference on computer vision (ECCV).","author":"Du Penghui","year":"2024","unstructured":"Penghui Du, Yu Wang, Yifan Sun, Luting Wang, Yue Liao, Gang Zhang, Errui Ding, Yan Wang, Jingdong Wang, and Si Liu. 2024. LaMI-DETR: Open-Vocabulary Detection with Language Model Instruction. In Proceedings of the European conference on computer vision (ECCV)."},{"key":"e_1_3_2_1_35_1","volume-title":"Semantic-Orthogonal Multi-modal Attention Network for RGB-D Salient Object Detection. The Visual Computer","author":"Xu Jiawei","year":"2025","unstructured":"Jiawei Xu, Qiangqiang Zhou, Jiacong Yu, Chen Liao, and Dandan Zhu. 2025. Semantic-Orthogonal Multi-modal Attention Network for RGB-D Salient Object Detection. The Visual Computer (2025), 1-13."},{"key":"e_1_3_2_1_36_1","volume-title":"Prism: Self-pruning intrinsic selection method for training-free multimodal data selection. arXiv preprint arXiv:2502.12119","author":"Bi Jinhe","year":"2025","unstructured":"Jinhe Bi, Yifan Wang, Danqi Yan, Xun Xiao, Artur Hecker, Volker Tresp, and Yunpu Ma. 2025. Prism: Self-pruning intrinsic selection method for training-free multimodal data selection. arXiv preprint arXiv:2502.12119 (2025)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00452"},{"key":"e_1_3_2_1_38_1","first-page":"1","article-title":"Semantic collaborative learning for cross-modal moment localization","volume":"42","author":"Hu Yupeng","year":"2023","unstructured":"Yupeng Hu, Kun Wang, Meng Liu, Haoyu Tang, and Liqiang Nie. 2023. Semantic collaborative learning for cross-modal moment localization. ACM Transactions on Information Systems, Vol. 42, 2 (2023), 1-26.","journal-title":"ACM Transactions on Information Systems"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00218"},{"key":"e_1_3_2_1_40_1","unstructured":"Daiheng Gao Shilin Lu Shaw Walters Wenbo Zhou Jiaming Chu Jie Zhang Bang Zhang Mengxi Jia Jian Zhao Zhaoxin Fan et al. 2024. EraseAnything: Enabling Concept Erasure in Rectified Flow Transformers. arXiv preprint arXiv:2412.20413 (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00615"},{"key":"e_1_3_2_1_42_1","volume-title":"CRISP-SAM2: SAM2 with Cross-Modal Interaction and Semantic Prompting for Multi-Organ Segmentation. arXiv preprint arXiv:2506.23121","author":"Yu Xinlei","year":"2025","unstructured":"Xinlei Yu, Changmiao Wang, Hui Jin, Ahmed Elazab, Gangyong Jia, Xiang Wan, Changqing Zou, and Ruiquan Ge. 2025. CRISP-SAM2: SAM2 with Cross-Modal Interaction and Semantic Prompting for Multi-Organ Segmentation. arXiv preprint arXiv:2506.23121 (2025)."},{"key":"e_1_3_2_1_43_1","volume-title":"Robust watermarking using generative priors against image editing: From benchmarking to advances. arXiv preprint arXiv:2410.18775","author":"Lu Shilin","year":"2024","unstructured":"Shilin Lu, Zihan Zhou, Jiayou Lu, Yuanzhi Zhu, and Adams Wai-Kin Kong. 2024. Robust watermarking using generative priors against image editing: From benchmarking to advances. arXiv preprint arXiv:2410.18775 (2024)."},{"key":"e_1_3_2_1_44_1","volume-title":"Set you straight: Auto-steering denoising trajectories to sidestep unwanted concepts. arXiv preprint arXiv:2504.12782","author":"Li Leyang","year":"2025","unstructured":"Leyang Li, Shilin Lu, Yan Ren, and Adams Wai-Kin Kong. 2025. Set you straight: Auto-steering denoising trajectories to sidestep unwanted concepts. arXiv preprint arXiv:2504.12782 (2025)."},{"key":"e_1_3_2_1_45_1","first-page":"477","volume-title":"Applications and Worksharing: 14th EAI International Conference, CollaborateCom 2018, Shanghai, China, December 1-3, 2018, Proceedings 14","author":"Zhan Peng","year":"2019","unstructured":"Peng Zhan, Yupeng Hu, Wei Luo, Yang Xu, Qi Zhang, and Xueqing Li. 2019. Feature-based online segmentation algorithm for streaming time series (short paper). In Collaborative Computing: Networking, Applications and Worksharing: 14th EAI International Conference, CollaborateCom 2018, Shanghai, China, December 1-3, 2018, Proceedings 14. Springer, 477-487."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2024.110565"},{"key":"e_1_3_2_1_47_1","volume-title":"Generative text-guided 3d vision-language pretraining for unified medical image segmentation. arXiv preprint arXiv:2306.04811","author":"Chen Yinda","year":"2023","unstructured":"Yinda Chen, Che Liu, Wei Huang, Xiaoyu Liu, Sibo Cheng, Rossella Arcucci, and Zhiwei Xiong. 2023. Generative text-guided 3d vision-language pretraining for unified medical image segmentation. arXiv preprint arXiv:2306.04811 (2023)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681526"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28512"},{"key":"e_1_3_2_1_50_1","volume-title":"Xiaogang Jin, and Deng-Ping Fan.","author":"Qian Haotian","year":"2024","unstructured":"Haotian Qian, YD Chen, Shengtao Lou, Fahad Shahbaz Khan, Xiaogang Jin, and Deng-Ping Fan. 2024. MaskFactory: Towards High-quality Synthetic Data Generation for Dichotomous Image Segmentation. NeurIPS 24 (2024)."},{"key":"e_1_3_2_1_51_1","unstructured":"Zhenlong Yuan Jinguo Luo Fei Shen Zhaoxin Li Cong Liu Tianlu Mao and Zhaoqi Wang. 2024. DVP-MVS : Synergize Depth-Edge and Visibility Prior for Multi-View Stereo . showeprint2412.11578"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Yinda Chen Wei Huang Shenglong Zhou Qi Chen and Zhiwei Xiong. 2023. Self-supervised neuron segmentation with multi-agent reinforcement learning. In IJCAI.","DOI":"10.24963\/ijcai.2023\/68"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Zhenlong Yuan Zhidong Yang Yujun Cai Kuangxin Wu Mufan Liu Dapeng Zhang Hao Jiang Zhaoxin Li and Zhaoqi Wang. 2025. SED-MVS : Segmentation-Driven and Edge-Aligned Deformation Multi-View Stereo with Depth Restoration and Occlusion Constraint . showeprint2503.13721","DOI":"10.1109\/TCSVT.2025.3574473"},{"key":"e_1_3_2_1_54_1","volume-title":"International conference on machine learning. PMLR, 12888-12900","author":"Li Junnan","year":"2022","unstructured":"Junnan Li, Dongxu Li, Caiming Xiong, and Steven Hoi. 2022. Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation. In International conference on machine learning. PMLR, 12888-12900."},{"key":"e_1_3_2_1_55_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730-19742."},{"key":"e_1_3_2_1_56_1","volume-title":"A clip-hitchhiker's guide to long video retrieval. arXiv preprint arXiv:2205.08508","author":"Bain Max","year":"2022","unstructured":"Max Bain, Arsha Nagrani, G\u00fcl Varol, and Andrew Zisserman. 2022. A clip-hitchhiker's guide to long video retrieval. arXiv preprint arXiv:2205.08508 (2022)."},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i5.32541"},{"key":"e_1_3_2_1_58_1","volume-title":"FineCIR: Explicit Parsing of Fine-Grained Modification Semantics for Composed Image Retrieval. https:\/\/arxiv.org\/abs\/2503.21309","author":"Li Zixu","year":"2025","unstructured":"Zixu Li, Zhiheng Fu, Yupeng Hu, Zhiwei Chen, Haokun Wen, and Liqiang Nie. 2025b. FineCIR: Explicit Parsing of Fine-Grained Modification Semantics for Composed Image Retrieval. https:\/\/arxiv.org\/abs\/2503.21309 (2025)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890642"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10888153"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611817"},{"key":"e_1_3_2_1_62_1","volume-title":"OFFSET: Segmentation-based Focus Shift Revision for Composed Image Retrieval. arXiv preprint arXiv:2507.05631","author":"Chen Zhiwei","year":"2025","unstructured":"Zhiwei Chen, Yupeng Hu, Zixu Li, Zhiheng Fu, Xuemeng Song, and Liqiang Nie. 2025. OFFSET: Segmentation-based Focus Shift Revision for Composed Image Retrieval. arXiv preprint arXiv:2507.05631 (2025)."},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657831"},{"key":"e_1_3_2_1_64_1","volume-title":"TokenUnify: Scalable Autoregressive Visual Pre-training with Mixture Token Prediction. arXiv preprint arXiv:2405.16847","author":"Chen Yinda","year":"2024","unstructured":"Yinda Chen, Haoyuan Shi, Xiaoyu Liu, Te Shi, Ruobing Zhang, Dong Liu, Zhiwei Xiong, and Feng Wu. 2024. TokenUnify: Scalable Autoregressive Visual Pre-training with Mixture Token Prediction. arXiv preprint arXiv:2405.16847 (2024)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413556"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMECH.2025.3556283"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-024-00872-x"},{"key":"e_1_3_2_1_68_1","volume-title":"Conditional Latent Coding with Learnable Synthesized Reference for Deep Image Compression. AAAI","author":"Wu Siqi","year":"2025","unstructured":"Siqi Wu, Yinda Chen, Dong Liu, and Zhihai He. 2025. Conditional Latent Coding with Learnable Synthesized Reference for Deep Image Compression. AAAI (2025)."},{"key":"e_1_3_2_1_69_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547787"},{"key":"e_1_3_2_1_70_1","unstructured":"Changshi Zhou Feng Luan Jiarui Hu Shaoqiang Meng Zhipeng Wang Yanchao Dong Yanmin Zhou and Bin He. 2025. Learning Efficient Robotic Garment Manipulation with Standardization. arXiv:2506.22769 [[cs.RO](http:\/\/cs.ro\/)] https:\/\/arxiv.org\/abs\/2506.22769"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Zhe Li Xiangfei Qiu Peng Chen Yihang Wang Hanyin Cheng Yang Shu Jilin Hu Chenjuan Guo Aoying Zhou Qingsong Wen et al. 2025. TSFM-Bench: A Comprehensive and Unified Benchmark of Foundation Models for Time Series Forecasting. In SIGKDD.","DOI":"10.1145\/3711896.3737442"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-024-02205-5"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449986"},{"key":"e_1_3_2_1_74_1","volume-title":"Modeling uncertainty with hedged instance embedding. arXiv preprint arXiv:1810.00319","author":"Oh Seong Joon","year":"2018","unstructured":"Seong Joon Oh, Kevin Murphy, Jiyan Pan, Joseph Roth, Florian Schroff, and Andrew Gallagher. 2018. Modeling uncertainty with hedged instance embedding. arXiv preprint arXiv:1810.00319 (2018)."},{"key":"e_1_3_2_1_75_1","first-page":"2888","article-title":"Bounding box regression with uncertainty for accurate object detection","author":"He Yihui","year":"2019","unstructured":"Yihui He, Chenchen Zhu, Jianren Wang, Marios Savvides, and Xiangyu Zhang. 2019. Bounding box regression with uncertainty for accurate object detection. In Proceedings of CVPR. 2888-2897.","journal-title":"Proceedings of CVPR."},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00831"},{"key":"e_1_3_2_1_77_1","volume-title":"Proceedings of the International Conference on Learning Representations","author":"Chen Yiyang","year":"2024","unstructured":"Yiyang Chen, Zhedong Zheng, Wei Ji, Leigang Qu, and Tat-Seng Chua. 2024. Composed image retrieval with text feedback via multi-grained uncertainty regularization. Proceedings of the International Conference on Learning Representations (2024)."},{"key":"e_1_3_2_1_78_1","volume-title":"UATVR: Uncertainty-Adaptive Text-Video Retrieval. In IEEE\/CVF International Conference on Computer Vision (ICCV). 13677-13687","author":"Fang Bo","year":"2023","unstructured":"Bo Fang, Wenhao Wu, Chang Liu, Yu Zhou, Yuxin Song, Weiping Wang, Xiangbo Shu, Xiangyang Ji, and Jingdong Wang. 2023. UATVR: Uncertainty-Adaptive Text-Video Retrieval. In IEEE\/CVF International Conference on Computer Vision (ICCV). 13677-13687."},{"key":"e_1_3_2_1_79_1","volume-title":"Auto-Encoding Variational Bayes. arXiv preprint arXiv:1312.6114","author":"Kingma Diederik P","year":"2013","unstructured":"Diederik P Kingma and Max Welling. 2013. Auto-Encoding Variational Bayes. arXiv preprint arXiv:1312.6114 (2013)."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00660"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01115"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00213"},{"key":"e_1_3_2_1_83_1","volume-title":"International conference on machine learning. PMLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748-8763."},{"key":"e_1_3_2_1_84_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681493"},{"key":"e_1_3_2_1_85_1","volume-title":"Chun-Mei Feng, et al.","author":"Xu Xinxing","year":"2024","unstructured":"Xinxing Xu, Yong Liu, Salman Khan, Fahad Khan, Wangmeng Zuo, Rick Siow Mong Goh, Chun-Mei Feng, et al., 2024. Sentence-level Prompts Benefit Composed Image Retrieval. In ICLR."},{"key":"e_1_3_2_1_86_1","volume-title":"Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval","author":"Wen Haokun","year":"2023","unstructured":"Haokun Wen, Xuemeng Song, Jianhua Yin, Jianlong Wu, Weili Guan, and Liqiang Nie. 2023. Self-Training Boosted Multi-Factor Matching Network for Composed Image Retrieval. IEEE TPAMI (2023)."},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1145\/3699715"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755445","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:28:19Z","timestamp":1765308499000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755445"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":87,"alternative-id":["10.1145\/3746027.3755445","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755445","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}