{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:20:47Z","timestamp":1765308047393,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":68,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172300, 62372326, 32470441"],"award-info":[{"award-number":["62172300, 62372326, 32470441"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755483","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"4504-4513","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Fine-grained Zero-Shot Object Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-8623-0976","authenticated-orcid":false,"given":"Hongxu","family":"Ma","sequence":"first","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6773-598X","authenticated-orcid":false,"given":"Chenbo","family":"Zhang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9532-5219","authenticated-orcid":false,"given":"Lu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1701-1489","authenticated-orcid":false,"given":"Jiaogen","family":"Zhou","sequence":"additional","affiliation":[{"name":"Huaiyin Normal Univeristy, Huaian, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2313-7635","authenticated-orcid":false,"given":"Jihong","family":"Guan","sequence":"additional","affiliation":[{"name":"Tongji University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1949-2768","authenticated-orcid":false,"given":"Shuigeng","family":"Zhou","sequence":"additional","affiliation":[{"name":"Fudan University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"crossref","unstructured":"Ankan Bansal Karan Sikka Gaurav Sharma Rama Chellappa and Ajay Divakaran. 2018. Zero-shot object detection.","DOI":"10.1007\/978-3-030-01246-5_24"},{"volume-title":"Food-101-mining discriminative components with random forests","author":"Bossard Lukas","key":"e_1_3_2_1_2_1","unstructured":"Lukas Bossard, Matthieu Guillaumin, and Luc Van Gool. 2014. Food-101-mining discriminative components with random forests. Springer."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Dongliang Chang Kaiyue Pang Yixiao Zheng Zhanyu Ma Yi-Zhe Song and Jun Guo. 2021. Your'' Flamingo'' is my'' Bird'': fine-grained or not.","DOI":"10.1109\/CVPR46437.2021.01131"},{"key":"e_1_3_2_1_4_1","volume-title":"Hsva: Hierarchical semantic-visual adaptation for zero-shot learning.","author":"Chen Shiming","year":"2021","unstructured":"Shiming Chen, Guosen Xie, Yang Liu, Qinmu Peng, Baigui Sun, Hao Li, Xinge You, and Ling Shao. 2021. Hsva: Hierarchical semantic-visual adaptation for zero-shot learning. Vol. 34 (2021)."},{"key":"e_1_3_2_1_5_1","unstructured":"Adam Coates Andrew Ng and Honglak Lee. 2011. An analysis of single-layer networks in unsupervised feature learning."},{"key":"e_1_3_2_1_6_1","volume-title":"Ramazan Gokberk Cinbis, and Nazli Ikizler-Cinbis","author":"Demirel Berkan","year":"2018","unstructured":"Berkan Demirel, Ramazan Gokberk Cinbis, and Nazli Ikizler-Cinbis. 2018. Zero-shot object detection by hybrid region embedding. arXiv preprint arXiv:1805.06157 (2018)."},{"key":"e_1_3_2_1_7_1","volume-title":"Maximum-entropy fine grained classification","author":"Dubey Abhimanyu","year":"2018","unstructured":"Abhimanyu Dubey, Otkrist Gupta, Ramesh Raskar, and Nikhil Naik. 2018. Maximum-entropy fine grained classification. Vol. 31 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Christopher KI Williams, John Winn, and Andrew Zisserman.","author":"Everingham Mark","year":"2010","unstructured":"Mark Everingham, Luc Van Gool, Christopher KI Williams, John Winn, and Andrew Zisserman. 2010. The pascal visual object classes (voc) challenge. Vol. 88 (2010)."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Qi Fan Wei Zhuo Chi-Keung Tang and Yu-Wing Tai. 2020. Few-shot object detection with attention-RPN and multi-relation detector.","DOI":"10.1109\/CVPR42600.2020.00407"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Ross Girshick. 2015. Fast r-cnn.","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Dikshant Gupta Aditya Anantharaman Nehal Mamgain Vineeth N Balasubramanian CV Jawahar et al. 2020. A multi-space approach to zero-shot object detection.","DOI":"10.1109\/WACV45572.2020.9093384"},{"key":"e_1_3_2_1_12_1","unstructured":"Guangxing Han Jiawei Ma Shiyuan Huang Long Chen and Shih-Fu Chang. 2022. Few-shot object detection with fully cross-transformer."},{"key":"e_1_3_2_1_13_1","volume-title":"Syed Waqas Zamir, and Fahad Shahbaz Khan","author":"Hayat Nasir","year":"2020","unstructured":"Nasir Hayat, Munawar Hayat, Shafin Rahman, Salman Khan, Syed Waqas Zamir, and Fahad Shahbaz Khan. 2020. Synthesizing the unseen for zero-shot object detection."},{"key":"e_1_3_2_1_14_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition."},{"key":"e_1_3_2_1_15_1","volume-title":"Fast fine-grained image classification via weakly supervised discriminative localization","author":"He Xiangteng","year":"2018","unstructured":"Xiangteng He, Yuxin Peng, and Junjie Zhao. 2018. Fast fine-grained image classification via weakly supervised discriminative localization. Vol. 29, 5 (2018)."},{"key":"e_1_3_2_1_16_1","volume-title":"Which and how many regions to gaze: Focus discriminative regions for fine-grained visual categorization","author":"He Xiangteng","year":"2019","unstructured":"Xiangteng He, Yuxin Peng, and Junjie Zhao. 2019. Which and how many regions to gaze: Focus discriminative regions for fine-grained visual categorization. Vol. 127 (2019)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","unstructured":"Peiliang Huang Junwei Han De Cheng and Dingwen Zhang. 2022. Robust region feature synthesizer for zero-shot object detection.","DOI":"10.1109\/CVPR52688.2022.00747"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"crossref","unstructured":"Peiliang Huang Dingwen Zhang De Cheng Longfei Han Pengfei Zhu and Junwei Han. 2024. M-RRFS: A Memory-Based Robust Region Feature Synthesizer for Zero-Shot Object Detection. (2024).","DOI":"10.1007\/s11263-024-02112-9"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Dat Huynh and Ehsan Elhamifar. 2020. Fine-grained generalized zero-shot learning via dense attribute-based attention.","DOI":"10.1109\/CVPR42600.2020.00454"},{"key":"e_1_3_2_1_20_1","volume-title":"Zhongfei Mark Zhang, et al","author":"Ji Zhong","year":"2018","unstructured":"Zhong Ji, Yanwei Fu, Jichang Guo, Yanwei Pang, Zhongfei Mark Zhang, et al., 2018. Stacked semantics-guided attention model for fine-grained zero-shot learning. Vol. 31 (2018)."},{"key":"e_1_3_2_1_21_1","volume-title":"Multimodal LLMs Can Reason about Aesthetics in Zero-Shot. arXiv preprint arXiv:2501.09012","author":"Jiang Ruixiang","year":"2025","unstructured":"Ruixiang Jiang and Changwen Chen. 2025. Multimodal LLMs Can Reason about Aesthetics in Zero-Shot. arXiv preprint arXiv:2501.09012 (2025)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2013.77"},{"volume-title":"Zero-shot object detection based on dynamic semantic vectors","author":"Li Haoyu","key":"e_1_3_2_1_23_1","unstructured":"Haoyu Li, Jilin Mei, Jiancong Zhou, and Yu Hu. 2023. Zero-shot object detection based on dynamic semantic vectors. IEEE."},{"key":"e_1_3_2_1_24_1","volume-title":"From Visuals to Vocabulary: Establishing Equivalence Between Image and Text Token Through Autoregressive Pre-training in MLLMs. arXiv preprint arXiv:2502.09093","author":"Li Mingxiao","year":"2025","unstructured":"Mingxiao Li, Fang Qu, Zhanpeng Chen, Na Su, Zhizhou Zhong, Ziyang Chen, Nan Du, and Xiaolong Li. 2025a. From Visuals to Vocabulary: Establishing Equivalence Between Image and Text Token Through Autoregressive Pre-training in MLLMs. arXiv preprint arXiv:2502.09093 (2025)."},{"key":"e_1_3_2_1_25_1","volume-title":"VISTA: Enhancing Vision-Text Alignment in MLLMs via Cross-Modal Mutual Information Maximization. arXiv preprint arXiv:2505.10917","author":"Li Mingxiao","year":"2025","unstructured":"Mingxiao Li, Na Su, Fang Qu, Zhizhou Zhong, Ziyang Chen, Yuan Li, Zhaopeng Tu, and Xiaolong Li. 2025b. VISTA: Enhancing Vision-Text Alignment in MLLMs via Cross-Modal Mutual Information Maximization. arXiv preprint arXiv:2505.10917 (2025)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00614"},{"key":"e_1_3_2_1_28_1","volume-title":"Generative Regression Based Watch Time Prediction for Short-Video Recommendation. arXiv preprint arXiv:2412.20211","author":"Ma Hongxu","year":"2025","unstructured":"Hongxu Ma, Kai Tian, Tao Zhang, Xuefeng Zhang, Han Zhou, Chunjie Chen, Han Li, Jihong Guan, and Shuigeng Zhou. 2025. Generative Regression Based Watch Time Prediction for Short-Video Recommendation. arXiv preprint arXiv:2412.20211 (2025)."},{"key":"e_1_3_2_1_29_1","volume-title":"Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151","author":"Maji Subhransu","year":"2013","unstructured":"Subhransu Maji, Esa Rahtu, Juho Kannala, Matthew Blaschko, and Andrea Vedaldi. 2013. Fine-grained visual classification of aircraft. arXiv preprint arXiv:1306.5151 (2013)."},{"key":"e_1_3_2_1_30_1","article-title":"Zero-shot object detection with attributes-based category similarity","volume":"67","author":"Mao Qiaomei","year":"2020","unstructured":"Qiaomei Mao, Chong Wang, Shenghao Yu, Ye Zheng, and Yuqi Li. 2020. Zero-shot object detection with attributes-based category similarity. IEEE Transactions on Circuits and Systems II: Express Briefs, Vol. 67, 5 (2020).","journal-title":"IEEE Transactions on Circuits and Systems II: Express Briefs"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01975"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICVGIP.2008.47"},{"volume-title":"Zero-shot object detection: Learning to simultaneously recognize and localize novel concepts","author":"Rahman Shafin","key":"e_1_3_2_1_33_1","unstructured":"Shafin Rahman, Salman Khan, and Fatih Porikli. 2018. Zero-shot object detection: Learning to simultaneously recognize and localize novel concepts. Springer."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.5555\/AAI28102948"},{"key":"e_1_3_2_1_35_1","unstructured":"Aditya Ramesh Mikhail Pavlov Gabriel Goh Scott Gray Chelsea Voss Alec Radford Mark Chen and Ilya Sutskever. 2021. Zero-shot text-to-image generation. PMLR."},{"key":"e_1_3_2_1_36_1","unstructured":"Scott Reed Zeynep Akata Xinchen Yan Lajanugen Logeswaran Bernt Schiele and Honglak Lee. 2016. Generative adversarial text to image synthesis. PMLR."},{"key":"e_1_3_2_1_37_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Vol. 28 (2015)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"crossref","unstructured":"Robin Rombach Andreas Blattmann Dominik Lorenz Patrick Esser and Bj\u00f6rn Ommer. 2022. High-resolution image synthesis with latent diffusion models.","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Olga Russakovsky Jia Deng Hao Su Jonathan Krause Sanjeev Satheesh Sean Ma Zhiheng Huang Andrej Karpathy Aditya Khosla Michael Bernstein et al. 2015. Imagenet large scale visual recognition challenge. Vol. 115 (2015).","DOI":"10.1007\/s11263-015-0816-y"},{"key":"e_1_3_2_1_40_1","volume-title":"Resolving Semantic Confusions for Improved Zero-Shot Detection. arXiv preprint arXiv:2212.06097","author":"Sarma Sandipan","year":"2022","unstructured":"Sandipan Sarma, Sushil Kumar, and Arijit Sur. 2022. Resolving Semantic Confusions for Improved Zero-Shot Detection. arXiv preprint arXiv:2212.06097 (2022)."},{"volume-title":"Fine-grained recognition: Accounting for subtle differences between similar classes","author":"Sun Guolei","key":"e_1_3_2_1_41_1","unstructured":"Guolei Sun, Hisham Cholakkal, Salman Khan, Fahad Khan, and Ling Shao. 2020. Fine-grained recognition: Accounting for subtle differences between similar classes, Vol. 34."},{"key":"e_1_3_2_1_42_1","volume-title":"Df-gan: A simple and effective baseline for text-to-image synthesis.","author":"Tao Ming","year":"2022","unstructured":"Ming Tao, Hao Tang, Fei Wu, Xiao-Yuan Jing, Bing-Kun Bao, and Changsheng Xu. 2022. Df-gan: A simple and effective baseline for text-to-image synthesis."},{"key":"e_1_3_2_1_43_1","unstructured":"Catherine Wah Steve Branson Peter Welinder Pietro Perona and Serge Belongie. 2011. The caltech-ucsd birds-200-2011 dataset. (2011)."},{"key":"e_1_3_2_1_44_1","volume-title":"SAUI: Scale-Aware Unseen Imagineer for Zero-Shot Object Detection","author":"Wang Jiahao","year":"2024","unstructured":"Jiahao Wang, Caixia Yan, Weizhan Zhang, Huan Liu, Hao Sun, and Qinghua Zheng. 2024. SAUI: Scale-Aware Unseen Imagineer for Zero-Shot Object Detection, Vol. 38."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Zhihui Wang Shijie Wang Shuhui Yang Haojie Li Jianjun Li and Zezhou Li. 2020. Weakly supervised fine-grained image classification via guassian mixture model oriented discriminative learning.","DOI":"10.1109\/CVPR42600.2020.00977"},{"key":"e_1_3_2_1_46_1","volume-title":"Attngan: Fine-grained text to image generation with attentional generative adversarial networks.","author":"Xu Tao","year":"2018","unstructured":"Tao Xu, Pengchuan Zhang, Qiuyuan Huang, Han Zhang, Zhe Gan, Xiaolei Huang, and Xiaodong He. 2018. Attngan: Fine-grained text to image generation with attentional generative adversarial networks."},{"key":"e_1_3_2_1_47_1","volume-title":"2nd Workshop on Advancing Neural Network Training: Computational Efficiency, Scalability, and Resource Optimization (WANT@ ICML","author":"Xu Zunnan","year":"2024","unstructured":"Zunnan Xu, Jiaqi Huang, Ting Liu, Yong Liu, Haonan Han, Kehong Yuan, and Xiu Li. 2024. Enhancing fine-grained multi-modal alignment via adapters: a parameter-efficient training framework for referring image segmentation. In 2nd Workshop on Advancing Neural Network Training: Computational Efficiency, Scalability, and Resource Optimization (WANT@ ICML 2024)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01483"},{"key":"e_1_3_2_1_49_1","unstructured":"Caixia Yan Xiaojun Chang Minnan Luo Huan Liu Xiaoqin Zhang and Qinghua Zheng. 2022. Semantics-guided contrastive network for zero-shot object detection. (2022)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3011807"},{"key":"e_1_3_2_1_51_1","volume-title":"Ap-10k: A benchmark for animal pose estimation in the wild. arXiv preprint arXiv:2108.12617","author":"Yu Hang","year":"2021","unstructured":"Hang Yu, Yufei Xu, Jing Zhang, Wei Zhao, Ziyu Guan, and Dacheng Tao. 2021. Ap-10k: A benchmark for animal pose estimation in the wild. arXiv preprint arXiv:2108.12617 (2021)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i7.28528"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681262"},{"key":"e_1_3_2_1_54_1","volume-title":"Stackgan: Text to photo-realistic image synthesis with stacked generative adversarial networks.","author":"Zhang Han","year":"2017","unstructured":"Han Zhang, Tao Xu, Hongsheng Li, Shaoting Zhang, Xiaogang Wang, Xiaolei Huang, and Dimitris N Metaxas. 2017. Stackgan: Text to photo-realistic image synthesis with stacked generative adversarial networks."},{"key":"e_1_3_2_1_55_1","volume-title":"Stackgan: Realistic image synthesis with stacked generative adversarial networks.","author":"Zhang Han","year":"2018","unstructured":"Han Zhang, Tao Xu, Hongsheng Li, Shaoting Zhang, Xiaogang Wang, Xiaolei Huang, and Dimitris N Metaxas. 2018. Stackgan: Realistic image synthesis with stacked generative adversarial networks. Vol. 41, 8 (2018)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Lu Zhang Yang Wang Jiaogen Zhou Chenbo Zhang Yinglu Zhang Jihong Guan Yatao Bian and Shuigeng Zhou. 2022. Hierarchical Few-Shot Object Detection: Problem Benchmark and Method.","DOI":"10.1145\/3503161.3548412"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00630"},{"key":"e_1_3_2_1_58_1","volume-title":"Hierarchical prototype learning for zero-shot recognition","author":"Zhang Xingxing","year":"2019","unstructured":"Xingxing Zhang, Shupeng Gui, Zhenfeng Zhu, Yao Zhao, and Ji Liu. 2019. Hierarchical prototype learning for zero-shot recognition. Vol. 22, 7 (2019)."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446683"},{"key":"e_1_3_2_1_60_1","volume-title":"Musetalk: Real-time high quality lip synchronization with latent space inpainting. arXiv preprint arXiv:2410.10122","author":"Zhang Yue","year":"2024","unstructured":"Yue Zhang, Zhizhou Zhong, Minhao Liu, Zhaokang Chen, Bin Wu, Yubin Zeng, Chao Zhan, Junxin Huang, Yingjie He, and Wenjiang Zhou. 2024d. Musetalk: Real-time high quality lip synchronization with latent space inpainting. arXiv preprint arXiv:2410.10122 (2024)."},{"key":"e_1_3_2_1_61_1","volume-title":"Gtnet: Generative transfer network for zero-shot object detection","author":"Zhao Shizhen","year":"2020","unstructured":"Shizhen Zhao, Changxin Gao, Yuanjie Shao, Lerenhan Li, Changqian Yu, Zhong Ji, and Nong Sang. 2020. Gtnet: Generative transfer network for zero-shot object detection, Vol. 34."},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"crossref","unstructured":"Heliang Zheng Jianlong Fu Tao Mei and Jiebo Luo. 2017. Learning multi-attention convolutional neural network for fine-grained image recognition.","DOI":"10.1109\/ICCV.2017.557"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"crossref","unstructured":"Ye Zheng Ruoran Huang Chuanqi Han Xi Huang and Li Cui. 2020. Background learnable cascade for zero-shot object detection.","DOI":"10.1007\/978-3-030-69535-4_7"},{"key":"e_1_3_2_1_64_1","doi-asserted-by":"crossref","unstructured":"Ye Zheng Jiahong Wu Yongqiang Qin Faen Zhang and Li Cui. 2021. Zero-shot instance segmentation.","DOI":"10.1109\/CVPR46437.2021.00262"},{"key":"e_1_3_2_1_65_1","volume-title":"SeeDS: Semantic Separable Diffusion Synthesizer for Zero-shot Food Detection. arXiv preprint arXiv:2310.04689","author":"Zhou Pengfei","year":"2023","unstructured":"Pengfei Zhou, Weiqing Min, Yang Zhang, Jiajun Song, Ying Jin, and Shuqiang Jiang. 2023. SeeDS: Semantic Separable Diffusion Synthesizer for Zero-shot Food Detection. arXiv preprint arXiv:2310.04689 (2023)."},{"key":"e_1_3_2_1_66_1","volume-title":"Dm-gan: Dynamic memory generative adversarial networks for text-to-image synthesis.","author":"Zhu Minfeng","year":"2019","unstructured":"Minfeng Zhu, Pingbo Pan, Wei Chen, and Yi Yang. 2019. Dm-gan: Dynamic memory generative adversarial networks for text-to-image synthesis."},{"key":"e_1_3_2_1_67_1","unstructured":"Pengkai Zhu Hanxiao Wang and Venkatesh Saligrama. 2020. Don't even look once: Synthesizing features for zero-shot detection."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240616"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755483","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:17:33Z","timestamp":1765307853000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755483"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":68,"alternative-id":["10.1145\/3746027.3755483","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755483","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}