{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:09:55Z","timestamp":1765008595140,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,12,9]]},"DOI":"10.1145\/3743093.3770959","type":"proceedings-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:06:16Z","timestamp":1765008376000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Learn Concepts from Multi-Scale Visual Information for Compositional Zero-Shot Learning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-3612-9767","authenticated-orcid":false,"given":"Guanyu","family":"Wang","sequence":"first","affiliation":[{"name":"School of Software and Microelectronics, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4934-1001","authenticated-orcid":false,"given":"Zhijie","family":"Tan","sequence":"additional","affiliation":[{"name":"School of Software and Microelectronics, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3330-0037","authenticated-orcid":false,"given":"Xu","family":"Chu","sequence":"additional","affiliation":[{"name":"School of Software and Microelectronics, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0343-5265","authenticated-orcid":false,"given":"Xinrong","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Software and Microelectronics, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3564-4610","authenticated-orcid":false,"given":"Tong","family":"Mo","sequence":"additional","affiliation":[{"name":"School of Software and Microelectronics, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2958-3097","authenticated-orcid":false,"given":"Weiping","family":"Li","sequence":"additional","affiliation":[{"name":"School of Software and Microelectronics, Peking University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"e_1_3_3_2_2_2","first-page":"1462","volume-title":"Advances in Neural Information Processing Systems","author":"Atzmon Yuval","year":"2020","unstructured":"Yuval Atzmon, Felix Kreuk, Uri Shalit, and Gal Chechik. 2020. A causal view of compositional zero-shot recognition. In Advances in Neural Information Processing Systems , Vol.\u00a033. 1462\u20131473."},{"key":"e_1_3_3_2_3_2","volume-title":"International Conference on Learning Representations","author":"Dosovitskiy Alexey","year":"2021","unstructured":"Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, and Neil Houlsby. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale. In International Conference on Learning Representations."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01470"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28000"},{"key":"e_1_3_3_2_7_2","doi-asserted-by":"crossref","unstructured":"Robert\u00a0A Jacobs Michael\u00a0I Jordan Steven\u00a0J Nowlan and Geoffrey\u00a0E Hinton. 1991. Adaptive mixtures of local experts. Neural computation 3 1 (1991) 79\u201387.","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i3.28043"},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00342"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00911"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01612"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01133"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Yu Liu Jianghao Li Yanyi Zhang Qi Jia Weimin Wang Nan Pu and Nicu Sebe. 2024. PMGNet: Disentanglement and entanglement benefit mutually for compositional zero-shot learning. Computer Vision and Image Understanding 249 (2024) 104197.","DOI":"10.1016\/j.cviu.2024.104197"},{"key":"e_1_3_3_2_14_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00518"},{"key":"e_1_3_3_2_15_2","doi-asserted-by":"crossref","unstructured":"Massimiliano Mancini Muhammad\u00a0Ferjad Naeem Yongqin Xian and Zeynep Akata. 2022. Learning graph embeddings for open world compositional zero-shot learning. IEEE Transactions on pattern analysis and machine intelligence 46 3 (2022) 1545\u20131560.","DOI":"10.1109\/TPAMI.2022.3163667"},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/9780262514620.001.0001"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.129"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00101"},{"key":"e_1_3_3_2_19_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_11"},{"key":"e_1_3_3_2_20_2","unstructured":"Nihal\u00a0V Nayak Peilin Yu and Stephen\u00a0H Bach. 2022. Learning to compose soft prompts for compositional zero-shot learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2204.03574 (2022)."},{"key":"e_1_3_3_2_21_2","volume-title":"Advances in Neural Information Processing Systems","author":"Palatucci Mark","year":"2009","unstructured":"Mark Palatucci, Dean Pomerleau, Geoffrey\u00a0E Hinton, and Tom\u00a0M Mitchell. 2009. Zero-shot Learning with Semantic Output Codes. In Advances in Neural Information Processing Systems , Y.\u00a0Bengio, D.\u00a0Schuurmans, J.\u00a0Lafferty, C.\u00a0Williams, and A.\u00a0Culotta (Eds.), Vol.\u00a022. Curran Associates, Inc."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"Aditya Panda and Dipti\u00a0Prasad Mukherjee. 2024. Compositional zero-shot learning using multi-branch graph convolution and cross-layer knowledge sharing. Pattern Recognition 145 (2024) 109916.","DOI":"10.1016\/j.patcog.2023.109916"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00369"},{"key":"e_1_3_3_2_24_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748\u20138763."},{"key":"e_1_3_3_2_25_2","first-page":"12116","volume-title":"Advances in Neural Information Processing Systems","author":"Raghu Maithra","year":"2021","unstructured":"Maithra Raghu, Thomas Unterthiner, Simon Kornblith, Chiyuan Zhang, and Alexey Dosovitskiy. 2021. Do Vision Transformers See Like Convolutional Neural Networks?. In Advances in Neural Information Processing Systems , M.\u00a0Ranzato, A.\u00a0Beygelzimer, Y.\u00a0Dauphin, P.S. Liang, and J.\u00a0Wortman Vaughan (Eds.), Vol.\u00a034. Curran Associates, Inc., 12116\u201312128."},{"key":"e_1_3_3_2_26_2","first-page":"2152","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","author":"Romera-Paredes Bernardino","year":"2015","unstructured":"Bernardino Romera-Paredes and Philip Torr. 2015. An embarrassingly simple approach to zero-shot learning. In Proceedings of the 32nd International Conference on Machine Learning. 2152\u20132161."},{"key":"e_1_3_3_2_27_2","first-page":"10641","volume-title":"Advances in Neural Information Processing Systems","author":"Ruis Frank","year":"2021","unstructured":"Frank Ruis, Gertjan Burghouts, and Doina Bucur. 2021. Independent Prototype Propagation for Zero-Shot Compositionality. In Advances in Neural Information Processing Systems. 10641\u201310653."},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01329"},{"key":"e_1_3_3_2_29_2","volume-title":"Advances in Neural Information Processing Systems","author":"Socher Richard","year":"2013","unstructured":"Richard Socher, Milind Ganjoo, Christopher\u00a0D Manning, and Andrew Ng. 2013. Zero-Shot Learning Through Cross-Modal Transfer. In Advances in Neural Information Processing Systems , C.J. Burges, L.\u00a0Bottou, M.\u00a0Welling, Z.\u00a0Ghahramani, and K.Q. Weinberger (Eds.), Vol.\u00a026. Curran Associates, Inc."},{"key":"e_1_3_3_2_30_2","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan\u00a0N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00581"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"crossref","unstructured":"Yongqin Xian Bernt Schiele and Zeynep Akata. 2017. Zero-Shot Learning \u2014 The Good the Bad and the Ugly. 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017) 3077\u20133086.","DOI":"10.1109\/CVPR.2017.328"},{"key":"e_1_3_3_2_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00567"},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.32"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Tian Zhang Kongming Liang Ruoyi Du Wei Chen and Zhanyu Ma. 2024. Disentangling Before Composing: Learning Invariant Disentangled Features for Compositional Zero-Shot Learning. IEEE Transactions on Pattern Analysis and Machine Intelligence (2024).","DOI":"10.1109\/TPAMI.2024.3487222"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_20"}],"event":{"name":"MMAsia '25: ACM Multimedia Asia","location":"Kuala Lumpur Malaysia","acronym":"MMAsia '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 7th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3743093.3770959","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T08:07:30Z","timestamp":1765008450000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3743093.3770959"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":35,"alternative-id":["10.1145\/3743093.3770959","10.1145\/3743093"],"URL":"https:\/\/doi.org\/10.1145\/3743093.3770959","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"2025-12-06","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}