{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T16:35:14Z","timestamp":1778258114820,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["72071116"],"award-info":[{"award-number":["72071116"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Ningbo Municipal Bureau of Science and Technology","award":["2022Z173, 2022Z217, 2023Z138, 2023Z237, 2024Z110"],"award-info":[{"award-number":["2022Z173, 2022Z217, 2023Z138, 2023Z237, 2024Z110"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681246","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"4841-4850","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Hierarchical Perceptual and Predictive Analogy-Inference Network for Abstract Visual Reasoning"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6319-1639","authenticated-orcid":false,"given":"Wentao","family":"He","sequence":"first","affiliation":[{"name":"University of Nottingham Ningbo China, Ningbo, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4619-6590","authenticated-orcid":false,"given":"Jianfeng","family":"Ren","sequence":"additional","affiliation":[{"name":"University of Nottingham Ningbo China, Ningbo, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1722-568X","authenticated-orcid":false,"given":"Ruibin","family":"Bai","sequence":"additional","affiliation":[{"name":"University of Nottingham Ningbo China, Ningbo, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9104-2315","authenticated-orcid":false,"given":"Xudong","family":"Jiang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the International Conference on Machine Learning","volume":"80","author":"Barrett David G","year":"2018","unstructured":"David G Barrett, Felix Hill, Adam Santoro, Ari Morcos, and Timothy Lillicrap. 2018. Measuring Abstract Reasoning in Neural Networks. In Proceedings of the International Conference on Machine Learning, Vol. 80. 511--520."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01237"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Chen Chun-Fu","year":"2021","unstructured":"Chun-Fu Chen, Rameswar Panda, and Quanfu Fan. 2021. RegionViT: Regional-to-Local Attention for Vision Transformers. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01043"},{"key":"e_1_3_2_1_5_1","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Dai Wang-Zhou","year":"2019","unstructured":"Wang-Zhou Dai, Qiuling Xu, Yang Yu, and Zhi-Hua Zhou. 2019. Bridging Machine Learning and Logical Reasoning by Abductive Learning. In Advances in Neural Information Processing Systems, Vol. 32."},{"key":"e_1_3_2_1_6_1","volume-title":"International Handbook of Thinking and Reasoning","author":"Gentner Dedre","unstructured":"Dedre Gentner and Francisco Maravilla. 2017. Analogical Reasoning. In International Handbook of Thinking and Reasoning. Routledge, 186--203."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00371-023-02930-x"},{"key":"e_1_3_2_1_9_1","volume-title":"Two-stage Rule-induction Visual Reasoning on RPMs with an Application to Video Prediction. arXiv preprint arXiv:2111.12301","author":"He Wentao","year":"2021","unstructured":"Wentao He, Jianfeng Ren, Ruibin Bai, and Xudong Jiang. 2021. Two-stage Rule-induction Visual Reasoning on RPMs with an Application to Video Prediction. arXiv preprint arXiv:2111.12301 (2021)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25072"},{"key":"e_1_3_2_1_11_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Hill Felix","year":"2018","unstructured":"Felix Hill, Adam Santoro, David Barrett, Ari Morcos, and Timothy Lillicrap. 2018. Learning to Make Analogies by Contrasting Abstract Relational Structure. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i6.16629"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00888"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00147"},{"key":"e_1_3_2_1_15_1","volume-title":"Squeeze-and-Excitation Networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 7132--7141","author":"Hu Jie","year":"2018","unstructured":"Jie Hu, Li Shen, and Gang Sun. 2018. Squeeze-and-Excitation Networks. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 7132--7141."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16248"},{"key":"e_1_3_2_1_17_1","first-page":"72096","article-title":"Language is not All You Need: Aligning Perception with Language Models","volume":"36","author":"Huang Shaohan","year":"2024","unstructured":"Shaohan Huang, Li Dong, Wenhui Wang, Yaru Hao, Saksham Singhal, Shuming Ma, Tengchao Lv, Lei Cui, Owais Khan Mohammed, Barun Patra, et al. 2024. Language is not All You Need: Aligning Perception with Language Models. In Advances in Neural Information Processing Systems, Vol. 36. 72096--72109.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.215"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095044"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02059"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the International Conference on Machine Learning. 5884--5894","author":"Li Qing","year":"2020","unstructured":"Qing Li, Siyuan Huang, Yining Hong, Yixin Chen, Ying Nian Wu, and Song-Chun Zhu. 2020. Closed Loop Neural-symbolic Learning via Integrating Neural Perception, Grammar Parsing, and Symbolic Reasoning. In Proceedings of the International Conference on Machine Learning. 5884--5894."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3286259"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01512"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3284038"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1814779116"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2022.11.011"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Mondal Shanka Subhra","year":"2022","unstructured":"Shanka Subhra Mondal, Taylor Webb, and Jonathan Cohen. 2022. Learning to Reason over Visual Objects. In Proceedings of the International Conference on Learning Representations."},{"key":"e_1_3_2_1_29_1","first-page":"16468","article-title":"Bongard-LOGO: A New Benchmark for Human-level Concept Learning and Reasoning","volume":"33","author":"Nie Weili","year":"2020","unstructured":"Weili Nie, Zhiding Yu, Lei Mao, Ankit B Patel, Yuke Zhu, and Anima Anandkumar. 2020. Bongard-LOGO: A New Benchmark for Human-level Concept Learning and Reasoning. In Advances in Neural Information Processing Systems, Vol. 33. 16468--16480.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics. 3235--3245","author":"Rossiello Gaetano","year":"2019","unstructured":"Gaetano Rossiello, Alfio Gliozzo, Robert Farrell, Nicolas R Fauceglia, and Michael Glass. 2019. Learning Relational Representations by Analogy using Hierarchical Siamese Networks. In Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics. 3235--3245."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3343031.3356082"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25325"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240563"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096300"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58583-9_36"},{"key":"e_1_3_2_1_36_1","article-title":"Visualizing Data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Laurens Van der Maaten and Geoffrey Hinton. 2008. Visualizing Data using t-SNE. Journal of Machine Learning Research, Vol. 9, 11 (2008).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613822"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-40804-x"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41562-023-01659-w"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00649"},{"key":"e_1_3_2_1_41_1","volume-title":"Proceedings of the International Conference on Machine Learning","volume":"202","author":"Yang Lingxiao","year":"2023","unstructured":"Lingxiao Yang, Hongzhi You, Zonglei Zhen, Dahui Wang, Xiaohong Wan, Xiaohua Xie, and Ru-Yuan Zhang. 2023. Neural Prediction Errors enable Analogical Visual Reasoning in Human Standard Intelligence Tests. In Proceedings of the International Conference on Machine Learning, Vol. 202. 39572--39583."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP49359.2023.10222286"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2972830"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475638"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00688"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00546"},{"key":"e_1_3_2_1_47_1","first-page":"1075","article-title":"Learning Perceptual Inference by Contrasting","volume":"32","author":"Zhang Chi","year":"2019","unstructured":"Chi Zhang, Baoxiong Jia, Feng Gao, Yixin Zhu, HongJing Lu, and Song-Chun Zhu. 2019. Learning Perceptual Inference by Contrasting. In Advances in Neural Information Processing Systems, Vol. 32. 1075--1087.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680820"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3205207"},{"key":"e_1_3_2_1_50_1","volume-title":"Proceedings of the International Conference on Learning Representations.","author":"Zhuo Tao","year":"2021","unstructured":"Tao Zhuo and Mohan Kankanhalli. 2021. Effective Abstract Reasoning with Dual-Contrast Network. In Proceedings of the International Conference on Learning Representations."}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681246","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681246","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:42Z","timestamp":1750295862000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681246"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":50,"alternative-id":["10.1145\/3664647.3681246","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681246","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}