{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T20:06:34Z","timestamp":1765310794372,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":87,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62441617"],"award-info":[{"award-number":["62441617"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Provincial Natural Science Foundation of China","award":["LD25F020001"],"award-info":[{"award-number":["LD25F020001"]}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["226-2025-00057"],"award-info":[{"award-number":["226-2025-00057"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Hong Kong SAR RGC Early Career Scheme","award":["26208924"],"award-info":[{"award-number":["26208924"]}]},{"name":"National Natural Science Foundation of China Young Scholar Fund","award":["62402408"],"award-info":[{"award-number":["62402408"]}]},{"name":"HKUST Sports Science and Technology Research Grant","award":["SSTRG24EG04"],"award-info":[{"award-number":["SSTRG24EG04"]}]},{"name":"AI Chip Center for Emerging Smart Systems"},{"name":"InnoHK initiative of the Innovation and Technology Commission of the Hong Kong Special Administrative Region Government"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755134","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:30:51Z","timestamp":1761377451000},"page":"3625-3634","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Zero-shot Compositional Action Recognition with Neural Logic Constraints"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3030-405X","authenticated-orcid":false,"given":"Gefan","family":"Ye","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5678-4487","authenticated-orcid":false,"given":"Lin","family":"Li","sequence":"additional","affiliation":[{"name":"AI Chip Center for Emerging Smart Systems, Hong Kong, China and The Hong Kong University of Science and Technology, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-0726-0196","authenticated-orcid":false,"given":"Kexin","family":"Li","sequence":"additional","affiliation":[{"name":"Zhejiang Tobacco Monopoly Administration, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6142-9914","authenticated-orcid":false,"given":"Jun","family":"Xiao","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6148-9709","authenticated-orcid":false,"given":"Long","family":"Chen","sequence":"additional","affiliation":[{"name":"The Hong Kong University of Science and Technology, Hong Kong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al.","author":"Achiam Josh","year":"2023","unstructured":"Josh Achiam, Steven Adler, Sandhini Agarwal, Lama Ahmad, Ilge Akkaya, Florencia Leoni Aleman, Diogo Almeida, Janko Altenschmidt, Sam Altman, Shyamal Anadkat, et al., 2023. Gpt-4 technical report. arXiv preprint arXiv:2303.08774 (2023)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TAI.2021.3076974"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547798"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103649"},{"volume-title":"Studies in Logic and the Foundations of Mathematics.","author":"Barwise Jon","key":"e_1_3_2_1_5_1","unstructured":"Jon Barwise. 1977. An introduction to first-order logic. In Studies in Logic and the Foundations of Mathematics. Vol. 90. Elsevier, 5-46."},{"key":"e_1_3_2_1_6_1","unstructured":"Xiao Bi Deli Chen Guanting Chen Shanhuang Chen Damai Dai Chengqi Deng Honghui Ding Kai Dong Qiushi Du Zhe Fu et al. 2024. Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint arXiv:2401.02954 (2024)."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNN.2011.2170180"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00756"},{"key":"e_1_3_2_1_9_1","first-page":"1","article-title":"Palm: Scaling language modeling with pathways","volume":"24","author":"Chowdhery Aakanksha","year":"2023","unstructured":"Aakanksha Chowdhery, Sharan Narang, Jacob Devlin, Maarten Bosma, Gaurav Mishra, Adam Roberts, Paul Barham, Hyung Won Chung, Charles Sutton, Sebastian Gehrmann, et al., 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research, Vol. 24, 240 (2023), 1-113.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_10_1","first-page":"1653","volume-title":"Proceedings of the Twenty-Second international joint conference on Artificial Intelligence","volume":"2","author":"de Penning Leo","year":"2011","unstructured":"Leo de Penning, Artur Garcez, Lu\u00eds C Lamb, and JJ Meyer. 2011. A neural-symbolic cognitive agent for online learning and reasoning. In Proceedings of the Twenty-Second international joint conference on Artificial Intelligence, Vol. 2. International Joint Conferences on Artificial Intelligence, 1653-1658."},{"key":"e_1_3_2_1_11_1","unstructured":"DeepSeek-AI. 2025. DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning. arXiv:2501.12948 [cs.CL] https:\/\/arxiv.org\/abs\/2501.12948"},{"key":"e_1_3_2_1_12_1","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171-4186."},{"key":"e_1_3_2_1_13_1","series-title":"Kurt G\u00f6del: Collected Works","volume-title":"Publications 1929-1936. Mind","author":"Feferman Solomon","year":"1998","unstructured":"Solomon Feferman, John W Dawson, Stephen C Kleene, Gregory H Moore, and Robert M Solovay. 1998. Kurt G\u00f6del: Collected Works, Vol. I: Publications 1929-1936. Mind, Vol. 107, 425 (1998)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512024"},{"key":"e_1_3_2_1_15_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Fischer Marc","year":"2019","unstructured":"Marc Fischer, Mislav Balunovic, Dana Drachsler-Cohen, Timon Gehr, Ce Zhang, and Martin Vechev. 2019. DL2: training and querying neural networks with logic. In International Conference on Machine Learning. PMLR, 1931-1941."},{"key":"e_1_3_2_1_16_1","volume-title":"Fast relational learning using bottom clause propositionalization with artificial neural networks. Machine learning","author":"Fran\u00e7a Manoel VM","year":"2014","unstructured":"Manoel VM Fran\u00e7a, Gerson Zaverucha, and Artur S d'Avila Garcez. 2014. Fast relational learning using bottom clause propositionalization with artificial neural networks. Machine learning, Vol. 94 (2014), 81-104."},{"key":"e_1_3_2_1_17_1","unstructured":"Peng Gao Jiaming Han Renrui Zhang Ziyi Lin Shijie Geng Aojun Zhou Wei Zhang Pan Lu Conghui He Xiangyu Yue et al. 2023. Llama-adapter v2: Parameter-efficient visual instruction model. arXiv preprint arXiv:2304.15010 (2023)."},{"key":"e_1_3_2_1_18_1","volume-title":"Neural-symbolic computing: An effective methodology for principled integration of machine learning and reasoning. arXiv preprint arXiv:1905.06088","author":"Avila Garcez Artur","year":"2019","unstructured":"Artur d'Avila Garcez, Marco Gori, Luis C Lamb, Luciano Serafini, Michael Spranger, and Son N Tran. 2019. Neural-symbolic computing: An effective methodology for principled integration of machine learning and reasoning. arXiv preprint arXiv:1905.06088 (2019)."},{"key":"e_1_3_2_1_19_1","volume-title":"Multimodal-gpt: A vision and language model for dialogue with humans. arXiv preprint arXiv:2305.04790","author":"Gong Tao","year":"2023","unstructured":"Tao Gong, Chengqi Lyu, Shilong Zhang, Yudong Wang, Miao Zheng, Qian Zhao, Kuikun Liu, Wenwei Zhang, Ping Luo, and Kai Chen. 2023. Multimodal-gpt: A vision and language model for dialogue with humans. arXiv preprint arXiv:2305.04790 (2023)."},{"key":"e_1_3_2_1_20_1","volume-title":"Machine Learning: A constraint-based approach","author":"Gori Marco","year":"2023","unstructured":"Marco Gori, Alessandro Betti, and Stefano Melacci. 2023. Machine Learning: A constraint-based approach. Elsevier."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00275"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01436"},{"volume-title":"Metamathematics of fuzzy logic","author":"H\u00e1jek Petr","key":"e_1_3_2_1_24_1","unstructured":"Petr H\u00e1jek. 2013. Metamathematics of fuzzy logic. Vol. 4. Springer Science & Business Media."},{"key":"e_1_3_2_1_25_1","volume-title":"Parameter-efficient fine-tuning for large models: A comprehensive survey. arXiv preprint arXiv:2403.14608","author":"Han Zeyu","year":"2024","unstructured":"Zeyu Han, Chao Gao, Jinyang Liu, Jeff Zhang, and Sai Qian Zhang. 2024. Parameter-efficient fine-tuning for large models: A comprehensive survey. arXiv preprint arXiv:2403.14608 (2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01470"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58555-6_35"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02266"},{"key":"e_1_3_2_1_29_1","volume-title":"Unified Framework for Open-World Compositional Zero-shot Learning. arXiv preprint arXiv:2412.04083","author":"Jayasekara Hirunima","year":"2024","unstructured":"Hirunima Jayasekara, Khoi Pham, Nirat Saini, and Abhinav Shrivastava. 2024. Unified Framework for Open-World Compositional Zero-shot Learning. arXiv preprint arXiv:2412.04083 (2024)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01025"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_15"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00382"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1049\/iet-cvi.2016.0355"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1038\/scientificamerican0793-76"},{"key":"e_1_3_2_1_35_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Zhenzhong","year":"2019","unstructured":"Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. 2019. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942 (2019)."},{"key":"e_1_3_2_1_36_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_37_1","volume-title":"Idpro: Flexible interactive video object segmentation by id-queried concurrent propagation","author":"Li Kexin","year":"2024","unstructured":"Kexin Li, Tao Jiang, Zongxin Yang, Yi Yang, Yueting Zhuang, and Jun Xiao. 2024c. Idpro: Flexible interactive video object segmentation by id-queried concurrent propagation. IEEE Transactions on Circuits and Systems for Video Technology (2024)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3611724"},{"key":"e_1_3_2_1_39_1","volume-title":"2024 e. Collaborative Hybrid Propagator for Temporal Misalignment in Audio-Visual Segmentation. arXiv preprint arXiv:2412.08161","author":"Li Kexin","year":"2024","unstructured":"Kexin Li, Zongxin Yang, Yi Yang, and Jun Xiao. 2024 e. Collaborative Hybrid Propagator for Temporal Misalignment in Audio-Visual Segmentation. arXiv preprint arXiv:2412.08161 (2024)."},{"key":"e_1_3_2_1_40_1","volume-title":"A survey on multimodal benchmarks: In the era of large ai models. arXiv preprint arXiv:2409.18142","author":"Li Lin","year":"2024","unstructured":"Lin Li, Guikun Chen, Hanrong Shi, Jun Xiao, and Long Chen. 2024a. A survey on multimodal benchmarks: In the era of large ai models. arXiv preprint arXiv:2409.18142 (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615017"},{"key":"e_1_3_2_1_42_1","volume-title":"European Conference on Computer Vision. Springer, 369-388","author":"Li Rongchang","year":"2024","unstructured":"Rongchang Li, Zhenhua Feng, Tianyang Xu, Linze Li, Xiao-Jun Wu, Muhammad Awais, Sara Atito, and Josef Kittler. 2024b. C2c: Component-to-composition learning for zero-shot compositional action recognition. In European Conference on Computer Vision. Springer, 369-388."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00911"},{"key":"e_1_3_2_1_44_1","volume-title":"Compositional Zero-Shot Learning with Contextualized Cues and Adaptive Contrastive Training. arXiv preprint arXiv:2412.07161","author":"Li Yun","year":"2024","unstructured":"Yun Li, Zhe Liu, and Lina Yao. 2024d. Compositional Zero-Shot Learning with Contextualized Cues and Adaptive Contrastive Training. arXiv preprint arXiv:2412.07161 (2024)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01133"},{"key":"e_1_3_2_1_46_1","unstructured":"Aixin Liu Bei Feng Bin Wang Bingxuan Wang Bo Liu Chenggang Zhao Chengqi Dengr Chong Ruan Damai Dai Daya Guo et al. 2024. Deepseek-v2: A strong economical and efficient mixture-of-experts language model. arXiv preprint arXiv:2405.04434 (2024)."},{"key":"e_1_3_2_1_47_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692 (2019)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3323012"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02256"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00518"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3163667"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00113"},{"key":"e_1_3_2_1_53_1","volume-title":"A logical calculus of the ideas immanent in nervous activity. The bulletin of mathematical biophysics","author":"McCulloch Warren S","year":"1943","unstructured":"Warren S McCulloch and Walter Pitts. 1943. A logical calculus of the ideas immanent in nervous activity. The bulletin of mathematical biophysics, Vol. 5 (1943), 115-133."},{"key":"e_1_3_2_1_54_1","first-page":"62","article-title":"Hybrid neural systems: from simple coupling to fully integrated neural networks","volume":"2","author":"McGarry Kenneth","year":"1999","unstructured":"Kenneth McGarry, Stefan Wermter, and John MacIntyre. 1999. Hybrid neural systems: from simple coupling to fully integrated neural networks. Neural Computing Surveys, Vol. 2, 1 (1999), 62-93.","journal-title":"Neural Computing Surveys"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_11"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018811"},{"key":"e_1_3_2_1_57_1","volume-title":"Learning to compose soft prompts for compositional zero-shot learning. arXiv preprint arXiv:2204.03574","author":"Nayak Nihal V","year":"2022","unstructured":"Nihal V Nayak, Peilin Yu, and Stephen H Bach. 2022. Learning to compose soft prompts for compositional zero-shot learning. arXiv preprint arXiv:2204.03574 (2022)."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09904-8"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-31951-8_25"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00369"},{"key":"e_1_3_2_1_61_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_62_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_63_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog Vol. 1 8 (2019) 9."},{"key":"e_1_3_2_1_64_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research, Vol. 21, 140 (2020), 1-67.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01329"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-49130-1_25"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2022.01.095"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1023\/A:1022665814563"},{"key":"e_1_3_2_1_69_1","volume-title":"Mass: Masked sequence to sequence pre-training for language generation. arXiv preprint arXiv:1905.02450","author":"Song Kaitao","year":"2019","unstructured":"Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, and Tie-Yan Liu. 2019. Mass: Masked sequence to sequence pre-training for language generation. arXiv preprint arXiv:1905.02450 (2019)."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475472"},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"crossref","unstructured":"Ron Sun and Lawrence A Bookman. 1994. Computational architectures integrating neural and symbolic processes: A perspective on the state of the art. (1994).","DOI":"10.1007\/b102608"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.557661"},{"key":"e_1_3_2_1_73_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al., 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_74_1","volume-title":"Deep logic networks: Inserting and extracting knowledge from deep belief networks","author":"Tran Son N","year":"2016","unstructured":"Son N Tran and Artur S d'Avila Garcez. 2016. Deep logic networks: Inserting and extracting knowledge from deep belief networks. IEEE transactions on neural networks and learning systems, Vol. 29, 2 (2016), 246-258."},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artint.2021.103602"},{"key":"e_1_3_2_1_76_1","doi-asserted-by":"publisher","DOI":"10.33073\/pjm-2023-013"},{"key":"e_1_3_2_1_77_1","volume-title":"Actionclip: A new paradigm for video action recognition. arXiv preprint arXiv:2109.08472","author":"Wang Mengmeng","year":"2021","unstructured":"Mengmeng Wang, Jiazheng Xing, and Yong Liu. 2021. Actionclip: A new paradigm for video action recognition. arXiv preprint arXiv:2109.08472 (2021)."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612490"},{"key":"e_1_3_2_1_79_1","volume-title":"Exploring the reasoning abilities of multimodal large language models (mllms): A comprehensive survey on emerging trends in multimodal reasoning. arXiv preprint arXiv:2401.06805","author":"Wang Yiqi","year":"2024","unstructured":"Yiqi Wang, Wentao Chen, Xiaotian Han, Xudong Lin, Haiteng Zhao, Yongfei Liu, Bohan Zhai, Jianbo Yuan, Quanzeng You, and Hongxia Yang. 2024. Exploring the reasoning abilities of multimodal large language models (mllms): A comprehensive survey on emerging trends in multimodal reasoning. arXiv preprint arXiv:2401.06805 (2024)."},{"key":"e_1_3_2_1_80_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00384"},{"key":"e_1_3_2_1_81_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547862"},{"key":"e_1_3_2_1_82_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2018.05.018"},{"key":"e_1_3_2_1_83_1","volume-title":"A benchmark for compositional visual reasoning. Advances in neural information processing systems","author":"Zerroug Aimen","year":"2022","unstructured":"Aimen Zerroug, Mohit Vaishnav, Julien Colin, Sebastian Musslick, and Thomas Serre. 2022. A benchmark for compositional visual reasoning. Advances in neural information processing systems, Vol. 35 (2022), 29776-29788."},{"key":"e_1_3_2_1_84_1","volume-title":"Mm-llms: Recent advances in multimodal large language models. arXiv preprint arXiv:2401.13601","author":"Zhang Duzhen","year":"2024","unstructured":"Duzhen Zhang, Yahan Yu, Jiahua Dong, Chenxing Li, Dan Su, Chenhui Chu, and Dong Yu. 2024. Mm-llms: Recent advances in multimodal large language models. arXiv preprint arXiv:2401.13601 (2024)."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_20"},{"key":"e_1_3_2_1_86_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00174"},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01631"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755134","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T20:03:03Z","timestamp":1765310583000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755134"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":87,"alternative-id":["10.1145\/3746027.3755134","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755134","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}