{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:57:24Z","timestamp":1781539044941,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T00:00:00Z","timestamp":1781481600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"Beijing Natural Science Foundation","award":["4242053"],"award-info":[{"award-number":["4242053"]}]},{"name":"Beijing Natural Science Foundation","award":["L242096"],"award-info":[{"award-number":["L242096"]}]},{"name":"National Natural Science Foundation of China","award":["62476262"],"award-info":[{"award-number":["62476262"]}]},{"name":"National Natural Science Foundation of China","award":["62271467"],"award-info":[{"award-number":["62271467"]}]},{"name":"National Natural Science Foundation of China","award":["62206263"],"award-info":[{"award-number":["62206263"]}]},{"name":"National Natural Science Foundation of China","award":["62306297"],"award-info":[{"award-number":["62306297"]}]},{"name":"National Natural Science Foundation of China","award":["62306296"],"award-info":[{"award-number":["62306296"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,6,16]]},"DOI":"10.1145\/3805622.3810744","type":"proceedings-article","created":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T14:42:57Z","timestamp":1781534577000},"page":"625-634","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AF-BEV: Object-Aware Adaptive Frustum-based BEV Aggregation for 3D Object Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2108-8940","authenticated-orcid":false,"given":"Bingyu","family":"Zhu","sequence":"first","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8306-1054","authenticated-orcid":false,"given":"Dongbo","family":"Yu","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9648-6073","authenticated-orcid":false,"given":"Yunbiao","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1799-3948","authenticated-orcid":false,"given":"Jun","family":"Xiao","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7348-5844","authenticated-orcid":false,"given":"Haiyong","family":"Jiang","sequence":"additional","affiliation":[{"name":"University of Chinese Academy of Sciences, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,6,15]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0T. Barron Ben Mildenhall Matthew Tancik Peter Hedman Ricardo Martin-Brualla and Pratul\u00a0P. Srinivasan. 2021. Mip-NeRF: A Multiscale Representation for Anti-Aliasing Neural Radiance Fields. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2103.13415 (2021). arxiv:https:\/\/arXiv.org\/abs\/2103.13415\u00a0[cs.CV]","DOI":"10.1109\/ICCV48922.2021.00580"},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"crossref","unstructured":"Jonathan\u00a0T. Barron Ben Mildenhall Dor Verbin Pratul\u00a0P. Srinivasan and Peter Hedman. 2023. Zip-NeRF: Anti-Aliased Grid-Based Neural Radiance Fields. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2304.06706 (2023). arxiv:https:\/\/arXiv.org\/abs\/2304.06706\u00a0[cs.CV]","DOI":"10.1109\/ICCV51070.2023.01804"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00938"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Holger Caesar Varun Bankiti Alex\u00a0H. Lang Sourabh Vora Venice\u00a0Erin Liong Qiang Xu Anush Krishnan Yu Pan Giancarlo Baldan and Oscar Beijbom. 2020. nuScenes: A multimodal dataset for autonomous driving. CVPR (2020).","DOI":"10.1109\/CVPR42600.2020.01164"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_3_1_7_2","volume-title":"ECCV","author":"Chen Anpei","year":"2024","unstructured":"Anpei Chen, Haofei Xu, Stefano Esposito, Siyu Tang, and Andreas Geiger. 2024. LaRa: Efficient Large-Baseline Radiance Fields. In ECCV."},{"key":"e_1_3_3_1_8_2","unstructured":"Shaoyu Chen Xinggang Wang Tianheng Cheng Qian Zhang Chang Huang and Wenyu Liu. 2022. Polar Parametrization for Vision-based Surround-View 3D Detection. arXiv:https:\/\/arXiv.org\/abs\/2206.10965 (2022)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681103"},{"key":"e_1_3_3_1_10_2","unstructured":"MMDetection3D Contributors. 2020. MMDetection3D: OpenMMLab next-generation platform for general 3D object detection. https:\/\/github.com\/open-mmlab\/mmdetection3d (2020)."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01169"},{"key":"e_1_3_3_1_12_2","unstructured":"Junjie Huang and Guan Huang. 2022. BEVDet4D: Exploit Temporal Cues in Multi-camera 3D Object Detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.17054 (2022)."},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i1.25185"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19839-7_21"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01298"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","unstructured":"Hongyang Li Chonghao Sima Jifeng Dai Wenhai Wang Lewei Lu Huijie Wang Jia Zeng Zhiqi Li Jiazhi Yang Hanming Deng Hao Tian Enze Xie Jiangwei Xie Li Chen Tianyu Li Yang Li Yulu Gao Xiaosong Jia Si Liu Jianping Shi Dahua Lin and Yu Qiao. 2023. Delving Into the Devils of Bird\u2019s-Eye-View Perception: A Review Evaluation and Recipe. TPAMI (2023) 1\u201320. 10.1109\/TPAMI.2023.3333838","DOI":"10.1109\/TPAMI.2023.3333838"},{"key":"e_1_3_3_1_17_2","unstructured":"Yinhao Li Zheng Ge Guanyi Yu Jinrong Yang Zengran Wang Yukang Shi Jianjian Sun and Zeming Li. 2023. BEVDepth: Acquisition of Reliable Depth for Multi-view 3D Object Detection. AAAI Conference on Artificial Intelligence (AAAI) (2023)."},{"key":"e_1_3_3_1_18_2","unstructured":"Yangguang Li Bin Huang Zeren Chen Yufeng Cui Feng Liang Mingzhu Shen Fenggang Liu Enze Xie Lu Sheng Wanli Ouyang et\u00a0al. 2023. Fast-BEV: A Fast and Strong Bird\u2019s-Eye View Perception Baseline. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2301.12511 (2023)."},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"crossref","unstructured":"Yiheng Li Yang Yang and Zhen Lei. 2025. CoreNet: Conflict Resolution Network for point-pixel misalignment and sub-task suppression of 3D LiDAR-camera object detection. Information Fusion 118 (2025) 102896.","DOI":"10.1016\/j.inffus.2024.102896"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20077-9_1"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00637"},{"key":"e_1_3_3_1_22_2","unstructured":"Xuewu Lin Tianwei Lin Zixiang Pei Lichao Huang and Zhizhong Su. 2022. Sparse4D: Multi-view 3D Object Detection with Sparse Spatial-Temporal Fusion. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.10581 (2022). arXiv:arXiv:2211.10581"},{"key":"e_1_3_3_1_23_2","unstructured":"Xuewu Lin Tianwei Lin Zixiang Pei Lichao Huang and Zhizhong Su. 2023. Sparse4D v2: Recurrent Temporal Fusion with Sparse Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.14018 (2023). arXiv:arXiv:2305.14018"},{"key":"e_1_3_3_1_24_2","volume-title":"ECCV","author":"Liu Feng","year":"2024","unstructured":"Feng Liu, Tengteng Huang, Qianjing Zhang, Haotian Yao, Chi Zhang, Fang Wan, Qixiang Ye, and Yanzhao Zhou. 2024. Ray Denoising: Depth-aware Hard Negative Sampling for Multi-view 3D Object Detection. In ECCV."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01703"},{"key":"e_1_3_3_1_26_2","unstructured":"Yingfei Liu Tiancai Wang Xiangyu Zhang and Jian Sun. 2022. Petr: Position embedding transformation for multi-view 3d object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.05625 (2022)."},{"key":"e_1_3_3_1_27_2","unstructured":"Yingfei Liu Junjie Yan Fan Jia Shuailin Li Qi Gao Tiancai Wang Xiangyu Zhang and Jian Sun. 2022. PETRv2: A Unified Framework for 3D Perception from Multi-Camera Images. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2206.01256 (2022)."},{"key":"e_1_3_3_1_28_2","volume-title":"NeurIPS","author":"Liu Zhe","year":"2024","unstructured":"Zhe Liu, Jinghua Hou, Xingyu Wang, Xiaoqing Ye, Jingdong Wang, Hengshuang Zhao, and Xiang Bai. 2024. LION: Linear Group RNN for 3D Object Detection in Point Clouds. In NeurIPS."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19809-0_8"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_24"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","unstructured":"Thomas M\u00fcller Alex Evans Christoph Schied and Alexander Keller. 2022. Instant Neural Graphics Primitives with a Multiresolution Hash Encoding. ACM Trans. Graph. 41 4 Article 102 (July 2022) 15\u00a0pages. 10.1145\/3528223.3530127","DOI":"10.1145\/3528223.3530127"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00313"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58568-6_12"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"publisher","unstructured":"Peng Ping Zhengpeng Yang Lu Tao Quan Shi and Weiping Ding. 2025. BEV-TinySpotter: A novel BEV perception method considering multi-dimensional feature fusion of small target. Information Fusion 116 (2025) 102793. 10.1016\/j.inffus.2024.102793","DOI":"10.1016\/j.inffus.2024.102793"},{"key":"e_1_3_3_1_35_2","volume-title":"WACV","author":"Samuele Pi\u00f1eiro Monteagudo, Henrique and Taccari, Leonardo and Pjetri, Aurel and Sambo, Francesco and Salti,","year":"2025","unstructured":"Pi\u00f1eiro Monteagudo, Henrique and Taccari, Leonardo and Pjetri, Aurel and Sambo, Francesco and Salti, Samuele. 2025. RendBEV: Semantic Novel View Synthesis for Self-Supervised Bird\u2019s Eye View Segmentation. In WACV."},{"key":"e_1_3_3_1_36_2","first-page":"108796","volume-title":"Pattern Recognition","author":"Qian Rui","year":"2022","unstructured":"Rui Qian, Xin Lai, and Xirong Li. 2022. 3D Object Detection for Autonomous Driving: A Survey. In Pattern Recognition , Vol.\u00a0130. Elsevier, 108796."},{"key":"e_1_3_3_1_37_2","unstructured":"Wonseok Roh Gyusam Chang Seokha Moon Giljoo Nam Chanyoung Kim Younghyun Kim Sangpil Kim and Jinkyu Kim. 2022. Ora3d: Overlap region aware multi-view 3d object detection. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.00865 (2022)."},{"key":"e_1_3_3_1_38_2","unstructured":"Apoorv Singh and Varun Bankiti. 2023. Surround-View Vision-based 3D Detection for Autonomous Driving: A Survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.06650 (2023). arxiv:https:\/\/arXiv.org\/abs\/2302.06650\u00a0[cs.CV]"},{"key":"e_1_3_3_1_39_2","first-page":"5998","volume-title":"NeurIPS","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N. Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention Is All You Need. In NeurIPS , Vol.\u00a030. 5998\u20136008."},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00335"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00107"},{"key":"e_1_3_3_1_42_2","volume-title":"Conference on Robot Learning (CoRL)","author":"Wang Tai","year":"2021","unstructured":"Tai Wang, Xinge Zhu, Jiangmiao Pang, and Dahua Lin. 2021. Probabilistic and Geometric Depth: Detecting Objects in Perspective. In Conference on Robot Learning (CoRL)."},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00493"},{"key":"e_1_3_3_1_44_2","volume-title":"The Conference on Robot Learning (CoRL)","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Vitor Guizilini, Tianyuan Zhang, Yilun Wang, Hang Zhao, , and Justin\u00a0M. Solomon. 2021. DETR3D: 3D Object Detection from Multi-view Images via 3D-to-2D Queries. In The Conference on Robot Learning (CoRL)."},{"key":"e_1_3_3_1_45_2","unstructured":"Yu Wang Shaohua Wang Yicheng Li and Mingchun Liu. 2024. A Comprehensive Review of 3D Object Detection in Autonomous Driving: Technological Advances and Future Directions. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.02980 (2024). arxiv:https:\/\/arXiv.org\/abs\/2408.16530\u00a0[cs.CV]"},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3613798"},{"key":"e_1_3_3_1_47_2","unstructured":"Shaoyuan Xie Lingdong Kong Wenwei Zhang Jiawei Ren Liang Pan Kai Chen and Ziwei Liu. 2025. Benchmarking and Improving Bird\u2019s Eye View Perception Robustness in Autonomous Driving. TPAMI (2025)."},{"key":"e_1_3_3_1_48_2","doi-asserted-by":"crossref","unstructured":"Y. Yan Y. Mao and B. Li. 2018. Second: Sparsely embedded convolutional detection. Sensors 18 10 (2018) 3337.","DOI":"10.3390\/s18103337"},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"crossref","unstructured":"Chenyu Yang Yuntao Chen Haofei Tian Chenxin Tao Xizhou Zhu Zhaoxiang Zhang Gao Huang Hongyang Li Y. Qiao Lewei Lu Jie Zhou and Jifeng Dai. 2022. BEVFormer v2: Adapting Modern Image Backbones to Bird\u2019s-Eye-View Recognition via Perspective Supervision. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2211.10439 (2022).","DOI":"10.1109\/CVPR52729.2023.01710"},{"key":"e_1_3_3_1_50_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01390"},{"key":"e_1_3_3_1_51_2","unstructured":"Tianwei Yin Xingyi Zhou and Philipp Kr\u00e4henb\u00fchl. 2021. Center-based 3D Object Detection and Tracking. CVPR (2021)."},{"key":"e_1_3_3_1_52_2","volume-title":"arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.07850","author":"Zhou Xingyi","year":"2019","unstructured":"Xingyi Zhou, Dequan Wang, and Philipp Kr\u00e4henb\u00fchl. 2019. Objects as Points. In arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.07850."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00472"},{"key":"e_1_3_3_1_54_2","first-page":"21600","volume-title":"CVPR","author":"Zhu Zijian","year":"2023","unstructured":"Zijian Zhu, Yichi Zhang, Hai Chen, Yinpeng Dong, Shu Zhao, Wenbo Ding, Jiachen Zhong, and Shibao Zheng. 2023. Understanding the Robustness of 3D Object Detection With Bird\u2019s-Eye-View Representations in Autonomous Driving. In CVPR. 21600\u201321610."}],"event":{"name":"ICMR '26: International Conference on Multimedia Retrieval","location":"Amsterdam The Netherlands","acronym":"ICMR '26","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 2026 International Conference on Multimedia Retrieval"],"original-title":[],"deposited":{"date-parts":[[2026,6,15]],"date-time":"2026-06-15T15:38:39Z","timestamp":1781537919000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3805622.3810744"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6,15]]},"references-count":53,"alternative-id":["10.1145\/3805622.3810744","10.1145\/3805622"],"URL":"https:\/\/doi.org\/10.1145\/3805622.3810744","relation":{},"subject":[],"published":{"date-parts":[[2026,6,15]]},"assertion":[{"value":"2026-06-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}