{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T04:48:20Z","timestamp":1781585300068,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Pioneer and Leading Goose R&D Program of Zhejiang","award":["2024C01142"],"award-info":[{"award-number":["2024C01142"]}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21B2013, 62125106, 62088102, 62322113 and 62376156"],"award-info":[{"award-number":["U21B2013, 62125106, 62088102, 62322113 and 62376156"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","name":"National Science and Technology Major Project","doi-asserted-by":"publisher","award":["2022ZD0119402"],"award-info":[{"award-number":["2022ZD0119402"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100018537","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Shanghai Municipal Science and Technology Major Project","award":["2021SHZDZX0102"],"award-info":[{"award-number":["2021SHZDZX0102"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681043","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"4851-4860","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["SparseFormer: Detecting Objects in HRW Shots via Sparse Vision Transformer"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8449-0489","authenticated-orcid":false,"given":"Wenxi","family":"Li","sequence":"first","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9808-9805","authenticated-orcid":false,"given":"Yuchen","family":"Guo","sequence":"additional","affiliation":[{"name":"Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0850-2446","authenticated-orcid":false,"given":"Jilai","family":"Zheng","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3707-3575","authenticated-orcid":false,"given":"Haozhe","family":"Lin","sequence":"additional","affiliation":[{"name":"Beijing National Research Center for Information Science and Technology, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8459-2845","authenticated-orcid":false,"given":"Chao","family":"Ma","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3552-0367","authenticated-orcid":false,"given":"Lu","family":"Fang","sequence":"additional","affiliation":[{"name":"Department of Electronic Engineering, BNRist, Tsinghua University, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4029-3322","authenticated-orcid":false,"given":"Xiaokang","family":"Yang","sequence":"additional","affiliation":[{"name":"MoE Key Lab of Artificial Intelligence, AI Institute, Shanghai Jiao Tong University, Shanghai, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Sinan Onur Altinuc, and Alptekin Temizel","author":"Akyon Fatih Cagatay","year":"2022","unstructured":"Fatih Cagatay Akyon, Sinan Onur Altinuc, and Alptekin Temizel. 2022. Slicing aided hyper inference and fine-tuning for small object detection. In ICIP."},{"key":"e_1_3_2_1_2_1","unstructured":"Seyed Majid Azimi Eleonora Vig Reza Bahmanyar Marco K\u00f6rner and Peter Reinartz. 2018. Towards multi-class object detection in unconstrained remote sensing imagery. In ACCV."},{"key":"e_1_3_2_1_3_1","unstructured":"Zhaowei Cai and Nuno Vasconcelos. 2018. Cascade R-CNN: Delving into high quality object detection. In CVPR."},{"key":"e_1_3_2_1_4_1","volume-title":"Chen Change Loy, and Dahua Lin","author":"Chen Kai","year":"2019","unstructured":"Kai Chen, Jiaqi Wang, Jiangmiao Pang, Yuhang Cao, Yu Xiong, Xiaoxiao Li, Shuyang Sun, Wansen Feng, Ziwei Liu, Jiarui Xu, Zheng Zhang, Dazhi Cheng, Chenchen Zhu, Tianheng Cheng, Qijie Zhao, Buyu Li, Xin Lu, Rui Zhu, Yue Wu, Jifeng Dai, Jingdong Wang, Jianping Shi, Wanli Ouyang, Chen Change Loy, and Dahua Lin. 2019. MM Detection: Open MMLab Detection Toolbox and Benchmark. arXiv preprint arXiv:1906.07155 (2019)."},{"key":"e_1_3_2_1_5_1","volume-title":"Towards real-time object detection in GigaPixel-level video. Neurocomputing","author":"Chen Kai","year":"2022","unstructured":"Kai Chen, Zerun Wang, Xueyang Wang, Dahan Gong, Longlong Yu, Yuchen Guo, and Guiguang Ding. 2022. Towards real-time object detection in GigaPixel-level video. Neurocomputing (2022)."},{"key":"e_1_3_2_1_6_1","unstructured":"Xiyang Dai Yinpeng Chen Bin Xiao Dongdong Chen Mengchen Liu Lu Yuan and Lei Zhang. 2021. Dynamic head: Unifying object detection heads with attentions. In CVPR."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Jian Ding Nan Xue Yang Long Gui-Song Xia and Qikai Lu. 2019. Learning RoI Transformer for Oriented Object Detection in Aerial Images. In CVPR.","DOI":"10.1109\/CVPR.2019.00296"},{"key":"e_1_3_2_1_8_1","unstructured":"Alexey Dosovitskiy Lucas Beyer Alexander Kolesnikov Dirk Weissenborn Xiaohua Zhai Thomas Unterthiner Mostafa Dehghani Matthias Minderer Georg Heigold Sylvain Gelly et al. 2021. An image is worth 16x16 words: Transformers for image recognition at scale. In ICLR."},{"key":"e_1_3_2_1_9_1","volume-title":"Christopher KI Williams, John Winn, and Andrew Zisserman.","author":"Everingham Mark","year":"2010","unstructured":"Mark Everingham, Luc Van Gool, Christopher KI Williams, John Winn, and Andrew Zisserman. 2010. The pascal visual object classes (voc) challenge. IJCV (2010)."},{"key":"e_1_3_2_1_10_1","unstructured":"Jiahao Fan Huabin Liu Wenjie Yang John See Aixin Zhang and Weiyao Lin. 2022. Speed Up Object Detection on Gigapixel-Level Images With Patch Arrangement. In CVPR."},{"key":"e_1_3_2_1_11_1","volume-title":"YOLOX: Exceeding YOLO Series in","author":"Ge Zheng","year":"2021","unstructured":"Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, and Jian Sun. 2021. YOLOX: Exceeding YOLO Series in 2021. arXiv preprint arXiv:2107.08430 (2021)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"crossref","unstructured":"Ross Girshick. 2015. Fast R-CNN. In ICCV.","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"crossref","unstructured":"Ross Girshick Jeff Donahue Trevor Darrell and Jitendra Malik. 2014. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR.","DOI":"10.1109\/CVPR.2014.81"},{"key":"e_1_3_2_1_14_1","unstructured":"Zonghao Guo Chang Liu Xiaosong Zhang Jianbin Jiao Xiangyang Ji and Qixiang Ye. 2021. Beyond bounding-box: Convex-hull feature adaptation for oriented and densely packed object detection. In CVPR."},{"key":"e_1_3_2_1_15_1","volume-title":"Align deep features for oriented object detection","author":"Han Jiaming","year":"2021","unstructured":"Jiaming Han, Jian Ding, Jie Li, and Gui-Song Xia. 2021. Align deep features for oriented object detection. IEEE TGRS (2021)."},{"key":"e_1_3_2_1_16_1","volume-title":"Redet: A rotation-equivariant detector for aerial object detection. In CVPR.","author":"Han Jiaming","year":"2021","unstructured":"Jiaming Han, Jian Ding, Nan Xue, and Gui-Song Xia. 2021. Redet: A rotation-equivariant detector for aerial object detection. In CVPR."},{"key":"e_1_3_2_1_17_1","unstructured":"Kaiming He Ross Girshick and Piotr Doll\u00e1r. 2019. Rethinking imagenet pre-training. In ICCV."},{"key":"e_1_3_2_1_18_1","unstructured":"Kaiming He Georgia Gkioxari Piotr Doll\u00e1r and Ross Girshick. 2017. Mask r-cnn. In ICCV."},{"key":"e_1_3_2_1_19_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR."},{"key":"e_1_3_2_1_20_1","unstructured":"Glenn Jocher Ayush Chaurasia and Jing Qiu. 2023. YOLO by Ultralytics. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"e_1_3_2_1_21_1","unstructured":"Changlin Li Taojiannan Yang Sijie Zhu Chen Chen and Shanyue Guan. 2020. Density map guided object detection in aerial images. In CVPRW."},{"key":"e_1_3_2_1_22_1","unstructured":"Wentong Li Yijie Chen Kaixuan Hu and Jianke Zhu. 2022. Oriented reppoints for aerial object detection. In CVPR."},{"key":"e_1_3_2_1_23_1","unstructured":"Wenxi Li Ruxin Zhang Haozhe Lin Yuchen Guo Chao Ma and Xiaokang Yang. 2024. SaccadeDet: A Novel Dual-Stage Architecture for Rapid and Accurate Detection in Gigapixel Images. In ECML-PKDD."},{"key":"e_1_3_2_1_24_1","unstructured":"Wenxi Li Ruxin Zhang Haozhe Lin Yuchen Guo Chao Ma and Xiaokang Yang. 2024. SaccadeMOT: Enhancing Object Detection and Tracking in Gigapixel Images via Scale-Aware Density Estimation. In ECAI."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Haozhe Lin Zequn Chen Jinzhi Zhang Bing Bai Yu Wang Ruqi Huang and Lu Fang. 2023. RealGraph: A Multiview Dataset for 4D Real-world Context Graph Generation. In ICCV.","DOI":"10.1109\/ICCV51070.2023.00348"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Haozhe Lin Chunyu Wei Li He Yuchen Guo Yunqi Zhao Shanglong Li and Lu Fang. 2024. GigaTraj: Predicting Long-term Trajectories of Hundreds of Pedestrians in Gigapixel Complex Scenes. In CVPR.","DOI":"10.1109\/CVPR52733.2024.01829"},{"key":"e_1_3_2_1_27_1","unstructured":"Tsung-Yi Lin Priya Goyal Ross Girshick Kaiming He and Piotr Doll\u00e1r. 2017. Focal loss for dense object detection. In ICCV."},{"key":"e_1_3_2_1_28_1","unstructured":"Tsung-Yi Lin Michael Maire Serge Belongie James Hays Pietro Perona Deva Ramanan Piotr Doll\u00e1r and C Lawrence Zitnick. 2014. Microsoft COCO: Common Objects in Context. In ECCV."},{"key":"e_1_3_2_1_29_1","volume-title":"SSD: Single shot multibox detector. In ECCV.","author":"Liu Wei","year":"2016","unstructured":"Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C Berg. 2016. SSD: Single shot multibox detector. In ECCV."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Ze Liu Yutong Lin Yue Cao Han Hu Yixuan Wei Zheng Zhang Stephen Lin and Baining Guo. 2021. Swin transformer: Hierarchical vision transformer using shifted windows. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"crossref","unstructured":"Tao Ma Bing Bai Haozhe Lin Heyuan Wang Yu Wang Lin Luo and Lu Fang. 2024. When Visual Grounding Meets Gigapixel-level Large-scale Scenes: Benchmark and Approach. In CVPR.","DOI":"10.1109\/CVPR52733.2024.02088"},{"key":"e_1_3_2_1_32_1","unstructured":"Chris J Maddison Andriy Mnih and Yee Whye Teh. 2017. The concrete distribution: A continuous relaxation of discrete random variables. In ICLR."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"Lingchen Meng Hengduo Li Bor-Chun Chen Shiyi Lan Zuxuan Wu Yu-Gang Jiang and Ser-Nam Lim. 2022. AdaViT: Adaptive Vision Transformers for Efficient Image Recognition. In CVPR.","DOI":"10.1109\/CVPR52688.2022.01199"},{"key":"e_1_3_2_1_34_1","volume-title":"Autofocus: Efficient multi-scale inference. In ICCV.","author":"Najibi Mahyar","year":"2019","unstructured":"Mahyar Najibi, Bharat Singh, and Larry S Davis. 2019. Autofocus: Efficient multi-scale inference. In ICCV."},{"key":"e_1_3_2_1_35_1","unstructured":"Xingjia Pan Yuqiang Ren Kekai Sheng Weiming Dong Haolei Yuan Xiaowei Guo Chongyang Ma and Changsheng Xu. 2020. Dynamic refinement network for oriented and densely packed object detection. In CVPR."},{"key":"e_1_3_2_1_36_1","unstructured":"Yongming Rao Wenliang Zhao Benlin Liu Jiwen Lu Jie Zhou and Cho-Jui Hsieh. 2021. DynamicViT: Efficient Vision Transformers with Dynamic Token Sparsification. In NeurIPS."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Joseph Redmon Santosh Divvala Ross Girshick and Ali Farhadi. 2016. You only look once: Unified real-time object detection. In CVPR.","DOI":"10.1109\/CVPR.2016.91"},{"key":"e_1_3_2_1_38_1","volume-title":"Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767","author":"Redmon Joseph","year":"2018","unstructured":"Joseph Redmon and Ali Farhadi. 2018. Yolov3: An incremental improvement. arXiv preprint arXiv:1804.02767 (2018)."},{"key":"e_1_3_2_1_39_1","unstructured":"Shaoqing Ren Kaiming He Ross Girshick and Jian Sun. 2015. Faster R-CNN: Towards real-time object detection with region proposal networks. In NeurIPS."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"crossref","unstructured":"Bharat Singh and Larry S Davis. 2018. An analysis of scale invariance in object detection snip. In CVPR.","DOI":"10.1109\/CVPR.2018.00377"},{"key":"e_1_3_2_1_41_1","volume-title":"Sniper: Efficient multi-scale training. In NeurIPS.","author":"Singh Bharat","year":"2018","unstructured":"Bharat Singh, Mahyar Najibi, and Larry S Davis. 2018. Sniper: Efficient multi-scale training. In NeurIPS."},{"key":"e_1_3_2_1_42_1","unstructured":"Lin Song Songyang Zhang Songtao Liu Zeming Li Xuming He Hongbin Sun Jian Sun and Nanning Zheng. 2021. Dynamic grained encoder for vision transformers. In NeurIPS."},{"key":"e_1_3_2_1_43_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is all you need. In NeurIPS."},{"key":"e_1_3_2_1_44_1","volume-title":"Learning center probability map for detecting objects in aerial images","author":"Wang Jinwang","year":"2020","unstructured":"Jinwang Wang, Wen Yang, Heng-Chao Li, Haijian Zhang, and Gui-Song Xia. 2020. Learning center probability map for detecting objects in aerial images. IEEE TGRS (2020)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Junke Wang Xitong Yang Hengduo Li Li Liu Zuxuan Wu and Yu-Gang Jiang. 2022. Efficient video transformers with spatial-temporal token selection. In ECCV.","DOI":"10.1007\/978-3-031-19833-5_5"},{"key":"e_1_3_2_1_46_1","volume-title":"Pnp-detr: Towards efficient visual analysis with transformers. In ICCV.","author":"Wang Tao","year":"2021","unstructured":"Tao Wang, Li Yuan, Yunpeng Chen, Jiashi Feng, and Shuicheng Yan. 2021. Pnp-detr: Towards efficient visual analysis with transformers. In ICCV."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Wenhai Wang Enze Xie Xiang Li Deng-Ping Fan Kaitao Song Ding Liang Tong Lu Ping Luo and Ling Shao. 2021. Pyramid vision transformer: A versatile backbone for dense prediction without convolutions. In ICCV.","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eng.2023.05.020"},{"key":"e_1_3_2_1_49_1","volume-title":"Panda: A gigapixel-level human-centric video dataset. In CVPR.","author":"Wang Xueyang","year":"2020","unstructured":"Xueyang Wang, Xiya Zhang, Yinheng Zhu, Yuchen Guo, Xiaoyun Yuan, Liuyu Xiang, Zerun Wang, Guiguang Ding, David Brady, Qionghai Dai, et al. 2020. Panda: A gigapixel-level human-centric video dataset. In CVPR."},{"key":"e_1_3_2_1_50_1","volume-title":"Cvt: Introducing convolutions to vision transformers. In ICCV.","author":"Wu Haiping","year":"2021","unstructured":"Haiping Wu, Bin Xiao, Noel Codella, Mengchen Liu, Xiyang Dai, Lu Yuan, and Lei Zhang. 2021. Cvt: Introducing convolutions to vision transformers. In ICCV."},{"key":"e_1_3_2_1_51_1","volume-title":"DOTA: A large-scale dataset for object detection in aerial images. In CVPR.","author":"Xia Gui-Song","year":"2018","unstructured":"Gui-Song Xia, Xiang Bai, Jian Ding, Zhen Zhu, Serge Belongie, Jiebo Luo, Mihai Datcu, Marcello Pelillo, and Liangpei Zhang. 2018. DOTA: A large-scale dataset for object detection in aerial images. In CVPR."},{"key":"e_1_3_2_1_52_1","volume-title":"Nystr\u00f6mformer: A nystr\u00f6m-based algorithm for approximating self-attention. In AAAI.","author":"Xiong Yunyang","year":"2021","unstructured":"Yunyang Xiong, Zhanpeng Zeng, Rudrasis Chakraborty, Mingxing Tan, Glenn Fung, Yin Li, and Vikas Singh. 2021. Nystr\u00f6mformer: A nystr\u00f6m-based algorithm for approximating self-attention. In AAAI."},{"key":"e_1_3_2_1_53_1","volume-title":"Querydet: Cascaded sparse query for accelerating high-resolution small object detection. In CVPR.","author":"Yang Chenhongyi","year":"2022","unstructured":"Chenhongyi Yang, Zehao Huang, and Naiyan Wang. 2022. Querydet: Cascaded sparse query for accelerating high-resolution small object detection. In CVPR."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"crossref","unstructured":"Fan Yang Heng Fan Peng Chu Erik Blasch and Haibin Ling. 2019. Clustered object detection in aerial images. In ICCV.","DOI":"10.1109\/ICCV.2019.00840"},{"key":"e_1_3_2_1_55_1","unstructured":"Jianwei Yang Chunyuan Li Pengchuan Zhang Xiyang Dai Bin Xiao Lu Yuan and Jianfeng Gao. 2021. Focal Attention for Long-Range Interactions in Vision Transformers. In NeurIPS."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"crossref","unstructured":"Xue Yang and Junchi Yan. 2020. Arbitrary-Oriented Object Detection with Circular Smooth Label. In ECCV.","DOI":"10.1007\/978-3-030-58598-3_40"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"crossref","unstructured":"Xue Yang Junchi Yan Ziming Feng and Tao He. 2021. R3det: Refined single-stage detector with feature refinement for rotating object. In AAAI.","DOI":"10.1609\/aaai.v35i4.16426"},{"key":"e_1_3_2_1_58_1","volume-title":"Scrdet: Detecting small, cluttered and rotated objects via instance-level feature denoising and rotation loss smoothing","author":"Yang Xue","year":"2022","unstructured":"Xue Yang, Junchi Yan, Wenlong Liao, Xiaokang Yang, Jin Tang, and Tao He. 2022. Scrdet: Detecting small, cluttered and rotated objects via instance-level feature denoising and rotation loss smoothing. IEEE TPAMI (2022)."},{"key":"e_1_3_2_1_59_1","volume-title":"CAD-Net: A context-aware detection network for objects in remote sensing imagery","author":"Zhang Gongjie","year":"2019","unstructured":"Gongjie Zhang, Shijian Lu, and Wei Zhang. 2019. CAD-Net: A context-aware detection network for objects in remote sensing imagery. IEEE TGRS (2019)."},{"key":"e_1_3_2_1_60_1","volume-title":"Dino: Detr with improved denoising anchor boxes for end-to-end object detection. In ICLR.","author":"Zhang Hao","year":"2023","unstructured":"Hao Zhang, Feng Li, Shilong Liu, Lei Zhang, Hang Su, Jun Zhu, Lionel Ni, and Harry Shum. 2023. Dino: Detr with improved denoising anchor boxes for end-to-end object detection. In ICLR."},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"crossref","unstructured":"Shifeng Zhang Cheng Chi Yongqiang Yao Zhen Lei and Stan Z Li. 2020. Bridging the gap between anchor-based and anchor-free detection via adaptive training sample selection. In CVPR.","DOI":"10.1109\/CVPR42600.2020.00978"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681043","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681043","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:37Z","timestamp":1750295857000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681043"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":61,"alternative-id":["10.1145\/3664647.3681043","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681043","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}