{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:41:20Z","timestamp":1755823280615,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612189","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:26:54Z","timestamp":1698391614000},"page":"2714-2722","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Selecting Learnable Training Samples is All DETRs Need in Crowded Pedestrian Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1179-2539","authenticated-orcid":false,"given":"Feng","family":"Gao","sequence":"first","affiliation":[{"name":"Chongqing University of Post and Telecommunications &amp; Guangyang Bay Laboratory, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2802-8139","authenticated-orcid":false,"given":"Jiaxu","family":"Leng","sequence":"additional","affiliation":[{"name":"Chongqing University of Post and Telecommunications &amp; Guangyang Bay Laboratory, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6041-588X","authenticated-orcid":false,"given":"Ji","family":"Gan","sequence":"additional","affiliation":[{"name":"Chongqing University of Post and Telecommunications &amp; Guangyang Bay Laboratory, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7985-0037","authenticated-orcid":false,"given":"Xinbo","family":"Gao","sequence":"additional","affiliation":[{"name":"Chongqing University of Post and Telecommunications &amp; Guangyang Bay Laboratory, Chongqing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58580-8_33"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00740"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.530"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6690"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01223"},{"key":"e_1_3_2_2_7_1","volume-title":"Sinkhorn distances: Lightspeed computation of optimal transport. Advances in neural information processing systems","author":"Cuturi Marco","year":"2013","unstructured":"Marco Cuturi. 2013. Sinkhorn distances: Lightspeed computation of optimal transport. Advances in neural information processing systems, Vol. 26 (2013)."},{"key":"e_1_3_2_2_8_1","volume-title":"Pedestrian detection: An evaluation of the state of the art","author":"Dollar Piotr","year":"2011","unstructured":"Piotr Dollar, Christian Wojek, Bernt Schiele, and Pietro Perona. 2011. Pedestrian detection: An evaluation of the state of the art. IEEE transactions on pattern analysis and machine intelligence, Vol. 34, 4 (2011), 743--761."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00667"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00037"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.169"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01117"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3094800"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01076"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3171250"},{"key":"e_1_3_2_2_16_1","volume-title":"Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems","author":"Li Xiang","year":"2020","unstructured":"Xiang Li, Wenhai Wang, Lijun Wu, Shuo Chen, Xiaolin Hu, Jun Li, Jinhui Tang, and Jian Yang. 2020. Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection. In Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual, Hugo Larochelle, Marc'Aurelio Ranzato, Raia Hadsell, Maria-Florina Balcan, and Hsuan-Tien Lin (Eds.)."},{"key":"e_1_3_2_2_17_1","volume-title":"DETR for crowd pedestrian detection. arXiv preprint arXiv:2012.06785","author":"Lin Matthieu","year":"2020","unstructured":"Matthieu Lin, Chuming Li, Xingyuan Bu, Ming Sun, Chen Lin, Junjie Yan, Wanli Ouyang, and Zhidong Deng. 2020. DETR for crowd pedestrian detection. arXiv preprint arXiv:2012.06785 (2020)."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16313"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3038371"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00107"},{"key":"e_1_3_2_2_23_1","volume-title":"Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems","author":"Ren Shaoqing","year":"2015","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster r-cnn: Towards real-time object detection with region proposal networks. Advances in neural information processing systems, Vol. 28 (2015)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00075"},{"key":"e_1_3_2_2_25_1","volume-title":"Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123","author":"Shao Shuai","year":"2018","unstructured":"Shuai Shao, Zijian Zhao, Boxun Li, Tete Xiao, Gang Yu, Xiangyu Zhang, and Jian Sun. 2018. Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123 (2018)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.255"},{"key":"e_1_3_2_2_27_1","volume-title":"International Conference on Machine Learning. PMLR, 9934--9944","author":"Sun Peize","year":"2021","unstructured":"Peize Sun, Yi Jiang, Enze Xie, Wenqi Shao, Zehuan Yuan, Changhu Wang, and Ping Luo. 2021a. What makes for end-to-end object detection?. In International Conference on Machine Learning. PMLR, 9934--9944."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"e_1_3_2_2_29_1","volume-title":"Autopedestrian: an automatic data augmentation and loss function search scheme for pedestrian detection","author":"Tang Yi","year":"2021","unstructured":"Yi Tang, Baopu Li, Min Liu, Boyu Chen, Yaonan Wang, and Wanli Ouyang. 2021. Autopedestrian: an automatic data augmentation and loss function search scheme for pedestrian detection. IEEE transactions on image processing, Vol. 30 (2021), 8483--8496."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299143"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00972"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00811"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01344"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.3040854"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967274"},{"key":"e_1_3_2_2_36_1","volume-title":"Tel Aviv","author":"Zeng Fangao","year":"2022","unstructured":"Fangao Zeng, Bin Dong, Yuang Zhang, Tiancai Wang, Xiangyu Zhang, and Yichen Wei. 2022. Motr: End-to-end multiple-object tracking with transformer. In Computer Vision-ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXVII. Springer, 659--675."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.474"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00978"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3041679"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3041679"},{"key":"e_1_3_2_2_41_1","volume-title":"KGSnet: key-point-guided super-resolution network for pedestrian detection in the wild","author":"Zhang Yongqiang","year":"2020","unstructured":"Yongqiang Zhang, Yancheng Bai, Mingli Ding, Shibiao Xu, and Bernard Ghanem. 2020a. KGSnet: key-point-guided super-resolution network for pedestrian detection in the wild. IEEE transactions on neural networks and learning systems, Vol. 32, 5 (2020), 2251--2265."},{"key":"e_1_3_2_2_42_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11622--11631","author":"Lin Weiyao","year":"2021","unstructured":"Yuang Zhang, Huanyu He, Jianguo Li, Yuxi Li, John See, and Weiyao Lin. 2021. Variational pedestrian detection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 11622--11631."},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00093"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2022.3142445"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2020.3019390"},{"key":"e_1_3_2_2_46_1","volume-title":"Deformable DETR: Deformable Transformers for End-to-End Object Detection. In International Conference on Learning Representations.","author":"Zhu Xizhou","year":"2020","unstructured":"Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2020. Deformable DETR: Deformable Transformers for End-to-End Object Detection. In International Conference on Learning Representations."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Ottawa ON Canada","acronym":"MM '23"},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612189","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612189","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:06:38Z","timestamp":1755821198000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612189"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":46,"alternative-id":["10.1145\/3581783.3612189","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612189","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}