{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T01:25:57Z","timestamp":1776216357426,"version":"3.50.1"},"reference-count":31,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2023,12,17]],"date-time":"2023-12-17T00:00:00Z","timestamp":1702771200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,17]],"date-time":"2023-12-17T00:00:00Z","timestamp":1702771200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"the National Natural Science Foundation of China","doi-asserted-by":"crossref","award":["62272049,62236006,61972375,62172045"],"award-info":[{"award-number":["62272049,62236006,61972375,62172045"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"name":"the Key Project of Beijing Municipal Commission of Education","award":["KZ201911417048"],"award-info":[{"award-number":["KZ201911417048"]}]},{"name":"the Major Project of Technological Innovation 2030 \u2013 \u201cNew Generation Artificial Intelligence\u201d","award":["2018AAA0100800"],"award-info":[{"award-number":["2018AAA0100800"]}]},{"name":"Premium Funding Project for Academic Human Resources Development in Beijing Union University","award":["BPHR2020AZ01,BPH2020EZ01"],"award-info":[{"award-number":["BPHR2020AZ01,BPH2020EZ01"]}]},{"name":"the Academic Research Projects of Beijing Union University","award":["ZKZD202301"],"award-info":[{"award-number":["ZKZD202301"]}]},{"name":"the Science and Technology Project of Beijing Municipal Commission of Education","award":["KM202111417009,KM201811417005"],"award-info":[{"award-number":["KM202111417009,KM201811417005"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["SIViP"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1007\/s11760-023-02896-2","type":"journal-article","created":{"date-parts":[[2023,12,17]],"date-time":"2023-12-17T09:02:07Z","timestamp":1702803727000},"page":"2243-2253","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["IDPD: improved deformable-DETR for crowd pedestrian detection"],"prefix":"10.1007","volume":"18","author":[{"given":"Wenjing","family":"Han","sequence":"first","affiliation":[]},{"given":"Ning","family":"He","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Fengxi","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Shengjie","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,17]]},"reference":[{"key":"2896_CR1","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229 (2020). Springer","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2896_CR2","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable detr: Deformable transformers for end-to-end object detection. arXiv preprint arXiv:2010.04159 (2020)"},{"key":"2896_CR3","doi-asserted-by":"crossref","unstructured":"Jia, D., Yuan, Y., He, H., Wu, X., Yu, H., Lin, W., Sun, L., Zhang, C., Hu, H.: Detrs with hybrid matching. arXiv preprint arXiv:2207.13080 (2022)","DOI":"10.1109\/CVPR52729.2023.01887"},{"key":"2896_CR4","doi-asserted-by":"crossref","unstructured":"Li, F., Zhang, H., Liu, S., Guo, J., Ni, L.M., Zhang, L.: Dn-detr: Accelerate detr training by introducing query denoising. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13619\u201313627 (2022)","DOI":"10.1109\/CVPR52688.2022.01325"},{"key":"2896_CR5","unstructured":"Zhang, H., Li, F., Liu, S., Zhang, L., Su, H., Zhu, J., Ni, L.M., Shum, H.-Y.: Dino: Detr with improved denoising anchor boxes for end-to-end object detection. arXiv preprint arXiv:2203.03605 (2022)"},{"key":"2896_CR6","unstructured":"Lin, M., Li, C., Bu, X., Sun, M., Lin, C., Yan, J., Ouyang, W., Deng, Z.: Detr for crowd pedestrian detection. arXiv preprint arXiv:2012.06785 (2020)"},{"key":"2896_CR7","doi-asserted-by":"crossref","unstructured":"Zheng, A., Zhang, Y., Zhang, X., Qi, X., Sun, J.: Progressive end-to-end object detection in crowded scenes. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 857\u2013866 (2022)","DOI":"10.1109\/CVPR52688.2022.00093"},{"key":"2896_CR8","doi-asserted-by":"crossref","unstructured":"Chu, X., Zheng, A., Zhang, X., Sun, J.: Detection in crowded scenes: One proposal, multiple predictions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12214\u201312223 (2020)","DOI":"10.1109\/CVPR42600.2020.01223"},{"key":"2896_CR9","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Doll\u00e1r, P., Girshick, R., He, K., Hariharan, B., Belongie, S.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2117\u20132125 (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"2896_CR10","unstructured":"Li, C., Zhou, A., Yao, A.: Omni-dimensional dynamic convolution. arXiv preprint arXiv:2209.07947 (2022)"},{"key":"2896_CR11","unstructured":"Shao, S., Zhao, Z., Li, B., Xiao, T., Yu, G., Zhang, X., Sun, J.: Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123 (2018)"},{"key":"2896_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"2896_CR13","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2896_CR14","doi-asserted-by":"crossref","unstructured":"Zhang, S., Benenson, R., Schiele, B.: Citypersons: A diverse dataset for pedestrian detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3213\u20133221 (2017)","DOI":"10.1109\/CVPR.2017.474"},{"key":"2896_CR15","first-page":"845","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: Towards real-time object detection with region proposal networks. Adv. Neural Inform. Process. Syst. 28, 845 (2015)","journal-title":"Adv. Neural Inform. Process. Syst."},{"key":"2896_CR16","doi-asserted-by":"crossref","unstructured":"Zhou, C., Yuan, J.: Bi-box regression for pedestrian detection and occlusion estimation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 135\u2013151 (2018)","DOI":"10.1007\/978-3-030-01246-5_9"},{"key":"2896_CR17","unstructured":"Zhang, K., Xiong, F., Sun, P., Hu, L., Li, B., Yu, G.: Double anchor r-cnn for human detection in a crowd. arXiv preprint arXiv:1909.09998 (2019)"},{"key":"2896_CR18","doi-asserted-by":"crossref","unstructured":"Chi, C., Zhang, S., Xing, J., Lei, Z., Li, S.Z., Zou, X.: Relational learning for joint head and human detection. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, pp. 10647\u201310654 (2020)","DOI":"10.1609\/aaai.v34i07.6691"},{"key":"2896_CR19","doi-asserted-by":"crossref","unstructured":"Liu, S., Huang, D., Wang, Y.: Adaptive nms: Refining pedestrian detection in a crowd. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6459\u20136468 (2019)","DOI":"10.1109\/CVPR.2019.00662"},{"key":"2896_CR20","doi-asserted-by":"crossref","unstructured":"Zhou, P., Zhou, C., Peng, P., Du, J., Sun, X., Guo, X., Huang, F.: Noh-nms: Improving pedestrian detection by nearby objects hallucination. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 1967\u20131975 (2020)","DOI":"10.1145\/3394171.3413617"},{"key":"2896_CR21","doi-asserted-by":"crossref","unstructured":"Wang, X., Xiao, T., Jiang, Y., Shao, S., Sun, J., Shen, C.: Repulsion loss: Detecting pedestrians in a crowd. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7774\u20137783 (2018)","DOI":"10.1109\/CVPR.2018.00811"},{"key":"2896_CR22","doi-asserted-by":"crossref","unstructured":"Zhang, S., Wen, L., Bian, X., Lei, Z., Li, S.Z.: Occlusion-aware r-cnn: detecting pedestrians in a crowd. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 637\u2013653 (2018)","DOI":"10.1007\/978-3-030-01219-9_39"},{"key":"2896_CR23","doi-asserted-by":"crossref","unstructured":"Meng, D., Chen, X., Fan, Z., Zeng, G., Li, H., Yuan, Y., Sun, L., Wang, J.: Conditional detr for fast training convergence. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 3651\u20133660 (2021)","DOI":"10.1109\/ICCV48922.2021.00363"},{"key":"2896_CR24","doi-asserted-by":"crossref","unstructured":"Wang, Y., Zhang, X., Yang, T., Sun, J.: Anchor detr: Query design for transformer-based detector. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36, pp. 2567\u20132575 (2022)","DOI":"10.1609\/aaai.v36i3.20158"},{"key":"2896_CR25","unstructured":"Liu, S., Li, F., Zhang, H., Yang, X., Qi, X., Su, H., Zhu, J., Zhang, L.: Dab-detr: Dynamic anchor boxes are better queries for detr. arXiv preprint arXiv:2201.12329 (2022)"},{"key":"2896_CR26","doi-asserted-by":"crossref","unstructured":"Ge, Z., Jie, Z., Huang, X., Xu, R., Yoshie, O.: Ps-rcnn: Detecting secondary human instances in a crowd via primary object suppression. In: 2020 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136 (2020). IEEE","DOI":"10.1109\/ICME46284.2020.9102793"},{"key":"2896_CR27","doi-asserted-by":"crossref","unstructured":"Rukhovich, D., Sofiiuk, K., Galeev, D., Barinova, O., Konushin, A.: Iterdet: iterative scheme for object detection in crowded environments. In: Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR), pp. 344\u2013354 (2021). Springer","DOI":"10.1007\/978-3-030-73973-7_33"},{"key":"2896_CR28","unstructured":"Xu, Z., Li, B., Yuan, Y., Dang, A.: Beta R-CNN: Looking into pedestrian detection from another perspective. Adv. Neural. Inf. Process. Syst. 33, 19953\u201319963 (2020)"},{"key":"2896_CR29","doi-asserted-by":"crossref","unstructured":"Zhang, S., Wang, X., Wang, J., Pang, J., Lyu, C., Zhang, W., Luo, P., Chen, K.: Dense distinct query for end-to-end object detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7329\u20137338 (2023)","DOI":"10.1109\/CVPR52729.2023.00708"},{"key":"2896_CR30","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"2896_CR31","doi-asserted-by":"crossref","unstructured":"Liu, W., Liao, S., Hu, W., Liang, X., Chen, X.: Learning efficient single-stage pedestrian detectors by asymptotic localization fitting. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 618\u2013634 (2018)","DOI":"10.1007\/978-3-030-01264-9_38"}],"container-title":["Signal, Image and Video Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02896-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11760-023-02896-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11760-023-02896-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,19]],"date-time":"2024-03-19T20:14:00Z","timestamp":1710879240000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11760-023-02896-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,17]]},"references-count":31,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,4]]}},"alternative-id":["2896"],"URL":"https:\/\/doi.org\/10.1007\/s11760-023-02896-2","relation":{},"ISSN":["1863-1703","1863-1711"],"issn-type":[{"value":"1863-1703","type":"print"},{"value":"1863-1711","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12,17]]},"assertion":[{"value":"26 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 November 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}]}}