{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T20:12:59Z","timestamp":1775592779187,"version":"3.50.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,2,28]],"date-time":"2023-02-28T00:00:00Z","timestamp":1677542400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,28]],"date-time":"2023-02-28T00:00:00Z","timestamp":1677542400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Performance Analysis and Optimal Design of Networked Intelligent Systems under Multiple Communication Constraints","award":["62173049"],"award-info":[{"award-number":["62173049"]}]},{"name":"Collaboration and Optimization of Hybrid Multi-Intelligent Systems Based on Learning Algorithms","award":["61772086"],"award-info":[{"award-number":["61772086"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2023,4]]},"DOI":"10.1007\/s11554-023-01287-7","type":"journal-article","created":{"date-parts":[[2023,2,28]],"date-time":"2023-02-28T18:13:01Z","timestamp":1677607981000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":21,"title":["Improved YOLOX for pedestrian detection in crowded scenes"],"prefix":"10.1007","volume":"20","author":[{"given":"Fei","family":"Gao","sequence":"first","affiliation":[]},{"given":"Changxin","family":"Cai","sequence":"additional","affiliation":[]},{"given":"Ruohui","family":"Jia","sequence":"additional","affiliation":[]},{"given":"Xinzhong","family":"Hu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,28]]},"reference":[{"key":"1287_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. arXiv preprint arXiv:1607.06450 (2016)"},{"key":"1287_CR2","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. arXiv preprint arXiv:1409.0473 (2014)"},{"key":"1287_CR3","unstructured":"Bochkovskiy, A., Wang, C.Y., Liao, H.Y.M.: Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934 (2020)"},{"key":"1287_CR4","doi-asserted-by":"crossref","unstructured":"Bodla, N., Singh, B., Chellappa, R., Davis, L.S.: Soft-nms\u2013improving object detection with one line of code. In: Proceedings of the IEEE international conference on computer vision, pp. 5561\u20135569 (2017)","DOI":"10.1109\/ICCV.2017.593"},{"key":"1287_CR5","unstructured":"Cao, H., Wang, Y., Chen, J., Jiang, D., Zhang, X., Tian, Q., Wang, M.: Swin-unet: Unet-like pure transformer for medical image segmentation. arXiv preprint arXiv:2105.05537 (2021)"},{"key":"1287_CR6","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: European conference on computer vision, pp. 213\u2013229. Springer (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1287_CR7","doi-asserted-by":"publisher","first-page":"19959","DOI":"10.1109\/ACCESS.2018.2815149","volume":"6","author":"J Chu","year":"2018","unstructured":"Chu, J., Guo, Z., Leng, L.: Object detection based on multi-layer convolution feature fusion and online hard example mining. IEEE Access 6, 19959\u201319967 (2018)","journal-title":"IEEE Access"},{"key":"1287_CR8","doi-asserted-by":"publisher","first-page":"114705","DOI":"10.1109\/ACCESS.2020.3003917","volume":"8","author":"J Chu","year":"2020","unstructured":"Chu, J., Zhang, Y., Li, S., Leng, L., Miao, J.: Syncretic-nms: A merging non-maximum suppression algorithm for instance segmentation. IEEE Access 8, 114705\u2013114714 (2020)","journal-title":"IEEE Access"},{"key":"1287_CR9","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: A large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp. 248\u2013255. IEEE (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"8","key":"1287_CR10","doi-asserted-by":"publisher","first-page":"1532","DOI":"10.1109\/TPAMI.2014.2300479","volume":"36","author":"P Doll\u00e1r","year":"2014","unstructured":"Doll\u00e1r, P., Appel, R., Belongie, S., Perona, P.: Fast feature pyramids for object detection. IEEE Trans. Patt. Analy. Mach. Intell. 36(8), 1532\u20131545 (2014)","journal-title":"IEEE Trans. Patt. Analy. Mach. Intell."},{"issue":"1","key":"1287_CR11","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes challenge: A retrospective. Intern. J. Comput. Vision 111(1), 98\u2013136 (2015)","journal-title":"Intern. J. Comput. Vision"},{"key":"1287_CR12","unstructured":"Ge, Z., Liu, S., Wang, F., Li, Z., Sun, J.: Yolox: Exceeding yolo series in 2021. arXiv preprint arXiv:2107.08430 (2021)"},{"key":"1287_CR13","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp. 2961\u20132969 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"1287_CR14","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1287_CR15","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., He, K., Doll\u00e1r, P.: Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.324"},{"key":"1287_CR16","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., Zitnick, C.L.: Microsoft coco: Common objects in context. In: European conference on computer vision, pp. 740\u2013755. Springer (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1287_CR17","doi-asserted-by":"crossref","unstructured":"Liu, S., Huang, D., Wang, Y.: Adaptive nms: Refining pedestrian detection in a crowd. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 6459\u20136468 (2019)","DOI":"10.1109\/CVPR.2019.00662"},{"key":"1287_CR18","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., Berg, A.C.: Ssd: Single shot multibox detector. In: European conference on computer vision, pp. 21\u201337. Springer (2016)","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1287_CR19","doi-asserted-by":"crossref","unstructured":"Liu, Z., Lin, Y., Cao, Y., Hu, H., Wei, Y., Zhang, Z., Lin, S., Guo, B.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021)","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1287_CR20","unstructured":"Nam, W., Doll\u00e1r, P., Han, J.H.: Local decorrelation for improved detection. arXiv preprint arXiv:1406.1134 (2014)"},{"key":"1287_CR21","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, vol. 28 (2015)"},{"key":"1287_CR22","doi-asserted-by":"crossref","unstructured":"Rezatofighi, H., Tsoi, N., Gwak, J., Sadeghian, A., Reid, I., Savarese, S.: Generalized intersection over union: A metric and a loss for bounding box regression. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 658\u2013666 (2019)","DOI":"10.1109\/CVPR.2019.00075"},{"key":"1287_CR23","first-page":"344","volume-title":"Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR)","author":"D Rukhovich","year":"2021","unstructured":"Rukhovich, D., Sofiiuk, K., Galeev, D., Barinova, O., Konushin, A.: Iterdet: iterative scheme for object detection in crowded environments. In: Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR), pp. 344\u2013354. Springer, Germany (2021)"},{"key":"1287_CR24","unstructured":"Shao, S., Zhao, Z., Li, B., Xiao, T., Yu, G., Zhang, X., Sun, J.: Crowdhuman: A benchmark for detecting human in a crowd. arXiv preprint arXiv:1805.00123 (2018)"},{"issue":"4","key":"1287_CR25","first-page":"1922","volume":"44","author":"Z Tian","year":"2020","unstructured":"Tian, Z., Shen, C., Chen, H., He, T.: Fcos:a simple and strong anchor-free object detector. IEEE Trans. Patt. Analy. Mach. Intell. 44(4), 1922 (2020)","journal-title":"IEEE Trans. Patt. Analy. Mach. Intell."},{"key":"1287_CR26","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. In: Advances in neural information processing systems, vol. 30 (2017)"},{"key":"1287_CR27","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7794\u20137803 (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1287_CR28","doi-asserted-by":"crossref","unstructured":"Wang, X., Xiao, T., Jiang, Y., Shao, S., Sun, J., Shen, C.: Repulsion loss: Detecting pedestrians in a crowd. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7774\u20137783 (2018)","DOI":"10.1109\/CVPR.2018.00811"},{"key":"1287_CR29","unstructured":"Wu, B., Xu, C., Dai, X., Wan, A., Zhang, P., Yan, Z., Tomizuka, M., Gonzalez, J., Keutzer, K., Vajda, P.: Visual transformers: Token-based image representation and processing for computer vision. arXiv preprint arXiv:2006.03677 (2020)"},{"key":"1287_CR30","doi-asserted-by":"crossref","unstructured":"Yang, B., Yan, J., Lei, Z., Li, S.Z.: Convolutional channel features. In: Proceedings of the IEEE international conference on computer vision, pp. 82\u201390 (2015)","DOI":"10.1109\/ICCV.2015.18"},{"key":"1287_CR31","doi-asserted-by":"crossref","unstructured":"Yu, J., Jiang, Y., Wang, Z., Cao, Z., Huang, T.: Unitbox: An advanced object detection network. In: Proceedings of the 24th ACM international conference on Multimedia, pp. 516\u2013520 (2016)","DOI":"10.1145\/2964284.2967274"},{"key":"1287_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, L., Lin, L., Liang, X., He, K.: Is faster r-cnn doing well for pedestrian detection? In: European conference on computer vision, pp. 443\u2013457. Springer (2016)","DOI":"10.1007\/978-3-319-46475-6_28"},{"key":"1287_CR33","doi-asserted-by":"crossref","unstructured":"Zhang, S., Wen, L., Bian, X., Lei, Z., Li, S.Z.: Occlusion-aware r-cnn: detecting pedestrians in a crowd. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 637\u2013653 (2018)","DOI":"10.1007\/978-3-030-01219-9_39"},{"issue":"2","key":"1287_CR34","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/TMM.2019.2929005","volume":"22","author":"S Zhang","year":"2019","unstructured":"Zhang, S., Xie, Y., Wan, J., Xia, H., Li, S.Z., Guo, G.: Widerperson: a diverse dataset for dense pedestrian detection in the wild. IEEE Trans. Multimed. 22(2), 380\u2013393 (2019)","journal-title":"IEEE Trans. Multimed."},{"issue":"4","key":"1287_CR35","doi-asserted-by":"publisher","first-page":"1010","DOI":"10.3390\/s20041010","volume":"20","author":"Y Zhang","year":"2020","unstructured":"Zhang, Y., Chu, J., Leng, L., Miao, J.: Mask-refined r-cnn: a network for refining object details in instance segmentation. Sensors 20(4), 1010 (2020)","journal-title":"Sensors"},{"key":"1287_CR36","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, P., Liu, W., Li, J., Ye, R., Ren, D.: Distance-iou loss: Faster and better learning for bounding box regression. In: Proceedings of the AAAI conference on artificial intelligence, vol.\u00a034, pp. 12993\u201313000 (2020)","DOI":"10.1609\/aaai.v34i07.6999"},{"key":"1287_CR37","doi-asserted-by":"crossref","unstructured":"Zhou, C., Yuan, J.: Multi-label learning of part detectors for heavily occluded pedestrian detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3486\u20133495 (2017)","DOI":"10.1109\/ICCV.2017.377"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-023-01287-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-023-01287-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-023-01287-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,4,13]],"date-time":"2023-04-13T19:18:44Z","timestamp":1681413524000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-023-01287-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,28]]},"references-count":37,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2023,4]]}},"alternative-id":["1287"],"URL":"https:\/\/doi.org\/10.1007\/s11554-023-01287-7","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,28]]},"assertion":[{"value":"25 September 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 February 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 February 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"24"}}