{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T22:20:35Z","timestamp":1773526835481,"version":"3.50.1"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,6,28]],"date-time":"2024-06-28T00:00:00Z","timestamp":1719532800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,28]],"date-time":"2024-06-28T00:00:00Z","timestamp":1719532800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Key Problems Research on Action Atom Modeling and Deep Interactive Relation Deduction for Complex Group Activities","award":["61672305"],"award-info":[{"award-number":["61672305"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Pattern Anal Applic"],"published-print":{"date-parts":[[2024,9]]},"DOI":"10.1007\/s10044-024-01290-z","type":"journal-article","created":{"date-parts":[[2024,6,28]],"date-time":"2024-06-28T08:02:55Z","timestamp":1719561775000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Fine grained dual level attention mechanisms with spacial context information fusion for object detection"],"prefix":"10.1007","volume":"27","author":[{"given":"Haigang","family":"Deng","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4799-3831","authenticated-orcid":false,"given":"Chuanxu","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chengwei","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhang","family":"Hao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,6,28]]},"reference":[{"issue":"7","key":"1290_CR1","doi-asserted-by":"publisher","first-page":"2542","DOI":"10.3390\/s22072542","volume":"22","author":"Z Wei","year":"2022","unstructured":"Wei Z, Zhang F, Chang S et al (2022) Mmwave radar and vision fusion for object detection in autonomous driving: a review[J]. Sensors 22(7):2542","journal-title":"Sensors"},{"key":"1290_CR2","doi-asserted-by":"publisher","unstructured":"Hu J, Zhang C, Xu S, Chen C (2021) An Invasive Target Detection and Localization Strategy Using Pan-Tilt-Zoom Cameras for Security Applications. In 2021 IEEE International Conference on Real-time Computing and Robotics (RCAR) 1236\u20131241. IEEE. https:\/\/doi.org\/10.1109\/RCAR52367.2021.9517521","DOI":"10.1109\/RCAR52367.2021.9517521"},{"key":"1290_CR3","doi-asserted-by":"crossref","unstructured":"Kumar K, Shrimankar DD, Singh N (2018) SOMES: an efficient SOM technique for event summarization in multi-view surveillance videos[C]\/\/Recent Findings in Intelligent Computing Techniques: Proceedings of the 5th ICACNI 2017, Volume 3. Springer Singapore, : 383\u2013389","DOI":"10.1007\/978-981-10-8633-5_38"},{"key":"1290_CR4","doi-asserted-by":"crossref","unstructured":"Negi A, Kumar K, Saini P (2023) Object of interest and unsupervised learning-based Framework for an effective video summarization using deep Learning[J]. IETE J Res, : 1\u201312","DOI":"10.1080\/03772063.2023.2220693"},{"key":"1290_CR5","doi-asserted-by":"crossref","unstructured":"Negi A, Kumar K, Saini P et al (2022) Object detection based approach for an efficient video summarization with system statistics over cloud[C]\/\/2022 IEEE 9th Uttar Pradesh Section International Conference on Electrical, Electronics and Computer Engineering (UPCON). IEEE, : 1\u20136","DOI":"10.1109\/UPCON56432.2022.9986376"},{"issue":"2","key":"1290_CR6","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1109\/TMM.2017.2741423","volume":"20","author":"K Kumar","year":"2017","unstructured":"Kumar K, Shrimankar DD (2017) F-DES: fast and deep event summarization[J]. IEEE Trans Multimedia 20(2):323\u2013334","journal-title":"IEEE Trans Multimedia"},{"key":"1290_CR7","doi-asserted-by":"publisher","unstructured":"Dai J, Qi H, Xiong Y, Li Y, Zhang G, Hu H, Wei Y (2017) Deformable convolutional networks. In Proceedings of the IEEE international conference on computer vision 764\u2013773. https:\/\/doi.org\/10.1109\/ICCV.2017.89","DOI":"10.1109\/ICCV.2017.89"},{"key":"1290_CR8","doi-asserted-by":"publisher","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers.Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part I 16. Springer International Publishing 213\u2013229. https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"1290_CR9","doi-asserted-by":"publisher","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. Proceedings of the IEEE conference on computer vision and pattern recognition 7132\u20137141. https:\/\/doi.org\/10.1109\/TPAMI.2019.2913372","DOI":"10.1109\/TPAMI.2019.2913372"},{"key":"1290_CR10","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1109\/ICCV48922.2021.00082","volume":"783","author":"Z Qin","year":"2021","unstructured":"Qin Z, Zhang P, Wu F, Li X (2021) Fcanet: frequency channel attention networks. Proc IEEE\/CVF Int Conf Comput Vis 783:792. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00082","journal-title":"Proc IEEE\/CVF Int Conf Comput Vis"},{"key":"1290_CR11","doi-asserted-by":"publisher","unstructured":"Yang Z, Zhu L, Wu Y, Yang Y (2020) Gated channel transformation for visual recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 11794\u201311803. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01181","DOI":"10.1109\/CVPR42600.2020.01181"},{"key":"1290_CR12","doi-asserted-by":"publisher","unstructured":"Fu J, Liu J, Tian H, Li Y, Bao Y, Fang Z, Lu H (2019) Dual attention network for scene segmentation. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 3146\u20133154. https:\/\/doi.org\/10.1109\/CVPR.2019.00326","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1290_CR13","doi-asserted-by":"publisher","unstructured":"Woo S, Park J, Lee JY, Kweon IS (2018) Cbam: Convolutional block attention module. In Proceedings of the European conference on computer vision (ECCV) 3\u201319. https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1290_CR14","doi-asserted-by":"publisher","unstructured":"Park J, Woo S, Lee JY, Kweon IS (2018) Bam: Bottleneck attention module. arXiv preprint arXiv:1807.06514. https:\/\/doi.org\/10.48550\/arXiv.1807.06514","DOI":"10.48550\/arXiv.1807.06514"},{"key":"1290_CR15","doi-asserted-by":"publisher","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In Proceedings of the IEEE conference on computer vision and pattern recognition 580\u2013587. https:\/\/doi.org\/10.1109\/CVPR.2014.81","DOI":"10.1109\/CVPR.2014.81"},{"key":"1290_CR16","doi-asserted-by":"publisher","unstructured":"Girshick R (2015) Fast r-cnn. In Proceedings of the IEEE international conference on computer vision 1440\u20131448. https:\/\/doi.org\/10.1109\/ICCV.2015.169","DOI":"10.1109\/ICCV.2015.169"},{"key":"1290_CR17","doi-asserted-by":"publisher","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. Adv Neural Inf Process Syst 28. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"1290_CR18","doi-asserted-by":"publisher","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: Unified, real-time object detection. In Proceedings of the IEEE conference on computer vision and pattern recognition 779\u2013788. https:\/\/doi.org\/10.1109\/CVPR.2016.91","DOI":"10.1109\/CVPR.2016.91"},{"key":"1290_CR19","doi-asserted-by":"publisher","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: better, faster, stronger. In Proceedings of the IEEE conference on computer vision and pattern recognition 7263\u20137271. https:\/\/doi.org\/10.1109\/CVPR.2017.690","DOI":"10.1109\/CVPR.2017.690"},{"key":"1290_CR20","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1804.02767","author":"J Redmon","year":"2018","unstructured":"Redmon J, Farhadi A (2018) Yolov3: an incremental improvement. arXiv Preprint. https:\/\/doi.org\/10.48550\/arXiv.1804.02767. arXiv:1804.02767","journal-title":"arXiv Preprint"},{"key":"1290_CR21","doi-asserted-by":"publisher","unstructured":"Bochkovskiy A, Wang CY, Liao HYM (2020) Yolov4: Optimal speed and accuracy of object detection. arXiv preprint arXiv:2004.10934. https:\/\/doi.org\/10.48550\/arXiv.2004.10934","DOI":"10.48550\/arXiv.2004.10934"},{"key":"1290_CR22","unstructured":"Jocher G et al (2021) YOLOV5, https:\/\/github.com\/ultralytics\/yolov5"},{"key":"1290_CR23","unstructured":"Li C, Li L, Jiang H et al YOLOv6: a single-stage object detection framework for industrial applications[J]. arXiv preprint arXiv:2209.02976, 2022."},{"key":"1290_CR24","doi-asserted-by":"crossref","unstructured":"Wang CY, Bochkovskiy A, Liao HYM (2023) YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors[C]\/\/Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. : 7464\u20137475","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"1290_CR25","unstructured":": YOLO V8, Jocher G, Chaurasia A, Qiu J (2023) YOLO by Ultralytics. https:\/\/github.com\/ultralytics\/ultralytics, Accessed: February 30, 2023"},{"key":"1290_CR26","doi-asserted-by":"publisher","unstructured":"Hu J, Shen L, Albanie S, Sun G, Vedaldi A (2018) Gather-excite: exploiting feature context in convolutional neural networks. Adv Neural Inf Process Syst 31. https:\/\/doi.org\/10.48550\/arXiv.1810.12348","DOI":"10.48550\/arXiv.1810.12348"},{"key":"1290_CR27","doi-asserted-by":"publisher","unstructured":"Lee H, Kim HE, Nam H (2019) Srm: A style-based recalibration module for convolutional neural networks. In Proceedings of the IEEE\/CVF International conference on computer vision 1854\u20131862. https:\/\/doi.org\/10.1109\/ICCV.2019.00194","DOI":"10.1109\/ICCV.2019.00194"},{"key":"1290_CR28","doi-asserted-by":"publisher","unstructured":"Wang Q, Wu B, Zhu P, Li P, Zuo W, Hu Q (2020) ECA-Net: Efficient channel attention for deep convolutional neural networks. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 11534\u201311542. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01155","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"1290_CR29","doi-asserted-by":"publisher","unstructured":"Hou Q, Zhou D, Feng J (2021) Coordinate attention for efficient mobile network design. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition 13713\u201313722. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01350","DOI":"10.1109\/CVPR46437.2021.01350"},{"key":"1290_CR30","doi-asserted-by":"crossref","unstructured":"Kumar A, Singh N, Kumar P et al (2017) A novel superpixel based color spatial feature for salient object detection[C]\/\/2017 conference on information and communication technology (CICT). IEEE, : 1\u20135","DOI":"10.1109\/INFOCOMTECH.2017.8340630"},{"key":"1290_CR31","doi-asserted-by":"publisher","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu CY, Berg AC (2016) Ssd: Single shot multibox detector. In Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings Part I 14: 21\u201337. Springer International Publishing. https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1290_CR32","doi-asserted-by":"publisher","unstructured":"Lim JS, Astrid M, Yoon HJ, Lee SI (2021) Small object detection using context and attention. In 2021 International Conference on Artificial Intelligence in Information and Communication (ICAIIC) 181\u2013186. IEEE. https:\/\/doi.org\/10.1109\/ICAIIC51459.2021.9415217","DOI":"10.1109\/ICAIIC51459.2021.9415217"},{"issue":"7","key":"1290_CR33","doi-asserted-by":"publisher","first-page":"3423","DOI":"10.1109\/TIP.2019.2896952","volume":"28","author":"Y Yuan","year":"2019","unstructured":"Yuan Y, Xiong Z, Wang Q (2019) VSSA-NET: Vertical spatial sequence attention network for traffic sign detection. IEEE Trans Image Process 28(7):3423\u20133434. https:\/\/doi.org\/10.1109\/TIP.2019.2896952","journal-title":"IEEE Trans Image Process"},{"issue":"8","key":"1290_CR34","doi-asserted-by":"publisher","first-page":"4716","DOI":"10.1109\/TSMC.2019.2945053","volume":"51","author":"Y Liu","year":"2019","unstructured":"Liu Y, Cao S, Lasang P, Shen S (2019) Modular lightweight network for road object detection using a feature fusion approach. IEEE Trans Syst Man Cybernetics: Syst 51(8):4716\u20134728. https:\/\/doi.org\/10.1109\/TSMC.2019.2945053","journal-title":"IEEE Trans Syst Man Cybernetics: Syst"},{"issue":"2","key":"1290_CR35","doi-asserted-by":"publisher","first-page":"579","DOI":"10.1109\/TPAMI.2019.2933510","volume":"44","author":"J Han","year":"2019","unstructured":"Han J, Yao X, Cheng G, Feng X, Xu D (2019) P-CNN: part-based convolutional neural networks for fine-grained visual categorization. IEEE Trans Pattern Anal Mach Intell 44(2):579\u2013590. https:\/\/doi.org\/10.1109\/TPAMI.2019.2933510","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1290_CR36","doi-asserted-by":"publisher","unstructured":"Xu S, Wang X, Lv W, Chang Q, Cui C, Deng K, Lai B (2022) PP-YOLOE: an evolved version of YOLO. https:\/\/doi.org\/10.48550\/arXiv.2203.16250. arXiv preprint arXiv:2203.16250","DOI":"10.48550\/arXiv.2203.16250"},{"key":"1290_CR37","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2107.08430","author":"Z Ge","year":"2021","unstructured":"Ge Z, Liu S, Wang F, Li Z, Sun J (2021) Yolox: Exceeding Yolo series in 2021. arXiv Preprint. https:\/\/doi.org\/10.48550\/arXiv.2107.08430. arXiv:2107.08430","journal-title":"arXiv Preprint"},{"key":"1290_CR38","unstructured":"Mingxing Tan R, Pang, Quoc VL (2020) EfficientDet: Scalable and efficient object detection. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pages 10781\u201310790, 2, 7"},{"key":"1290_CR39","first-page":"7","volume":"5","author":"T Liang","year":"2022","unstructured":"Liang T, Chu X, Liu Y, Wang Y, Tang Z, Chu W, Chen J, Haibin Ling (2022) CBNet: a composite backbone network architecture for object detection. IEEE Trans Image Process (TIP) 5:7","journal-title":"IEEE Trans Image Process (TIP)"},{"key":"1290_CR40","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition 770\u2013778. https:\/\/doi.org\/10.48550\/arXiv.1512.03385","DOI":"10.48550\/arXiv.1512.03385"},{"key":"1290_CR41","doi-asserted-by":"publisher","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L, C (2018) Mobilenetv2: Inverted residuals and linear bottlenecks. In Proceedings of the IEEE conference on computer vision and pattern recognition 4510\u20134520. https:\/\/doi.org\/10.1109\/CVPR.2018.00474","DOI":"10.1109\/CVPR.2018.00474"},{"key":"1290_CR42","unstructured":"Alexey Dosovitskiy L, Beyer A, Kolesnikov D, Weissenborn X, Zhai T, Unterthiner M, Dehghani M, Minderer G, Heigold S, Gelly et al (2021) An image is worth 16x16 words: Transformers for image recognition at scale. International Conference on Learning Representations"},{"key":"1290_CR43","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks, in Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"1290_CR44","doi-asserted-by":"crossref","unstructured":"Hou Q, Jiang Z, Yuan L, Cheng M-M (2022) Shuicheng Yan, and Jiashi Feng. Vision permutator: a permutable mlp-like architecture for visual recognition. IEEE Trans Pattern Anal Mach Intell","DOI":"10.1109\/TPAMI.2022.3145427"},{"key":"1290_CR45","doi-asserted-by":"crossref","unstructured":"Real E, Aggarwal A, Huang Y, Le QV (2018) Regularized evolution for image classifier architecture search, in AAAI","DOI":"10.1609\/aaai.v33i01.33014780"},{"key":"1290_CR46","unstructured":"Lv T, Bai C, Wang C, Mdmlp (2022) Image classification from scratch on small datasets with mlp[J]. arXiv preprint arXiv:2205.14477"},{"key":"1290_CR47","doi-asserted-by":"publisher","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-cam: Visual explanations from deep networks via gradient-based localization. In Proceedings of the IEEE international conference on computer vision 618\u2013626. https:\/\/doi.org\/10.1109\/ICCV.2017.74","DOI":"10.1109\/ICCV.2017.74"},{"key":"1290_CR48","unstructured":"Yuan Y, Huang L, Guo J et al (2018) Ocnet: object context network for scene parsing[J]. arXiv preprint arXiv:1809.00916"},{"key":"1290_CR49","first-page":"36440","volume":"35","author":"Q Zhang","year":"2022","unstructured":"Zhang Q, Yang YB (2022) Rest v2: simpler, faster and stronger [J]. Adv Neural Inf Process Syst 35:36440\u201336452","journal-title":"Adv Neural Inf Process Syst"}],"container-title":["Pattern Analysis and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-024-01290-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10044-024-01290-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10044-024-01290-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,12]],"date-time":"2024-09-12T18:08:59Z","timestamp":1726164539000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10044-024-01290-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,28]]},"references-count":49,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,9]]}},"alternative-id":["1290"],"URL":"https:\/\/doi.org\/10.1007\/s10044-024-01290-z","relation":{},"ISSN":["1433-7541","1433-755X"],"issn-type":[{"value":"1433-7541","type":"print"},{"value":"1433-755X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6,28]]},"assertion":[{"value":"12 April 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 June 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"75"}}