{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,28]],"date-time":"2025-06-28T06:23:52Z","timestamp":1751091832752,"version":"3.37.3"},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"15","license":[{"start":{"date-parts":[[2020,9,24]],"date-time":"2020-09-24T00:00:00Z","timestamp":1600905600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,9,24]],"date-time":"2020-09-24T00:00:00Z","timestamp":1600905600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Yuyou Talent Support Plan of North China University of Technology","award":["107051360019XN132\/017"],"award-info":[{"award-number":["107051360019XN132\/017"]}]},{"name":"Fundamental Research Funds for Beijing Universities","award":["110052971803\/037"],"award-info":[{"award-number":["110052971803\/037"]}]},{"name":"Special Research Foundation of North China University of Technology","award":["PXM2017_014212_000014"],"award-info":[{"award-number":["PXM2017_014212_000014"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s11042-020-09827-0","type":"journal-article","created":{"date-parts":[[2020,9,24]],"date-time":"2020-09-24T03:45:12Z","timestamp":1600919112000},"page":"23275-23295","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Video object detection algorithm based on dynamic combination of sparse feature propagation and dense feature aggregation"],"prefix":"10.1007","volume":"80","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9779-9466","authenticated-orcid":false,"given":"Danyang","family":"Cao","sequence":"first","affiliation":[]},{"given":"Jinfeng","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Zhixin","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,9,24]]},"reference":[{"doi-asserted-by":"publisher","unstructured":"Bertasius, G, Torresani, L and Shi, J (2018). Object detection in video with spatiotemporal sampling networks. European conference on computer vision (pp. 342-357). https:\/\/doi.org\/10.1007\/978-3-030-01258-8_21","key":"9827_CR1","DOI":"10.1007\/978-3-030-01258-8_21"},{"doi-asserted-by":"publisher","unstructured":"Bhandari B, Alsadoon A, Prasad PWC, Abdullah S, Haddad S (2020) Deep learning neural network for texture feature extraction in oral cancer: enhanced loss function. Multimed Tools Appl. https:\/\/doi.org\/10.1007\/s11042-020-09384-6","key":"9827_CR2","DOI":"10.1007\/s11042-020-09384-6"},{"doi-asserted-by":"publisher","unstructured":"Brazil, G and Liu, X (2019). M3d-rpn: monocular 3d region proposal network for object detection. In proceedings of the IEEE international conference on computer vision (pp. 9287-9296). https:\/\/doi.org\/10.1109\/ICCV.2019.00938","key":"9827_CR3","DOI":"10.1109\/ICCV.2019.00938"},{"unstructured":"Dai, J, Li, Y, He, K and Sun, J (2016). R-FCN: object detection via region-based fully convolutional networks. arXiv: computer vision and pattern recognition","key":"9827_CR4"},{"doi-asserted-by":"publisher","unstructured":"Dosovitskiy, A, Fischery, P, Ilg, E, Hausser, P, Hazirbas, C, Golkov, V, ... and Brox, T (2015). FlowNet: Learning Optical Flow with Convolutional Networks. international conference on computer vision (pp. 2758\u20132766). https:\/\/doi.org\/10.1109\/ICCV.2015.316","key":"9827_CR5","DOI":"10.1109\/ICCV.2015.316"},{"doi-asserted-by":"publisher","unstructured":"Fattal, A, Karg, M, Scharfenberger, C and Adamy, J (2017). Saliency-guided region proposal network for CNN based object detection. International conference on intelligent transportation systems (pp 1-7). https:\/\/doi.org\/10.1109\/itsc.2017.8317756","key":"9827_CR6","DOI":"10.1109\/itsc.2017.8317756"},{"doi-asserted-by":"publisher","unstructured":"Feichtenhofer, C, Pinz, A and Zisserman, A (2017). Detect to track and track to detect. International conference on computer vision (pp. 3057-3065). https:\/\/doi.org\/10.1109\/ICCV.2017.330","key":"9827_CR7","DOI":"10.1109\/ICCV.2017.330"},{"doi-asserted-by":"publisher","unstructured":"Gao, F, Huang, Z, Wang, Z and Wang, S (2016). An object detection acceleration framework based on low-power heterogeneous manycore architecture. The internet of things. https:\/\/doi.org\/10.1109\/WF-IoT.2016.7845407","key":"9827_CR8","DOI":"10.1109\/WF-IoT.2016.7845407"},{"doi-asserted-by":"publisher","unstructured":"Girshick, R (2015). Fast R-CNN. International conference on computer vision (pp. 1140-1148). https:\/\/doi.org\/10.1109\/ICCV.2015.169","key":"9827_CR9","DOI":"10.1109\/ICCV.2015.169"},{"doi-asserted-by":"publisher","unstructured":"Girshick, R, Donahue, J, Darrell, T and Malik, J (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. Computer vision and pattern recognition (pp. 580-587). https:\/\/doi.org\/10.1109\/CVPR.2014.81","key":"9827_CR10","DOI":"10.1109\/CVPR.2014.81"},{"issue":"3","key":"9827_CR11","doi-asserted-by":"publisher","first-page":"2633","DOI":"10.1007\/s11042-013-1566-x","volume":"72","author":"C Guo","year":"2014","unstructured":"Guo C, Liu D, Guo Y, Sun Y (2014) An adaptive graph cut algorithm for video moving objects detection. Multimed Tools Appl 72(3):2633\u20132652. https:\/\/doi.org\/10.1007\/s11042-013-1566-x","journal-title":"Multimed Tools Appl"},{"unstructured":"Han, W, Khorrami, P, Paine, TL, Ramachandran, P, Babaeizadeh, M, Shi, H, ... and Huang, TS (2016). Seq-NMS for Video Object Detection. arXiv: Computer Vision and Pattern Recognition","key":"9827_CR12"},{"doi-asserted-by":"publisher","unstructured":"Hu, H, Wang, W, Zheng, A and Luo, B (2019). MMA: motion memory attention network for video object detection. International conference on image and graphics (pp. 167-178). https:\/\/doi.org\/10.1007\/978-3-030-34110-7_15","key":"9827_CR13","DOI":"10.1007\/978-3-030-34110-7_15"},{"doi-asserted-by":"publisher","unstructured":"Huang, J, Rathod, V, Sun, C, Zhu, M, Korattikara, A, Fathi, A, ... & Murphy, K (2017). Speed\/Accuracy Trade-Offs for Modern Convolutional Object Detectors. computer vision and pattern recognition (pp. 3296\u20133297). https:\/\/doi.org\/10.1109\/CVPR.2017.351","key":"9827_CR14","DOI":"10.1109\/CVPR.2017.351"},{"doi-asserted-by":"publisher","unstructured":"Ilg, E, Mayer, N, Saikia, T, Keuper, M, Dosovitskiy, A and Brox, T (2016). Flownet 2.0: evolution of optical flow estimation with deep networks. https:\/\/doi.org\/10.1109\/CVPR.2017.179","key":"9827_CR15","DOI":"10.1109\/CVPR.2017.179"},{"issue":"10","key":"9827_CR16","doi-asserted-by":"publisher","first-page":"2896","DOI":"10.1109\/TCSVT.2017.2736553","volume":"28","author":"K Kang","year":"2018","unstructured":"Kang K, Li H, Yan J, Zeng X, Yang B, Xiao T, Zhang C, Wang Z, Wang R, Wang X, Ouyang W (2018) T-CNN: Tubelets with convolutional neural networks for object detection from videos. IEEE Transactions on Circuits and Systems for Video Technology 28(10):2896\u20132907. https:\/\/doi.org\/10.1109\/TCSVT.2017.2736553","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"9827_CR17","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1109\/CVPR.2016.95","volume":"2016","author":"K Kang","year":"2016","unstructured":"Kang K, Ouyang W, Li H, Wang X (2016) Object detection from video tubelets with convolutional neural networks. IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2016:817\u2013825. https:\/\/doi.org\/10.1109\/CVPR.2016.95","journal-title":"IEEE Conference on Computer Vision and Pattern Recognition (CVPR)"},{"doi-asserted-by":"publisher","unstructured":"Konig, D, Adam, M, Jarvers, C, Layher, G, Neumann, H and Teutsch, M (2017). Fully convolutional region proposal networks for multispectral person detection. In proceedings of the IEEE conference on computer vision and pattern recognition workshops (pp. 49-56). https:\/\/doi.org\/10.1109\/CVPRW.2017.36","key":"9827_CR18","DOI":"10.1109\/CVPRW.2017.36"},{"issue":"4","key":"9827_CR19","doi-asserted-by":"publisher","first-page":"2021","DOI":"10.1109\/TIP.2018.2882926","volume":"28","author":"L Li","year":"2019","unstructured":"Li L, Hu Q, Li X (2019) Moving object detection in video via hierarchical modeling and alternating optimization. IEEE Trans Image Process 28(4):2021\u20132036. https:\/\/doi.org\/10.1109\/TIP.2018.2882926","journal-title":"IEEE Trans Image Process"},{"doi-asserted-by":"publisher","unstructured":"Li, K, Huang, Z, Cheng, Y and Lee, C (2014). A maximal figure-of-merit learning approach to maximizing mean average precision with deep neural network based classifiers. International conference on acoustics speech and signal processing (pp. 4503-4507). https:\/\/doi.org\/10.1109\/ICASSP.2014.6854454","key":"9827_CR20","DOI":"10.1109\/ICASSP.2014.6854454"},{"key":"9827_CR21","doi-asserted-by":"publisher","first-page":"29307","DOI":"10.1007\/s11042-018-6857-9","volume":"78","author":"Q Li","year":"2019","unstructured":"Li Q, Zhan S, Xu L, Wu C (2019) Facial micro-expression recognition based on the fusion of deep learning and enhanced optical flow. Multimed Tools Appl 78:29307\u201329322. https:\/\/doi.org\/10.1007\/s11042-018-6857-9","journal-title":"Multimed Tools Appl"},{"doi-asserted-by":"publisher","unstructured":"Liu, W, Anguelov, D, Erhan, D, Szegedy, C, Reed, S, Fu, C and Berg, AC (2016). SSD: single shot MultiBox detector. European conference on computer vision (pp. 21-37). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2","key":"9827_CR22","DOI":"10.1007\/978-3-319-46448-0_2"},{"doi-asserted-by":"publisher","unstructured":"Martin, P, Benoispineau, J, Peteri, R and Morlier, J (2019). Optimal choice of motion estimation methods for fine-grained action classification with 3D convolutional networks. International conference on image processing. https:\/\/doi.org\/10.1109\/ICIP.2019.8803780","key":"9827_CR23","DOI":"10.1109\/ICIP.2019.8803780"},{"issue":"20","key":"9827_CR24","doi-asserted-by":"publisher","first-page":"26901","DOI":"10.1007\/s11042-018-5893-9","volume":"77","author":"B Meng","year":"2018","unstructured":"Meng B, Liu X, Wang X (2018) Human action recognition based on quaternion spatial-temporal convolutional neural network and LSTM in RGB videos. Multimed Tools Appl 77(20):26901\u201326918. https:\/\/doi.org\/10.1007\/s11042-018-5893-9","journal-title":"Multimed Tools Appl"},{"issue":"8","key":"9827_CR25","doi-asserted-by":"publisher","first-page":"1079","DOI":"10.1109\/TPAMI.2004.51","volume":"26","author":"S Nadimi","year":"2004","unstructured":"Nadimi S, Bhanu B (2004) Physical models for moving shadow and object detection in video. IEEE Trans Pattern Anal Mach Intell 26(8):1079\u20131087. https:\/\/doi.org\/10.1109\/TPAMI.2004.51","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"doi-asserted-by":"publisher","unstructured":"Nam, H and Han, B (2016). Learning multi-domain convolutional neural networks for visual tracking. Computer vision and pattern recognition (pp. 3119-3127). https:\/\/doi.org\/10.1109\/ICCV.2015.357","key":"9827_CR26","DOI":"10.1109\/ICCV.2015.357"},{"doi-asserted-by":"publisher","unstructured":"Redmon, J, Divvala, SK, Girshick, R and Farhadi, A (2016). You only look once: unified, real-time object detection. Computer vision and pattern recognition (pp. 779-788). https:\/\/doi.org\/10.1109\/CVPR.2016.91","key":"9827_CR27","DOI":"10.1109\/CVPR.2016.91"},{"issue":"6","key":"9827_CR28","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster r-cnn: towards real-time object detection with region proposal networks. IEEE Transactions on Pattern Analysis & Machine Intelligence 39(6):1137\u20131149. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"doi-asserted-by":"crossref","unstructured":"Shen, R, Wang, W and Zhang, S (2018). Missing recover with recurrent neural networks for video object detection. In big data: 6th CCF conference, big data 2018, Xi\u2019an, China, October 11-13, 2018, proceedings (Vol. 945, p. 278). Springer","key":"9827_CR29","DOI":"10.1007\/978-981-13-2922-7_19"},{"doi-asserted-by":"publisher","unstructured":"Verikas, A, Radeva, P, Nikolaev, DP, Zhang, W, Zhou, J and Heravi, EJ, et al. (2017). Classification of foods by transferring knowledge from ImageNet dataset. International conference on machine vision (Vol.10341, pp.1034128). https:\/\/doi.org\/10.1117\/12.2268737","key":"9827_CR30","DOI":"10.1117\/12.2268737"},{"doi-asserted-by":"publisher","unstructured":"Wang, L, Ouyang, W, Wang, X and Lu, H (2015). Visual tracking with fully convolutional networks. International conference on computer vision(pp.3119-3127). https:\/\/doi.org\/10.1109\/ICCV.2015.357","key":"9827_CR31","DOI":"10.1109\/ICCV.2015.357"},{"doi-asserted-by":"publisher","unstructured":"Wang, X, Xie, X and Lai, J (2018). Convolutional LSTM based video object detection. Chinese conference on pattern recognition (pp. 99-109). https:\/\/doi.org\/10.1007\/978-3-030-03335-4_9","key":"9827_CR32","DOI":"10.1007\/978-3-030-03335-4_9"},{"unstructured":"Wang, N and Yeung, D (2013). Learning a deep compact image representation for visual tracking. Neural information processing systems (pp. 809-817). http:\/\/respository.ust.hk\/ir\/Record\/1783.1-61168","key":"9827_CR33"},{"doi-asserted-by":"publisher","unstructured":"Wang, S, Zhou, Y, Yan, J and Deng, Z (2018). Fully motion-aware network for video object detection. European conference on computer vision (pp. 557-573). https:\/\/doi.org\/10.1007\/978-3-030-01261-8_33","key":"9827_CR34","DOI":"10.1007\/978-3-030-01261-8_33"},{"doi-asserted-by":"publisher","unstructured":"Woo, S, Hwang, S and Kweon, IS (2018). StairNet: top-down semantic aggregation for accurate one shot detection. Workshop on applications of computer vision (pp. 1093-1102). https:\/\/doi.org\/10.1109\/WACV.2018.00125","key":"9827_CR35","DOI":"10.1109\/WACV.2018.00125"},{"doi-asserted-by":"publisher","unstructured":"Xiao, F and Lee, YJ (2018). Video object detection with an aligned spatial-temporal memory. European conference on computer vision (pp. 494-510). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_30","key":"9827_CR36","DOI":"10.1007\/978-3-030-01237-3_30"},{"unstructured":"Yang, Ming and Ji, Shuiwang and Xu, Wei and Wang, Jinjun and Lv, Fengjun and Yu, Kai and Gong, Yihong and Dikmen, Mert and Lin, Dennis and Huang, Thomas. (2011). Detecting human actions in surveillance videos. TREC video retrieval evaluation. https:\/\/www.researchgate.net\/publication\/229045898","key":"9827_CR37"},{"key":"9827_CR38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-41299-9_47","volume-title":"Pattern Recognition. ACPR 2019. Lecture notes in computer science, vol 12047","author":"R Zhang","year":"2020","unstructured":"Zhang R, Miao Z, Ma C, Hao S (2020) Aggregating Motion and Attention for Video Object Detection. In: Palaiahnakote S, Sanniti di Baja G, Wang L, Yan W (eds) Pattern Recognition. ACPR 2019. Lecture notes in computer science, vol 12047. Springer, Cham. https:\/\/doi.org\/10.1007\/978-3-030-41299-9_47"},{"doi-asserted-by":"publisher","unstructured":"Zhu, X, Dai, J, Yuan, L and Wei, Y (2018). Towards high performance video object detection. Computer vision and pattern recognition (pp. 7210-7218). https:\/\/doi.org\/10.1109\/cvpr.2018.00753","key":"9827_CR39","DOI":"10.1109\/cvpr.2018.00753"},{"doi-asserted-by":"publisher","unstructured":"Zhu, X, Wang, Y, Dai, J, Yuan, L and Wei, Y (2017). Flow-guided feature aggregation for video object detection. International conference on computer vision (pp. 408-417). https:\/\/doi.org\/10.1109\/iccv.2017.52","key":"9827_CR40","DOI":"10.1109\/iccv.2017.52"},{"doi-asserted-by":"publisher","unstructured":"Zhu, X, Xiong, Y, Dai, J, Yuan, L and Wei, Y (2017). Deep Feature Flow for Video Recognition. 2017 IEEE conference on computer vision and pattern recognition (CVPR). IEEE. https:\/\/doi.org\/10.1109\/CVPR.2017.441","key":"9827_CR41","DOI":"10.1109\/CVPR.2017.441"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09827-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-09827-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-09827-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,9,24]],"date-time":"2021-09-24T07:42:28Z","timestamp":1632469348000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-09827-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,24]]},"references-count":41,"journal-issue":{"issue":"15","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["9827"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-09827-0","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2020,9,24]]},"assertion":[{"value":"5 November 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 August 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 September 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 September 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}