{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:27:48Z","timestamp":1740122868364,"version":"3.37.3"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"18","license":[{"start":{"date-parts":[[2022,3,26]],"date-time":"2022-03-26T00:00:00Z","timestamp":1648252800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,3,26]],"date-time":"2022-03-26T00:00:00Z","timestamp":1648252800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,7]]},"DOI":"10.1007\/s11042-022-12948-3","type":"journal-article","created":{"date-parts":[[2022,3,28]],"date-time":"2022-03-28T21:06:32Z","timestamp":1648501592000},"page":"26361-26379","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fast\u2013slow visual network for action recognition in videos"],"prefix":"10.1007","volume":"81","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9505-4049","authenticated-orcid":false,"given":"Heng","family":"Hu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tongcun","family":"Liu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hailin","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,3,26]]},"reference":[{"issue":"5","key":"12948_CR1","doi-asserted-by":"publisher","first-page":"5919","DOI":"10.1007\/s11042-018-6875-7","volume":"78","author":"D Avola","year":"2019","unstructured":"Avola D, Bernardi M, Foresti GL (2019) Fusing depth and colour information for human action recognition[J]. Multimed Tools Appl 78(5):5919\u20135939","journal-title":"Multimed Tools Appl"},{"key":"12948_CR2","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A 2017 Quo vadis, action recognition? a new model and the kinetics dataset. in proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","DOI":"10.1109\/CVPR.2017.502"},{"issue":"3","key":"12948_CR3","first-page":"85","volume":"28","author":"H Chu","year":"2008","unstructured":"Chu H et al (2008) Target tracking algorithm based on camshift algorithm combined with difference in frame. Journal of Projectiles, Rockets, Missiles and Guidance 28(3):85\u201388","journal-title":"Journal of Projectiles, Rockets, Missiles and Guidance"},{"key":"12948_CR4","first-page":"248","volume":"2009","author":"J Deng","year":"2009","unstructured":"Deng J, Dong W, Socher R et al (2009) Imagenet: A large-scale hierarchical image database[C]. 2009 IEEE conference on computer vision and pattern recognition. Ieee 2009:248\u2013255","journal-title":"Ieee"},{"key":"12948_CR5","doi-asserted-by":"crossref","unstructured":"Dhiman C, Vishwakarma DK (2020) View-invariant deep architecture for human action recognition using two-stream motion and shape temporal dynamics[J].\u00a0IEEE Trans Image Process\u00a029:3835\u20133844","DOI":"10.1109\/TIP.2020.2965299"},{"key":"12948_CR6","doi-asserted-by":"crossref","unstructured":"Diba A, Sharma V, Van Gool L (2017) Deep temporal linear encoding networks[C]. Proceedings of the IEEE conference on Computer Vision and Pattern Recognition\u00a02017:2329\u20132338","DOI":"10.1109\/CVPR.2017.168"},{"key":"12948_CR7","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Pinz A, Zisserman A (2016) Convolutional two-stream network fusion for video action recognition[C]. Proceedings of the IEEE conference on computer vision and pattern recognition 2016:1933\u20131941","DOI":"10.1109\/CVPR.2016.213"},{"key":"12948_CR8","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Fan H, Malik J et al (2019) Slowfast networks for video recognition[C]. Proceedings of the IEEE\/CVF international conference on computer vision\u00a02019:6202\u20136211","DOI":"10.1109\/ICCV.2019.00630"},{"issue":"14","key":"12948_CR9","doi-asserted-by":"publisher","first-page":"20533","DOI":"10.1007\/s11042-019-7404-z","volume":"78","author":"H Ge","year":"2019","unstructured":"Ge H, Yan Z, Yu W et al (2019) An attention mechanism based convolutional LSTM network for video action recognition[J]. Multimed Tools Appl 78(14):20533\u201320556","journal-title":"Multimed Tools Appl"},{"key":"12948_CR10","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S et al (2016) Deep residual learning for image recognition[C]. Proceedings of the IEEE conference on computer vision and pattern recognition 2016:770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"12948_CR11","doi-asserted-by":"crossref","unstructured":"Kuehne H, Jhuang H, Garrote E et al (2011) HMDB: a large video database for human motion recognition[C].\u00a02011 International conference on computer vision. IEEE\u00a02011:2556\u20132563","DOI":"10.1109\/ICCV.2011.6126543"},{"issue":"5","key":"12948_CR12","doi-asserted-by":"publisher","first-page":"715","DOI":"10.1541\/ieejeiss1987.120.5_715","volume":"120","author":"T Kashiwagi","year":"2000","unstructured":"Kashiwagi T, Oe S, Terada K (2000) Edge characteristic of color image and edge detection using color histogram. IEEJ Transactions on Electronics, Information and Systems 120(5):715\u2013723","journal-title":"IEEJ Transactions on Electronics, Information and Systems"},{"key":"12948_CR13","unstructured":"Kay W, Carreira J, Simonyan K et al (2017) The kinetics human action video dataset[J]. arXiv preprint arXiv:1705.06950"},{"key":"12948_CR14","doi-asserted-by":"crossref","unstructured":"Kumar K (2019) EVS-DK: Event video skimming using deep keyframe[J].\u00a0J Vis Commun Image Represent 58:345\u2013352","DOI":"10.1016\/j.jvcir.2018.12.009"},{"key":"12948_CR15","doi-asserted-by":"crossref","unstructured":"Kumar K, Shrimankar DD (2018) ESUMM: event summarization on scale-free networks[J]. IETE Technical Review","DOI":"10.1080\/02564602.2018.1454347"},{"key":"12948_CR16","doi-asserted-by":"crossref","unstructured":"Kumar K, Shrimankar DD, Singh N (2018) V-LESS: a video from linear event summaries[C]. Proceedings of 2nd International Conference on Computer Vision & Image Processing. Springer, Singapore, pp 385\u2013395","DOI":"10.1007\/978-981-10-7895-8_30"},{"key":"12948_CR17","doi-asserted-by":"crossref","unstructured":"Kumar K, Shrimankar DD, Singh N (2019) Key-lectures: keyframes extraction in video lectures[M]\/\/Machine intelligence and signal analysis. Springer, Singapore, pp 453\u2013459","DOI":"10.1007\/978-981-13-0923-6_39"},{"key":"12948_CR18","doi-asserted-by":"crossref","unstructured":"Lan Z, Zhu Y, Hauptmann AG et al (2017) Deep local video feature for action recognition[C]\/\/Proceedings of the IEEE conference on computer vision and pattern recognition workshops\u00a02017:1\u20137","DOI":"10.1109\/CVPRW.2017.161"},{"key":"12948_CR19","doi-asserted-by":"crossref","unstructured":"Peng L, Lafortune EPF, Greenberg DP et al (1997) Use of computer graphic simulation to explain color histogram structure[C]. Color and Imaging Conference. Society for Imaging Science and Technology\u00a01997(1):187\u2013192","DOI":"10.2352\/CIC.1997.5.1.art00037"},{"key":"12948_CR20","doi-asserted-by":"crossref","unstructured":"Pengcheng D, Siyuan C, Zhenyu Z et al (2019) Human Behavior Recognition Based on IC3D[C].\u00a02019 Chinese Control And Decision Conference (CCDC). IEEE\u00a02019:3333\u20133337","DOI":"10.1109\/CCDC.2019.8832995"},{"key":"12948_CR21","doi-asserted-by":"crossref","unstructured":"Qiu Z, Yao T, Mei T (2017) Learning spatio-temporal representation with pseudo-3d residual networks[C]. Proceedings of the IEEE International Conference on Computer Vision\u00a02017:5533\u20135541","DOI":"10.1109\/ICCV.2017.590"},{"key":"12948_CR22","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos[J]. Advances in neural information processing systems, 27"},{"key":"12948_CR23","unstructured":"Simonyan K, Zisserman A, 2014 Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"12948_CR24","doi-asserted-by":"crossref","unstructured":"Solanki A, Bamrara R, Kumar K et al (2020) VEDL: a novel video event searching technique using deep learning[M]. Soft Computing: Theories and Applications. Springer, Singapore, pp 905\u2013914","DOI":"10.1007\/978-981-15-0751-9_83"},{"key":"12948_CR25","unstructured":"Soomro K, Zamir AR, Shah M 2012 A dataset of 101 human action classes from videos in the wild. Center for Research in Computer Vision, 2(11)"},{"key":"12948_CR26","doi-asserted-by":"crossref","unstructured":"Sun L, Jia K, Yeung DY et al (2015) Human action recognition using factorized spatio-temporal convolutional networks[C]. Proceedings of the IEEE international conference on computer vision\u00a02015:4597\u20134605","DOI":"10.1109\/ICCV.2015.522"},{"key":"12948_CR27","doi-asserted-by":"crossref","unstructured":"Szegedy C, et al. 2015 Going deeper with convolutions. in Proceedings of the IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"12948_CR28","doi-asserted-by":"crossref","unstructured":"Tang Q, Dai S.G, Yang J 2013 Object tracking algorithm based on camshift combining background subtraction with three frame difference. In applied mechanics and materials. 2013. Trans Tech Publ","DOI":"10.4028\/www.scientific.net\/AMM.373-375.1116"},{"key":"12948_CR29","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R et al (2015) Learning spatiotemporal features with 3d convolutional networks[C]. Proceedings of the IEEE international conference on computer vision\u00a02015:4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"12948_CR30","doi-asserted-by":"crossref","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories[C]. Proceedings of the IEEE international conference on computer vision\u00a02013:3551\u20133558","DOI":"10.1109\/ICCV.2013.441"},{"key":"12948_CR31","doi-asserted-by":"crossref","unstructured":"Wang L, Qiao Y, Tang X (2015) Action recognition with trajectory-pooled deep-convolutional descriptors[C]. Proceedings of the IEEE conference on computer vision and pattern recognition\u00a02015:4305\u20134314","DOI":"10.1109\/CVPR.2015.7299059"},{"key":"12948_CR32","doi-asserted-by":"crossref","unstructured":"Wang L, Xiong Y, Wang Z et al (2016) Temporal segment networks: Towards good practices for deep action recognition[C]. European conference on computer vision. Springer, Cham, pp\u00a020\u201336","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"12948_CR33","doi-asserted-by":"crossref","unstructured":"Wu H, Liu J, Zhu X et al (2021) Multi-scale spatial-temporal integration convolutional tube for human action recognition[C]. Proceedings of the Twenty-Ninth International Conference on International Joint Conferences on Artificial Intelligence\u00a02021:753\u2013759","DOI":"10.24963\/ijcai.2020\/105"},{"key":"12948_CR34","doi-asserted-by":"crossref","unstructured":"Xu Y, Chen M, Xie T (2017) Method for state recognition of egg embryo in vaccines production based on support vector machine[J]. DEStech Transactions on Engineering and Technology Research, (tmcm)","DOI":"10.12783\/dtetr\/tmcm2017\/12621"},{"key":"12948_CR35","doi-asserted-by":"crossref","unstructured":"Yang C, Xu Y, Shi J et al (2020) Temporal pyramid network for action recognition[C]. Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition\u00a02020:591\u2013600","DOI":"10.1109\/CVPR42600.2020.00067"},{"issue":"9C","key":"12948_CR36","first-page":"949","volume":"30","author":"GH Yoo","year":"2005","unstructured":"Yoo GH, Park JM, You KS et al (2005) Content-Based Image Retrieval Using Adaptive Color Histogram[J]. The Journal of Korean Institute of Communications and Information Sciences 30(9C):949\u2013954","journal-title":"The Journal of Korean Institute of Communications and Information Sciences"},{"key":"12948_CR37","doi-asserted-by":"crossref","unstructured":"Zhang D, Dai X, Wang YF (2018) Dynamic temporal pyramid network: a closer look at multi-scale modeling for activity detection[C]. Asian Conference on Computer Vision. Springer, Cham, pp\u00a0712\u2013728","DOI":"10.1007\/978-3-030-20870-7_44"},{"key":"12948_CR38","doi-asserted-by":"crossref","unstructured":"Zhong X, Tu K, Xia H (2017) Mean-shift algorithm fusing multi feature[C].\u00a02017 IEEE 2nd Advanced Information Technology, Electronic and Automation Control Conference (IAEAC). IEEE\u00a02017: 245\u20131249","DOI":"10.1109\/IAEAC.2017.8054213"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12948-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-12948-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12948-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,19]],"date-time":"2023-11-19T09:56:09Z","timestamp":1700387769000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-12948-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,26]]},"references-count":38,"journal-issue":{"issue":"18","published-print":{"date-parts":[[2022,7]]}},"alternative-id":["12948"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-12948-3","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2022,3,26]]},"assertion":[{"value":"8 February 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 May 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 March 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We declare that we do not have any commercial or associative interest that represents a conflict of interest in connection with the work submitted.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}