{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T04:07:04Z","timestamp":1768709224491,"version":"3.49.0"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"16","license":[{"start":{"date-parts":[[2023,2,14]],"date-time":"2023-02-14T00:00:00Z","timestamp":1676332800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,14]],"date-time":"2023-02-14T00:00:00Z","timestamp":1676332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"Wuhan Science and Technology Bureau"},{"DOI":"10.13039\/501100011319","name":"Jianghan University","doi-asserted-by":"publisher","award":["3015-08210173"],"award-info":[{"award-number":["3015-08210173"]}],"id":[{"id":"10.13039\/501100011319","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,7]]},"DOI":"10.1007\/s11042-023-14499-7","type":"journal-article","created":{"date-parts":[[2023,2,14]],"date-time":"2023-02-14T09:06:40Z","timestamp":1676365600000},"page":"24289-24308","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":6,"title":["Multi-scale space-time transformer for driving behavior detection"],"prefix":"10.1007","volume":"82","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9857-531X","authenticated-orcid":false,"given":"Jun","family":"Gao","sequence":"first","affiliation":[]},{"given":"Jiangang","family":"Yi","sequence":"additional","affiliation":[]},{"given":"Yi Lu","family":"Murphey","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,14]]},"reference":[{"key":"14499_CR1","doi-asserted-by":"crossref","unstructured":"Akai N, Hirayama T, Morales LY et al (2019) Driving behavior modeling based on hidden markov models with driver\u2019s eye-gaze measurement and ego-vehicle localization. IEEE intelligent vehicles symposium. IEEE, pp 949\u2013956","DOI":"10.1109\/IVS.2019.8814287"},{"key":"14499_CR2","doi-asserted-by":"crossref","unstructured":"Arnab A, Dehghani M, Heigold G et al (2021) Vivit: a video vision transformer. Proceedings of the IEEE\/CVF international conference on computer vision, pp 6836\u20136846","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"14499_CR3","unstructured":"Bertasius G, Wang H, Torresani L (2021) Is space-time attention all you need for video understanding? arXiv:2102.05095"},{"key":"14499_CR4","unstructured":"Bulat A, Perez Rua JM, Sudhakaran S et al (2021) Space-time mixing attention for video transformer. Advances in neural information processing systems, 34"},{"key":"14499_CR5","doi-asserted-by":"crossref","unstructured":"Cai Z, Fan Q, Feris RS, et al (2016) A unified multi-scale deep convolutional neural network for fast object detection. European conference on computer vision, pp 354\u2013370","DOI":"10.1007\/978-3-319-46493-0_22"},{"key":"14499_CR6","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G et al (2020) End-to-end object detection with transformers. European conference on computer vision, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"14499_CR7","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A (2017) Quo vadis, action recognition? a new model and the kinetics dataset. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"issue":"4","key":"14499_CR8","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen LC, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40(4):834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"14499_CR9","doi-asserted-by":"publisher","first-page":"3561","DOI":"10.1109\/TITS.2019.2937287","volume":"21","author":"Q Deng","year":"2019","unstructured":"Deng Q, Wang J, Hillebrand K, Benjamin CR, Soffker D (2019) Prediction performance of lane changing behaviors: a study of combining environmental and eye-tracking data in a driving simulator. IEEE Trans Intell Transp Syst 21:3561\u20133570","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"14499_CR10","unstructured":"Devlin J, Chang M W, Lee K et al (2018) BERT: pre-training of deep bidirectional transformers for language understanding. arXiv:1810.04805"},{"key":"14499_CR11","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1016\/j.trf.2018.04.004","volume":"56","author":"A D\u00edaz-\u00c1lvarez","year":"2018","unstructured":"D\u00edaz-\u00c1lvarez A, Clavijo M, Jim\u00e9nez F, Talavera E, Serradilla F (2018) Modelling the human lane-change execution behaviour through multilayer perceptrons and convolutional neural networks. Transport Res F: Traffic Psychol Behav 56:134\u2013148","journal-title":"Transport Res F: Traffic Psychol Behav"},{"key":"14499_CR12","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A et al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv: 2010.11929"},{"key":"14499_CR13","doi-asserted-by":"crossref","unstructured":"Fan H, Xiong B, Mangalam K et al (2021) Multiscale vision transformers. Proceedings of the IEEE\/CVF international conference on computer vision, pp 6824\u20136835","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"14499_CR14","doi-asserted-by":"crossref","unstructured":"Feichtenhofer C, Fan H, Malik J, et al (2019) Slowfast networks for video recognition. Proceedings of the IEEE\/CVF international conference on computer vision, pp 6202\u20136211","DOI":"10.1109\/ICCV.2019.00630"},{"issue":"12","key":"14499_CR15","doi-asserted-by":"publisher","first-page":"1837","DOI":"10.1007\/s00607-019-00712-9","volume":"101","author":"J Gao","year":"2019","unstructured":"Gao J, Murphey YL, Zhu HH (2019) Personalized detection of lane changing behavior using multisensor data fusion. Computing 101(12):1837\u20131860","journal-title":"Computing"},{"issue":"3","key":"14499_CR16","doi-asserted-by":"publisher","first-page":"1234","DOI":"10.1080\/23249935.2021.1936279","volume":"18","author":"J Gao","year":"2021","unstructured":"Gao J, Yi JG, Murphey YL (2021) Joint learning of video images and physiological signals for lane-changing behavior prediction. Transp A: Transp Sci 18(3):1234\u20131253. https:\/\/doi.org\/10.1080\/23249935.2021.1936279","journal-title":"Transp A: Transp Sci"},{"issue":"1","key":"14499_CR17","first-page":"831","volume":"10","author":"J Gao","year":"2022","unstructured":"Gao J, Murphey YL, Yi JG et al (2022) A data-driven lane-changing behavior detection system based on sequence learning. Transp B: Transp Dyn 10(1):831\u2013848","journal-title":"Transp B: Transp Dyn"},{"key":"14499_CR18","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Lin TY, Le QV (2019) Nas-fpn: learning scalable feature pyramid architecture for object detection. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7036\u20137045","DOI":"10.1109\/CVPR.2019.00720"},{"key":"14499_CR19","doi-asserted-by":"crossref","unstructured":"Hong J, Sapp B, Philbin J (2019) Rules of the road: predicting driving behavior with a convolutional model of semantic interactions. Proc IEEE Conf Comput Vis Pattern Recognit:8454\u20138462","DOI":"10.1109\/CVPR.2019.00865"},{"key":"14499_CR20","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der Maaten L et al (2017) Densely connected convolutional networks. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4700\u20134708","DOI":"10.1109\/CVPR.2017.243"},{"key":"14499_CR21","doi-asserted-by":"crossref","unstructured":"Lin T Y, Doll\u00e1r P, Girshick R et al (2017) Feature pyramid networks for object detection. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"14499_CR22","doi-asserted-by":"crossref","unstructured":"Lin T Y, Goyal P, Girshick R et al (2017) Focal loss for dense object detection. Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"14499_CR23","unstructured":"Liu Y, Ott M, Goyal N et al (2019) Roberta: a robustly optimized bert pretraining approach. arXiv:1907.11692"},{"key":"14499_CR24","unstructured":"Liu L, Jiang H, He P et al (2019) On the variance of the adaptive learning rate and beyond. arXiv:1908.03265"},{"key":"14499_CR25","doi-asserted-by":"crossref","unstructured":"Liu Z, Ning J, Cao Y et al (2021) Video swin transformer. arXiv:2106.13230","DOI":"10.1109\/CVPR52688.2022.00320"},{"issue":"2","key":"14499_CR26","first-page":"77","volume":"8","author":"YL Murphey","year":"2021","unstructured":"Murphey YL, Wang K, Molnar LJ, Eby DW, Giordani B, Persad C, Stent S (2021) Development of data mining methodologies to advance knowledge of driver behaviors in naturalistic driving. SAE Int J Transp Saf 8(2):77\u201394","journal-title":"SAE Int J Transp Saf"},{"key":"14499_CR27","doi-asserted-by":"crossref","unstructured":"Newell A, Yang K, Deng J (2016) Stacked hourglass networks for human pose estimation. European conference on computer vision, pp 483\u2013499","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"14499_CR28","doi-asserted-by":"crossref","unstructured":"Peng X, Liu R, Murphey YL et al (2018) Driving behavior detection via sequence learning from vehicle signals and video images. 24th international conference on pattern recognition, pp 1265\u20131270","DOI":"10.1109\/ICPR.2018.8546255"},{"key":"14499_CR29","doi-asserted-by":"crossref","unstructured":"Ramanishka V, Chen Y T, Misu T et al (2018) Toward driving scene understanding: a dataset for learning driver behavior and causal reasoning. Proceedings of International Conference on Computer Vision and Pattern Recognition, pp 7699\u20137707","DOI":"10.1109\/CVPR.2018.00803"},{"key":"14499_CR30","doi-asserted-by":"publisher","first-page":"682","DOI":"10.3389\/fnins.2017.00682","volume":"11","author":"B Rueckauer","year":"2017","unstructured":"Rueckauer B, Lungu IA, Hu Y, Pfeiffer M, Liu SC (2017) Conversion of continuous-valued deep networks to efficient event-driven networks for image classification. Front Neurosci 11:682","journal-title":"Front Neurosci"},{"key":"14499_CR31","doi-asserted-by":"crossref","unstructured":"Sevilla-Lara L, Zha S, Yan Z et al (2021) Only time can tell: discovering temporal data for temporal modeling. Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 535\u2013544","DOI":"10.1109\/WACV48630.2021.00058"},{"key":"14499_CR32","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. arXiv:1409.1556"},{"key":"14499_CR33","doi-asserted-by":"crossref","unstructured":"Sun K, Xiao B, Liu D et al (2019) Deep high-resolution representation learning for human pose estimation. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5693\u20135703","DOI":"10.1109\/CVPR.2019.00584"},{"key":"14499_CR34","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y et al (2015) Going deeper with convolutions. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"14499_CR35","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S et al (2016) Rethinking the inception architecture for computer vision. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"14499_CR36","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi A (2017) Inception-v4, inception-resnet and the impact of residual connections on learning. Thirty-first AAAI conference on artificial intelligence, vol 31","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"14499_CR37","unstructured":"Touvron H, Cord M, Douze M et al (2021) Training data-efficient image transformers & distillation through attention. International conference on machine learning, pp 10347\u201310357"},{"key":"14499_CR38","unstructured":"Vaswani A, Shazeer N, Parmar N et al (2017) Attention is all you need. Advances in neural information processing systems, pp 5998\u20136008"},{"key":"14499_CR39","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X et al (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. arXiv:2102.12122","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"14499_CR40","doi-asserted-by":"crossref","unstructured":"Wang W, Zhou S, Li J, et al. (2021) Temporal pulses driven spiking neural network for time and power efficient object recognition in autonomous driving. 25th international conference on pattern recognition, pp 6359\u20136366","DOI":"10.1109\/ICPR48806.2021.9412302"},{"issue":"5","key":"14499_CR41","first-page":"1381","volume":"38","author":"QH Wang","year":"2021","unstructured":"Wang QH, Wang LN, Xu S (2021) Research and application of spiking neural network model based on LSTM structure. Appl Res Comput 38(5):1381\u20131386","journal-title":"Appl Res Comput"},{"key":"14499_CR42","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1016\/j.trc.2019.07.002","volume":"106","author":"DF Xie","year":"2019","unstructured":"Xie DF, Fang ZZ, Jia B, He Z (2019) A data-driven lane-changing model based on deep learning. Transp Res Part C Emerg Technol 106:41\u201360","journal-title":"Transp Res Part C Emerg Technol"},{"key":"14499_CR43","doi-asserted-by":"publisher","first-page":"114442","DOI":"10.1016\/j.eswa.2020.114442","volume":"169","author":"J Xie","year":"2021","unstructured":"Xie J, Hu K, Li G, Guo Y (2021) CNN-based driving maneuver classification using multi-sliding window fusion. Expert Syst Appl 169:114442","journal-title":"Expert Syst Appl"},{"key":"14499_CR44","doi-asserted-by":"crossref","unstructured":"Xu H, Das A, Saenko K (2017) R-c3d: region convolutional 3d network for temporal activity detection. Proceedings of the IEEE international conference on computer vision, pp 5783\u20135792","DOI":"10.1109\/ICCV.2017.617"},{"key":"14499_CR45","doi-asserted-by":"crossref","unstructured":"Yang F, Yang H, Fu J et al (2020) Learning texture transformer network for image super-resolution. Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 5791\u20135800","DOI":"10.1109\/CVPR42600.2020.00583"},{"key":"14499_CR46","doi-asserted-by":"publisher","first-page":"97","DOI":"10.3389\/fnins.2021.601109","volume":"15","author":"S Yang","year":"2021","unstructured":"Yang S, Gao T, Wang J et al (2021) Efficient spike-driven learning with dendritic event-based processing. Front Neurosci 15:97","journal-title":"Front Neurosci"},{"key":"14499_CR47","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X et al (2017) Pyramid scene parsing network. Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2881\u20132890","DOI":"10.1109\/CVPR.2017.660"},{"key":"14499_CR48","unstructured":"Zhou B, Lapedriza A, Xiao J et al (2014) Learning deep features for scene recognition using places database. Adv Neural Inf Proces Syst 27"},{"key":"14499_CR49","unstructured":"Zhu X, Su W, Lu L et al (2020) Deformable DETR: deformable transformers for end-to-end object detection. arXiv: 2010.04159"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-14499-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-14499-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-14499-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,23]],"date-time":"2023-06-23T18:53:40Z","timestamp":1687546420000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-14499-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,14]]},"references-count":49,"journal-issue":{"issue":"16","published-print":{"date-parts":[[2023,7]]}},"alternative-id":["14499"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-14499-7","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,14]]},"assertion":[{"value":"7 August 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 June 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 January 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"No potential conflict of interest is reported by the authors.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}