{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T01:31:51Z","timestamp":1775093511171,"version":"3.50.1"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,8,14]],"date-time":"2025-08-14T00:00:00Z","timestamp":1755129600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,14]],"date-time":"2025-08-14T00:00:00Z","timestamp":1755129600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s11554-025-01745-4","type":"journal-article","created":{"date-parts":[[2025,8,14]],"date-time":"2025-08-14T17:23:57Z","timestamp":1755192237000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["USH: an efficient real-time distracted driving detection model"],"prefix":"10.1007","volume":"22","author":[{"given":"He","family":"Wang","sequence":"first","affiliation":[]},{"given":"Yuan","family":"Li","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,14]]},"reference":[{"key":"1745_CR1","unstructured":"World Health Organization (WHO): Road Traffic Injuries. WHO (2021). https:\/\/www.who.int\/news-room\/fact-sheets\/detail\/road-traffic-injuries"},{"key":"1745_CR2","unstructured":"National Highway Traffic Safety Administration: National Motor Vehicle Crash Causation Survey: Report to Congress. U.S. Department of Transportation (2008). https:\/\/crashstats.nhtsa.dot.gov\/Api\/Public\/ViewPublication\/811059"},{"key":"1745_CR3","unstructured":"National Highway Traffic Safety Administration (NHTSA): Traffic Safety Facts: Distracted Driving. NHTSA (2023). https:\/\/www.nhtsa.gov\/risky-driving\/distracted-driving"},{"issue":"2","key":"1745_CR4","doi-asserted-by":"publisher","first-page":"e0171090","DOI":"10.1371\/journal.pone.0171090","volume":"12","author":"H Zhao","year":"2017","unstructured":"Zhao, H., Yin, Z., Xiang, H., et al.: Preliminary study on alterations of altitude road traffic in China from 2006 to 2013. PLoS ONE 12(2), e0171090 (2017)","journal-title":"PLoS ONE"},{"issue":"5","key":"1745_CR5","doi-asserted-by":"publisher","first-page":"1771","DOI":"10.1016\/j.aap.2011.04.008","volume":"43","author":"MA Regan","year":"2011","unstructured":"Regan, M.A., Hallett, C., Gordon, C.P.: Driver distraction and driver inattention: definition, relationship and taxonomy. Accid. Anal. Prev. 43(5), 1771\u20131781 (2011). https:\/\/doi.org\/10.1016\/j.aap.2011.04.008","journal-title":"Accid. Anal. Prev."},{"key":"1745_CR6","doi-asserted-by":"publisher","first-page":"370","DOI":"10.1016\/j.aap.2017.06.021","volume":"106","author":"P Choudhary","year":"2017","unstructured":"Choudhary, P., Velaga, N.R.: Mobile phone use during driving: effects on speed and effectiveness of driver compensatory behaviour. Accid. Anal. Prev. 106, 370\u2013378 (2017). https:\/\/doi.org\/10.1016\/j.aap.2017.06.021","journal-title":"Accid. Anal. Prev."},{"issue":"4","key":"1745_CR7","first-page":"36","volume":"31","author":"L Penghui","year":"2018","unstructured":"Penghui, L., Mengxia, H., Wenhui, Z., et al.: Influence of distraction on driver\u2019s reaction time to traffic conflicts. China J. Highw. Transp. 31(4), 36\u201341 (2018)","journal-title":"China J. Highw. Transp."},{"key":"1745_CR8","doi-asserted-by":"publisher","first-page":"1756","DOI":"10.11908\/j.issn.0253-374x.2019.12.010","volume":"47","author":"LF Zhang","year":"2019","unstructured":"Zhang, L.F., Cui, B.Y., Wang, J.H., et al.: Effects of naturalistic mobile phone operations on driving control behavior. J. Tongji Univ. Nat. Sci. 47, 1756\u20131763 (2019). https:\/\/doi.org\/10.11908\/j.issn.0253-374x.2019.12.010","journal-title":"J. Tongji Univ. Nat. Sci."},{"issue":"2","key":"1745_CR9","doi-asserted-by":"publisher","first-page":"1200","DOI":"10.3390\/app13021200","volume":"13","author":"J He","year":"2023","unstructured":"He, J., Li, Z., Ma, Y., et al.: Physiological and behavioral changes of passive fatigue on drivers during on-road driving. Appl. Sci. 13(2), 1200 (2023). https:\/\/doi.org\/10.3390\/app13021200","journal-title":"Appl. Sci."},{"key":"1745_CR10","doi-asserted-by":"publisher","first-page":"7068349","DOI":"10.1155\/2018\/7068349","volume":"2018","author":"A Voulodimos","year":"2018","unstructured":"Voulodimos, A., Doulamis, N., Doulamis, A., et al.: Deep learning for computer vision: a brief review. Comput. Intell. Neurosci. 2018, 7068349 (2018). https:\/\/doi.org\/10.1155\/2018\/7068349","journal-title":"Comput. Intell. Neurosci."},{"key":"1745_CR11","doi-asserted-by":"publisher","first-page":"107408","DOI":"10.1016\/j.engappai.2023.107408","volume":"128","author":"H Gao","year":"2024","unstructured":"Gao, H., Liu, Y.: Improving real-time driver distraction detection via constrained attention mechanism. Eng. Appl. Artif. Intell. 128, 107408 (2024). https:\/\/doi.org\/10.1016\/j.engappai.2023.107408","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1745_CR12","doi-asserted-by":"publisher","unstructured":"Bajwa, G., Fazeen, M., Dantu, R.: Detecting Driver Distraction Using Stimuli-Response EEG Analysis. arXiv (2019). https:\/\/doi.org\/10.48550\/arXiv.1904.09100","DOI":"10.48550\/arXiv.1904.09100"},{"issue":"6","key":"1745_CR13","first-page":"239","volume":"30","author":"K Mattsson","year":"2007","unstructured":"Mattsson, K.: In-vehicle prediction of truck driver sleepiness: lane position variables. Lulea Tekniska Univ. 30(6), 239\u2013244 (2007)","journal-title":"Lulea Tekniska Univ."},{"issue":"2","key":"1745_CR14","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1109\/TCSI.2005.857555","volume":"1","author":"Z Mardi","year":"2011","unstructured":"Mardi, Z., Ashtiani, S.N.M., Mikaili, M.: EEG-based drowsiness detection for safe driving using chaotic features and statistical tests. J. Med. Signals Sens. 1(2), 130\u2013137 (2011). https:\/\/doi.org\/10.1109\/TCSI.2005.857555","journal-title":"J. Med. Signals Sens."},{"key":"1745_CR15","doi-asserted-by":"publisher","unstructured":"Reddy, B., Kim, Y.H., Yun, S., et al.: Real-time driver drowsiness detection for embedded system using model compression of deep neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, Hawaii, pp. 121\u2013128 (2017). https:\/\/doi.org\/10.1109\/CVPRW.2017.59","DOI":"10.1109\/CVPRW.2017.59"},{"issue":"3","key":"1745_CR16","doi-asserted-by":"publisher","first-page":"617","DOI":"10.1007\/s11760-019-01589-z","volume":"14","author":"M Alotaibi","year":"2020","unstructured":"Alotaibi, M., Alotaibi, B.: Distracted driver classification using deep learning. Signal Image Video Process. 14(3), 617\u2013624 (2020). https:\/\/doi.org\/10.1007\/s11760-019-01589-z","journal-title":"Signal Image Video Process."},{"issue":"1","key":"1745_CR17","doi-asserted-by":"publisher","first-page":"689","DOI":"10.32604\/cmc.2021.015989","volume":"68","author":"KA Al Shalfan","year":"2021","unstructured":"Al Shalfan, K.A., Zakariah, M.: Detecting driver distraction using deep-learning approach. Comput. Mater. Contin. 68(1), 689\u2013704 (2021). https:\/\/doi.org\/10.32604\/cmc.2021.015989","journal-title":"Comput. Mater. Contin."},{"key":"1745_CR18","unstructured":"Wang, Y.: Research on Driver Distraction Detection Based on Deep Learning. Wuhan University of Science and Technology (2024)"},{"key":"1745_CR19","unstructured":"Wu, Z.: Research on Real-Time Driver Distraction Detection Method Based on Lightweight Capsule Network. Guilin University of Technology (2024)"},{"key":"1745_CR20","doi-asserted-by":"publisher","first-page":"107910","DOI":"10.1016\/j.engappai.2024.107910","volume":"132","author":"X Tang","year":"2024","unstructured":"Tang, X., Chen, Y., Ma, Y., et al.: A lightweight model combining convolutional neural network and transformer for driver distraction recognition. Eng. Appl. Artif. Intell. 132, 107910 (2024). https:\/\/doi.org\/10.1016\/j.engappai.2024.107910","journal-title":"Eng. Appl. Artif. Intell."},{"key":"1745_CR21","unstructured":"Luo, Y.: Research on Driver Distraction Behavior Recognition and Model Lightweighting Based on Deep Learning. Hunan University (2023)"},{"key":"1745_CR22","doi-asserted-by":"publisher","unstructured":"He, K., Zhang, X., Ren, S., et al.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016). https:\/\/doi.org\/10.48550\/arXiv.1512.03385","DOI":"10.48550\/arXiv.1512.03385"},{"key":"1745_CR23","doi-asserted-by":"publisher","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., et al.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2961\u20132969 (2017). https:\/\/doi.org\/10.48550\/arXiv.1703.06870","DOI":"10.48550\/arXiv.1703.06870"},{"key":"1745_CR24","doi-asserted-by":"publisher","unstructured":"Lyu, M., Zhou, J., Chen, H., et al.: Box-level active detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23766\u201323775 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.13089","DOI":"10.48550\/arXiv.2303.13089"},{"key":"1745_CR25","doi-asserted-by":"publisher","unstructured":"Tan, M., Le, Q.: EfficientNet: Rethinking model scaling for convolutional neural networks. In: International Conference on Machine Learning, PMLR, pp. 6105\u20136114 (2019). https:\/\/doi.org\/10.48550\/arXiv.1905.11946","DOI":"10.48550\/arXiv.1905.11946"},{"key":"1745_CR26","doi-asserted-by":"publisher","unstructured":"Ridnik, T., Lawen, H., Noy, A., et al.: TresNet: High performance GPU-dedicated architecture. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1400\u20131409 (2021). https:\/\/doi.org\/10.48550\/arXiv.2003.13630","DOI":"10.48550\/arXiv.2003.13630"},{"key":"1745_CR27","doi-asserted-by":"publisher","unstructured":"Howard, A.G., Zhu, M., Chen, B., et\u00a0al.: MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications. arXiv (2017). https:\/\/doi.org\/10.48550\/arXiv.1704.04861","DOI":"10.48550\/arXiv.1704.04861"},{"key":"1745_CR28","doi-asserted-by":"publisher","unstructured":"Ding, X., Zhang, X., Ma, N., et\u00a0al.: RepVGG: Making VGG-style ConvNets great again. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13733\u201313742 (2021). https:\/\/doi.org\/10.1109\/CVPR46437.2021.01352","DOI":"10.1109\/CVPR46437.2021.01352"},{"key":"1745_CR29","doi-asserted-by":"publisher","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., et\u00a0al.: An Image is Worth 16\u00a0$$\\times$$\u00a016 Words: Transformers for Image Recognition at Scale. arXiv (2020). https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"key":"1745_CR30","doi-asserted-by":"publisher","unstructured":"Kitaev, N., Kaiser, \u0141., Levskaya, A.: Reformer: The Efficient Transformer. arXiv (2020). https:\/\/doi.org\/10.48550\/arXiv.2001.04451","DOI":"10.48550\/arXiv.2001.04451"},{"key":"1745_CR31","doi-asserted-by":"publisher","first-page":"14541","DOI":"10.48550\/arXiv.2205.13213","volume":"35","author":"Z Pan","year":"2022","unstructured":"Pan, Z., Cai, J., Zhuang, B.: Fast vision transformers with HiLo attention. Adv. Neural. Inf. Process. Syst. 35, 14541\u201314554 (2022). https:\/\/doi.org\/10.48550\/arXiv.2205.13213","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1745_CR32","doi-asserted-by":"publisher","unstructured":"Wang, W., Xie, E., Li, X., et\u00a0al.: Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 568\u2013578 (2021). https:\/\/doi.org\/10.48550\/arXiv.2102.12122","DOI":"10.48550\/arXiv.2102.12122"},{"key":"1745_CR33","doi-asserted-by":"publisher","unstructured":"Wang, S., Li, B.Z., Khabsa, M., et\u00a0al.: Linformer: Self-Attention With Linear Complexity. arXiv (2020). https:\/\/doi.org\/10.48550\/arXiv.2006.04768","DOI":"10.48550\/arXiv.2006.04768"},{"key":"1745_CR34","doi-asserted-by":"publisher","unstructured":"Mehta, S., Rastegari, M.: Separable self-attention for mobile vision transformers. arXiv (2022). https:\/\/doi.org\/10.48550\/arXiv.2206.02680","DOI":"10.48550\/arXiv.2206.02680"},{"key":"1745_CR35","doi-asserted-by":"publisher","first-page":"3386","DOI":"10.48550\/arXiv.2204.07780","volume":"31","author":"G Luo","year":"2022","unstructured":"Luo, G., Zhou, Y., Sun, X., et al.: Towards lightweight transformer via group-wise transformation for vision-and-language tasks. IEEE Trans. Image Process. 31, 3386\u20133398 (2022). https:\/\/doi.org\/10.48550\/arXiv.2204.07780","journal-title":"IEEE Trans. Image Process."},{"key":"1745_CR36","doi-asserted-by":"publisher","unstructured":"Maaz, M., Shaker, A., Cholakkal, H., et al.: Edgenext: efficiently amalgamated CNN-transformer architecture for mobile vision applications. In: European Conference on Computer Vision, pp. 3\u201320. Cham: Springer Nature Switzerland (2022). https:\/\/doi.org\/10.48550\/arXiv.2206.10589","DOI":"10.48550\/arXiv.2206.10589"},{"key":"1745_CR37","doi-asserted-by":"publisher","unstructured":"Liu, Z., Lin, Y., Cao, Y., et\u00a0al.: Swin transformer: Hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10012\u201310022 (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1745_CR38","doi-asserted-by":"publisher","first-page":"22470","DOI":"10.48550\/arXiv.2107.05768","volume":"34","author":"H Ren","year":"2021","unstructured":"Ren, H., Dai, H., Dai, Z., et al.: Combiner: full attention transformer with sparse computation cost. Adv. Neural. Inf. Process. Syst. 34, 22470\u201322482 (2021). https:\/\/doi.org\/10.48550\/arXiv.2107.05768","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1745_CR39","doi-asserted-by":"publisher","unstructured":"Chen, M., Peng, H., Fu, J., et al.: Autoformer: Searching transformers for visual recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12270\u201312280 (2021). https:\/\/doi.org\/10.48550\/arXiv.2107.00651","DOI":"10.48550\/arXiv.2107.00651"},{"key":"1745_CR40","doi-asserted-by":"publisher","unstructured":"Mehta, S., Rastegari, M.: MobileViT: Lightweight, General-Purpose, and Mobile-Friendly Vision Transformer. https:\/\/doi.org\/10.48550\/arXiv.2110.02178 (2021)","DOI":"10.48550\/arXiv.2110.02178"},{"key":"1745_CR41","doi-asserted-by":"publisher","unstructured":"Sandler, M., Howard, A., Zhu, M., et\u00a0al.: MobileNetV2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4510\u20134520 (2018). https:\/\/doi.org\/10.48550\/arXiv.1801.04381","DOI":"10.48550\/arXiv.1801.04381"},{"key":"1745_CR42","doi-asserted-by":"publisher","DOI":"10.1145\/3065386","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky, A., et al.: ImageNet classification with deep convolutional neural networks. Adv. Neural. Inf. Process. Syst. (2012). https:\/\/doi.org\/10.1145\/3065386","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1745_CR43","doi-asserted-by":"publisher","unstructured":"Touvron, H., Cord, M., Douze, M., et al.: Training data-efficient image transformers & distillation through attention. In: International Conference on Machine Learning, pp. 10347\u201310357. PMLR (2021). https:\/\/doi.org\/10.48550\/arXiv.2012.12877","DOI":"10.48550\/arXiv.2012.12877"},{"issue":"3","key":"1745_CR44","doi-asserted-by":"publisher","first-page":"415","DOI":"10.48550\/arXiv.2106.13797","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang, W., Xie, E., Li, X., et al.: PVT v2: improved baselines with pyramid vision transformer. Comput. Vis. Med. 8(3), 415\u2013424 (2022). https:\/\/doi.org\/10.48550\/arXiv.2106.13797","journal-title":"Comput. Vis. Med."},{"key":"1745_CR45","doi-asserted-by":"publisher","unstructured":"Dong, X., Bao, J., Chen, D., Zhang, W., Yu, N., Yuan, L., Chen, D., Guo, B.: CSWin Transformer: A general vision transformer backbone with cross-shaped windows. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12124\u201312134 (2022). https:\/\/doi.org\/10.48550\/arXiv.2107.00652","DOI":"10.48550\/arXiv.2107.00652"},{"key":"1745_CR46","doi-asserted-by":"publisher","first-page":"9355","DOI":"10.48550\/arXiv.2104.13840","volume":"34","author":"X Chu","year":"2021","unstructured":"Chu, X., Tian, Z., Wang, Y., et al.: Twins: revisiting the design of spatial attention in vision transformers. Adv. Neural. Inf. Process. Syst. 34, 9355\u20139366 (2021). https:\/\/doi.org\/10.48550\/arXiv.2104.13840","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1745_CR47","doi-asserted-by":"publisher","unstructured":"Fan, H., Xiong, B., Mangalam, K., et al.: Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6824\u20136835 (2021). https:\/\/doi.org\/10.48550\/arXiv.2104.11227","DOI":"10.48550\/arXiv.2104.11227"},{"key":"1745_CR48","doi-asserted-by":"publisher","unstructured":"Zhu, L., Wang, X., Ke, Z., et al.: BiFormer: Vision transformer with bi-level routing attention. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10323\u201310333 (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.08810","DOI":"10.48550\/arXiv.2303.08810"},{"key":"1745_CR49","doi-asserted-by":"publisher","unstructured":"Lin, T.Y., Goyal, P., Girshick, R., et al.: Focal loss for dense object detection. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017). https:\/\/doi.org\/10.48550\/arXiv.1708.02002","DOI":"10.48550\/arXiv.1708.02002"},{"key":"1745_CR50","doi-asserted-by":"publisher","first-page":"16344","DOI":"10.48550\/arXiv.2205.14135","volume":"35","author":"T Dao","year":"2022","unstructured":"Dao, T., Fu, D., Ermon, S., et al.: FlashAttention: Fast and memory-efficient exact attention with IO-awareness. Adv. Neural. Inf. Process. Syst. 35, 16344\u201316359 (2022). https:\/\/doi.org\/10.48550\/arXiv.2205.14135","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1745_CR51","doi-asserted-by":"publisher","unstructured":"Hua, W., Dai, Z., Liu, H., et al.: Transformer quality in linear time. In: International Conference on Machine Learning, pp. 9099\u20139117. PMLR (2022). https:\/\/doi.org\/10.48550\/arXiv.2202.10447","DOI":"10.48550\/arXiv.2202.10447"},{"key":"1745_CR52","doi-asserted-by":"publisher","first-page":"711","DOI":"10.48550\/arXiv.2007.00072","volume":"3","author":"A Ivanov","year":"2021","unstructured":"Ivanov, A., Dryden, N., Ben-Nun, T., et al.: Data movement is all you need: a case study on optimizing transformers. Proc. Mach. Learn. Syst. 3, 711\u2013732 (2021). https:\/\/doi.org\/10.48550\/arXiv.2007.00072","journal-title":"Proc. Mach. Learn. Syst."},{"key":"1745_CR53","doi-asserted-by":"publisher","unstructured":"Pan, J., Bulat, A., Tan, F., et al.: EdgeViTs: Competing light-weight CNNs on mobile devices with vision transformers. In: European Conference on Computer Vision, pp. 294\u2013311. Cham: Springer Nature Switzerland (2022). https:\/\/doi.org\/10.48550\/arXiv.2205.03436","DOI":"10.48550\/arXiv.2205.03436"},{"key":"1745_CR54","doi-asserted-by":"publisher","unstructured":"Cai, H., Gan, C., Han, S.: EfficientViT: Enhanced Linear Attention for High-Resolution Low-Computation Visual Recognition. https:\/\/doi.org\/10.48550\/arXiv.2205.14756 (2022)","DOI":"10.48550\/arXiv.2205.14756"},{"key":"1745_CR55","doi-asserted-by":"publisher","first-page":"12934","DOI":"10.48550\/arXiv.2206.01191","volume":"35","author":"Y Li","year":"2022","unstructured":"Li, Y., Yuan, G., Wen, Y., et al.: EfficientFormer: vision transformers at MobileNet speed. Adv. Neural. Inf. Process. Syst. 35, 12934\u201312949 (2022). https:\/\/doi.org\/10.48550\/arXiv.2206.01191","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"1745_CR56","doi-asserted-by":"publisher","unstructured":"Wu, H., Xiao, B., Codella, N., et\u00a0al.: CVT: Introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 22\u201331 (2021). https:\/\/doi.org\/10.48550\/arXiv.2103.15808","DOI":"10.48550\/arXiv.2103.15808"},{"key":"1745_CR57","doi-asserted-by":"publisher","unstructured":"Gong, C., Wang, D.: NASViT: neural architecture search for efficient vision transformers with gradient conflict-aware supernet training. In: ICLR Proceedings (2022). https:\/\/doi.org\/10.48550\/arXiv.2201.09771","DOI":"10.48550\/arXiv.2201.09771"},{"key":"1745_CR58","doi-asserted-by":"publisher","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et\u00a0al.: Attention is all you need. Adv. Neural Inf. Process. Syst. (2017). https:\/\/doi.org\/10.48550\/arXiv.1706.03762","DOI":"10.48550\/arXiv.1706.03762"},{"key":"1745_CR59","doi-asserted-by":"publisher","unstructured":"Yu, W., Luo, M., Zhou, P., et\u00a0al.: MetaFormer is actually what you need for vision. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10819\u201310829 (2022). https:\/\/doi.org\/10.48550\/arXiv.2111.11418","DOI":"10.48550\/arXiv.2111.11418"},{"key":"1745_CR60","doi-asserted-by":"publisher","unstructured":"Yun, S., Ro, Y.: ShViT: Single-head vision transformer with memory-efficient macro design. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5756\u20135767 (2024). https:\/\/doi.org\/10.48550\/arXiv.2401.16456","DOI":"10.48550\/arXiv.2401.16456"},{"key":"1745_CR61","doi-asserted-by":"publisher","unstructured":"Howard, A., Sandler, M., Chu, G., et\u00a0al.: Searching for MobileNetV3. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1314\u20131324 (2019). https:\/\/doi.org\/10.1109\/ICCV.2019.00140","DOI":"10.1109\/ICCV.2019.00140"},{"key":"1745_CR62","doi-asserted-by":"publisher","unstructured":"Wadekar, S.N., Chaurasia, A.: MobileViTv3: Mobile-Friendly Vision Transformer with Simple and Effective Fusion of Local, Global and Input Features. arXiv (2022). https:\/\/doi.org\/10.48550\/arXiv.2209.15159","DOI":"10.48550\/arXiv.2209.15159"},{"key":"1745_CR63","doi-asserted-by":"publisher","unstructured":"Shaker, A., Maaz, M., Rasheed, H., Khan, S., Yang, M.-H., Khan, F.S.: SwiftFormer: efficient additive attention for transformer-based real-time mobile vision applications. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 17379\u201317390. IEEE (2023). https:\/\/doi.org\/10.1109\/ICCV51070.2023.01598","DOI":"10.1109\/ICCV51070.2023.01598"},{"key":"1745_CR64","doi-asserted-by":"publisher","unstructured":"Yu, W., Wang, X.: MambaOut: Do We Really Need Mamba for Vision? arXiv (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.07992","DOI":"10.48550\/arXiv.2405.07992"},{"key":"1745_CR65","doi-asserted-by":"publisher","unstructured":"Han, D., Wang, Z., Xia, Z., Han, Y., Pu, Y., Ge, C., Song, J., Song, S., Zheng, B., Huang, G.: Demystify Mamba in Vision: A Linear Attention Perspective. arXiv (2024). https:\/\/doi.org\/10.48550\/arXiv.2405.16605","DOI":"10.48550\/arXiv.2405.16605"},{"issue":"7","key":"1745_CR66","doi-asserted-by":"publisher","first-page":"8823","DOI":"10.1109\/TITS.2021.3086411","volume":"23","author":"P Li","year":"2022","unstructured":"Li, P., et al.: Driver distraction detection using octave-like convolutional neural network. IEEE Trans. Intell. Transp. Syst. 23(7), 8823\u20138833 (2022). https:\/\/doi.org\/10.1109\/TITS.2021.3086411","journal-title":"IEEE Trans. Intell. Transp. Syst."},{"issue":"10","key":"1745_CR67","doi-asserted-by":"publisher","first-page":"18248","DOI":"10.3934\/mbe.2023811","volume":"20","author":"Z Zhu","year":"2023","unstructured":"Zhu, Z., Wang, S., Gu, S., Li, Y., Li, J., Shuai, L., Qi, G.: Driver distraction detection based on lightweight networks and tiny object detection. Math. Biosci. Eng. 20(10), 18248\u201318266 (2023). https:\/\/doi.org\/10.3934\/mbe.2023811","journal-title":"Math. Biosci. Eng."},{"key":"1745_CR68","doi-asserted-by":"publisher","first-page":"117342","DOI":"10.1016\/j.image.2025.117342","volume":"138","author":"S Gu","year":"2025","unstructured":"Gu, S., Wen, B., Chen, S., Li, Y., Qi, G., Shuai, L., Zhu, Z.: Driver distraction detection based on adaptive tiny targets and lightweight networks. Signal Process. Image Commun. 138, 117342 (2025). https:\/\/doi.org\/10.1016\/j.image.2025.117342","journal-title":"Signal Process. Image Commun."},{"key":"1745_CR69","doi-asserted-by":"publisher","first-page":"109921","DOI":"10.1016\/j.engappai.2024.109921","volume":"143","author":"H Sun","year":"2025","unstructured":"Sun, H., Ma, Y.: MAViT: a lightweight hybrid model with mutual attention mechanism for driver behavior recognition. Eng. Appl. Artif. Intell. 143, 109921 (2025). https:\/\/doi.org\/10.1016\/j.engappai.2024.109921","journal-title":"Eng. Appl. Artif. Intell."}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01745-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-025-01745-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-025-01745-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,6]],"date-time":"2025-10-06T14:39:11Z","timestamp":1759761551000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-025-01745-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,14]]},"references-count":69,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["1745"],"URL":"https:\/\/doi.org\/10.1007\/s11554-025-01745-4","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,14]]},"assertion":[{"value":"10 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 August 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"164"}}