{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,3]],"date-time":"2026-06-03T14:23:56Z","timestamp":1780496636677,"version":"3.54.1"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T00:00:00Z","timestamp":1641513600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T00:00:00Z","timestamp":1641513600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"the national key r&d program of china","award":["2020YFB1600400"],"award-info":[{"award-number":["2020YFB1600400"]}]},{"DOI":"10.13039\/501100001809","name":"national natural science foundation of china","doi-asserted-by":"publisher","award":["61806198"],"award-info":[{"award-number":["61806198"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"the key research and development program of guangzhou","award":["202007050002"],"award-info":[{"award-number":["202007050002"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,4]]},"DOI":"10.1007\/s11227-021-04151-2","type":"journal-article","created":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T11:03:33Z","timestamp":1641553413000},"page":"8268-8284","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":44,"title":["Driver attention prediction based on convolution and transformers"],"prefix":"10.1007","volume":"78","author":[{"given":"Chao","family":"Gou","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7041-559X","authenticated-orcid":false,"given":"Yuchen","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dan","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,1,7]]},"reference":[{"key":"4151_CR1","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1631\/FITEE.1700053","volume":"18","author":"Z Nanning","year":"2017","unstructured":"Nanning Z, Liu Z, Pengju R, Ma Y, Chen ST, Yu S, Xue J, Chen B, Wang F (2017) Hybrid-augmented intelligence: collaboration and cognition. Front Inf Technol Electron Eng 18:153\u2013179","journal-title":"Front Inf Technol Electron Eng"},{"key":"4151_CR2","doi-asserted-by":"crossref","unstructured":"A Tawari, B Kang (2017) A computational framework for drivers visual attention using a fully convolutional architecture. IEEE Intelligent Vehicles Symposium (IV), pp. 887\u2013894","DOI":"10.1109\/IVS.2017.7995828"},{"issue":"7","key":"4151_CR3","doi-asserted-by":"publisher","first-page":"1720","DOI":"10.1109\/TPAMI.2018.2845370","volume":"41","author":"A Palazzi","year":"2018","unstructured":"Palazzi A, Abati D, Solera F, Cucchiara R et al (2018) Predicting the drivers focus of attention: the dr (eye) ve project. IEEE Trans Pattern Anal Mach Intell 41(7):1720\u20131733","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4151_CR4","doi-asserted-by":"crossref","unstructured":"Y Xia, J Kim, J Canny, K Zipser, T Canas-Bajo, D Whitney (2020) Periphery-fovea multi-resolution driving model guided by human attention. In: The IEEE Winter Conference on Applications of Computer Vision, pp. 1767\u20131775","DOI":"10.1109\/WACV45572.2020.9093524"},{"key":"4151_CR5","doi-asserted-by":"crossref","unstructured":"A Pal, Mondal S, Christensen H (2020) looking at the right stuff guided semantic-gaze for autonomous driving. In: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 11880\u201311889","DOI":"10.1109\/CVPR42600.2020.01190"},{"key":"4151_CR6","unstructured":"A Vaswani, N Shazeer, N Parmar, J Uszkoreit, L Jones, AN Gomez, L Kaiser, Illia Polosukhin (2017) Attention is all you need. ArXiv, abs\/1706.03762"},{"key":"4151_CR7","doi-asserted-by":"crossref","unstructured":"Zheng S, Lu J, Zhao H, Zhu X, Luo Z, Wang Y, Fu Y, Feng J, Xiang T, Torr PH, Zhang L. (2020)Rethinking semantic segmentation from a sequence-to-sequence perspective with transformers. ArXiv, abs\/2012.15840","DOI":"10.1109\/CVPR46437.2021.00681"},{"key":"4151_CR8","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N (2020) An image is worth 16x16 words: transformers for image recognition at scale. ArXiv, abs\/2010.11929"},{"key":"4151_CR9","unstructured":"Han K, Xiao A, Wu E, Guo J, Xu C, Wang Y (2021) Transformer in transformer. ArXiv, abs\/2103.00112"},{"issue":"5","key":"4151_CR10","doi-asserted-by":"publisher","first-page":"2146","DOI":"10.1109\/TITS.2019.2915540","volume":"21","author":"T Deng","year":"2020","unstructured":"Deng T, Yan H, Qin L, Ngo T, Manjunath BS (2020) How do drivers allocate their potential attention? driving fixation prediction via convolutional neural networks. IEEE Trans Intell Transp Syst 21(5):2146\u20132154","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"4151_CR11","doi-asserted-by":"crossref","unstructured":"Fang J, Yan D, Qiao J, Xue J, Yu H (2021) Dada: driver attention prediction in driving accident scenarios. In: IEEE Transactions on Intelligent Transportation Systems, pp. 1\u201313","DOI":"10.1109\/TITS.2020.3044678"},{"key":"4151_CR12","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European Conference on Computer Vision, pp. 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"4151_CR13","unstructured":"Yan H, Li Z, Li W, Wang C, Wu M, Zhang C (2021) Contnet: Why not use convolution and transformer at the same time? ArXiv, abs\/2104.13497"},{"key":"4151_CR14","unstructured":"Yang G, Tang H, Ding M, Sebe N, Ricci E (2021)Transformers solve the limited receptive field for monocular depth prediction. ArXiv, abs\/2103.12091"},{"key":"4151_CR15","doi-asserted-by":"crossref","unstructured":"Xia Y, Zhang D, Kim J, Nakayama K, Zipser K, Whitney D (2018)Predicting driver attention in critical situations. In: Asian conference on computer vision, pp. 658\u2013674. Springer","DOI":"10.1007\/978-3-030-20873-8_42"},{"key":"4151_CR16","doi-asserted-by":"crossref","unstructured":"Kim J, Rohrbach A, Darrell T, Canny J, Akata Z (2018) Textual explanations for self-driving vehicles. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 577\u2013593,","DOI":"10.1007\/978-3-030-01216-8_35"},{"issue":"4715","key":"4151_CR17","doi-asserted-by":"publisher","first-page":"782","DOI":"10.1126\/science.4023713","volume":"229","author":"J Moran","year":"1985","unstructured":"Moran J, Desimone R (1985) Selective attention gates visual processing in the extrastriate cortex. Science 229(4715):782\u20134","journal-title":"Science"},{"key":"4151_CR18","unstructured":"Alaparthi S, Mishra M (2020) Bidirectional encoder representations from transformers (bert): a sentiment analysis odyssey. ArXiv, abs\/2007.01127"},{"key":"4151_CR19","doi-asserted-by":"crossref","unstructured":"Prakash A, Chitta K, Geiger A (2021) Multi-modal fusion transformer for end-to-end autonomous driving. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7077\u20137087","DOI":"10.1109\/CVPR46437.2021.00700"},{"key":"4151_CR20","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3082763","author":"Z Yuan","year":"2021","unstructured":"Yuan Z, Song X, Bai L, Wang Z, Ouyang W (2021) Temporal-channel transformer for 3d lidar-based video object detection for autonomous driving. IEEE Trans Circuits Syst Video Technol. https:\/\/doi.org\/10.1109\/TCSVT.2021.3082763","journal-title":"IEEE Trans Circuits Syst Video Technol"},{"key":"4151_CR21","doi-asserted-by":"crossref","unstructured":"Sheng H, Cai S, Liu Y, Deng B, Huang J, Hua XS, Zhao MJ (2021) Improving 3d object detection with channel-wise transformer. In:Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV), pp.2743\u20132752","DOI":"10.1109\/ICCV48922.2021.00274"},{"key":"4151_CR22","doi-asserted-by":"publisher","first-page":"2999","DOI":"10.1109\/TITS.2018.2870909","volume":"20","author":"A Morando","year":"2019","unstructured":"Morando A, Victor T, Dozza M (2019) A reference model for driver attention in automation: Glance behavior changes during lateral and longitudinal assistance. IEEE Trans Intell Transp Syst 20:2999\u20133009","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"4151_CR23","doi-asserted-by":"crossref","unstructured":"Fang J, Yan D, Qiao J, Xue J, Wang H, Li S (2019) Dada-2000: can driving accident be predicted by driver attention analyzed by a benchmark. In: 2019 IEEE Intelligent Transportation Systems Conference (ITSC), pp. 4303\u20134309. IEEE","DOI":"10.1109\/ITSC.2019.8917218"},{"key":"4151_CR24","doi-asserted-by":"publisher","first-page":"4198","DOI":"10.1109\/TMM.2020.3038311","volume":"23","author":"K Lv","year":"2020","unstructured":"Lv K, Sheng H, Xiong Z, Li W, Zheng L (2020) Improving driver gaze prediction with reinforced attention. IEEE Trans Multimed 23:4198\u20134207","journal-title":"IEEE Trans Multimed"},{"issue":"9","key":"4151_CR25","doi-asserted-by":"publisher","first-page":"3059","DOI":"10.1109\/TITS.2017.2766216","volume":"19","author":"T Deng","year":"2018","unstructured":"Deng T, Yan H, Li YJ (2018) Learning to boost bottom-up fixation prediction in driving environments via random forest. IEEE Trans Intell Transp Syst 19(9):3059\u20133067","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"4151_CR26","doi-asserted-by":"crossref","unstructured":"Tawari A, Mallela P, Martin S (2018) Learning to attend to salient targets in driving videos using fully convolutional rnn. In: 2018 21st International Conference on Intelligent Transportation Systems (ITSC), pp. 3225\u20133232. IEEE","DOI":"10.1109\/ITSC.2018.8569438"},{"key":"4151_CR27","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2021.3053178","author":"F Lateef","year":"2021","unstructured":"Lateef F, Kas M, Ruichek Y (2021) Saliency heat-map as visual attention for autonomous driving using generative adversarial network (gan). IEEE Trans Intell Transp Syst. https:\/\/doi.org\/10.1109\/TITS.2021.3053178","journal-title":"IEEE Trans Intell Transp Syst"},{"key":"4151_CR28","doi-asserted-by":"crossref","unstructured":"Shirpour M, Beauchemin S, Bauer M (2021) Driver\u2019s eye fixation prediction by deep neural network. In: VISIGRAPP (4: VISAPP), pp. 67\u201375","DOI":"10.5220\/0010220800670075"},{"key":"4151_CR29","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2018","unstructured":"Chen LC, Papandreou G, Kokkinos I, Murphy K, Yuille A (2018) Deeplab: semantic image segmentation with deep convolutional nets, atrous convolution and fully connected crfs. IEEE Trans Pattern Anal Mach Intell 40:834\u2013848","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4151_CR30","doi-asserted-by":"crossref","unstructured":"Xu H, Gao Y, Yu F, Darrell T (2017) End-to-end learning of driving models from large-scale video datasets. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3530\u20133538","DOI":"10.1109\/CVPR.2017.376"},{"key":"4151_CR31","unstructured":"Yu F, Xian W, Chen Y, Liu F, Liao M, Madhavan V, Darrell T (2018) Bdd100k: a diverse driving video database with scalable annotation tooling. ArXiv, abs\/1805.04687"},{"key":"4151_CR32","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"4151_CR33","doi-asserted-by":"publisher","first-page":"251","DOI":"10.3758\/s13428-012-0226-9","volume":"45","author":"O Meur","year":"2013","unstructured":"Meur O, Baccino T (2013) Methods for comparing scanpaths and saliency maps: strengths and weaknesses. Behav Res Methods 45:251\u2013266","journal-title":"Behav Res Methods"},{"key":"4151_CR34","doi-asserted-by":"crossref","unstructured":"Wang W, Shen J, Xie J, Cheng MM, Ling H, Borji A (2021) Revisiting video saliency prediction in the deep learning era. In: IEEE Transactions on Pattern Analysis and Machine Intelligence, pp. 43:220\u2013237","DOI":"10.1109\/TPAMI.2019.2924417"},{"issue":"11","key":"4151_CR35","doi-asserted-by":"publisher","first-page":"1254","DOI":"10.1109\/34.730558","volume":"20","author":"L Itti","year":"1998","unstructured":"Itti L, Koch C, Niebur E (1998) A model of saliency-based visual attention for rapid scene analysis. IEEE Trans Pattern Anal Mach Intell 20(11):1254\u20131259","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"4151_CR36","unstructured":"Harel J, Koch C, Perona P (2006) Graph-based visual saliency. In: Neural Information Processing Systems (NIPS), pp. 545\u2013552"},{"key":"4151_CR37","doi-asserted-by":"crossref","unstructured":"Huang X, Shen C, Boix X, Zhao Q (2015) Salicon: Reducing the semantic gap in saliency prediction by adapting deep neural networks. In: 2015 IEEE International Conference on Computer Vision (ICCV), pp. 262\u2013270","DOI":"10.1109\/ICCV.2015.38"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04151-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-021-04151-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04151-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,1]],"date-time":"2022-04-01T14:12:54Z","timestamp":1648822374000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-021-04151-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,7]]},"references-count":37,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2022,4]]}},"alternative-id":["4151"],"URL":"https:\/\/doi.org\/10.1007\/s11227-021-04151-2","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"value":"0920-8542","type":"print"},{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,1,7]]},"assertion":[{"value":"15 October 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 January 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}