{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,8,14]],"date-time":"2024-08-14T21:13:32Z","timestamp":1723670012444},"reference-count":40,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,6,1]],"date-time":"2024-06-01T00:00:00Z","timestamp":1717200000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1016\/j.patcog.2024.110357","type":"journal-article","created":{"date-parts":[[2024,2,21]],"date-time":"2024-02-21T11:20:32Z","timestamp":1708514432000},"page":"110357","update-policy":"http:\/\/dx.doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":2,"special_numbering":"C","title":["Efficient image analysis with triple attention vision transformer"],"prefix":"10.1016","volume":"150","author":[{"ORCID":"http:\/\/orcid.org\/0000-0003-4147-6876","authenticated-orcid":false,"given":"Gehui","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"http:\/\/orcid.org\/0000-0002-1992-3900","authenticated-orcid":false,"given":"Tongtong","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2024.110357_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.108620","article-title":"LiTMNet: A deep CNN for efficient HDR image reconstruction from a single LDR image","volume":"127","author":"Wu","year":"2022","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.110357_b2","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2022.108827","article-title":"GasHis-transformer: A multi-scale visual transformer approach for gastric histopathological image detection","volume":"130","author":"Chen","year":"2022","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.patcog.2024.110357_b3","first-page":"3965","article-title":"Coatnet: Marrying convolution and attention for all data sizes","volume":"34","author":"Dai","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2024.110357_b4","doi-asserted-by":"crossref","unstructured":"W. Wang, E. Xie, X. Li, D.-P. Fan, K. Song, D. Liang, T. Lu, P. Luo, L. Shao, Pyramid vision transformer: A versatile backbone for dense prediction without convolutions, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 568\u2013578.","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"10.1016\/j.patcog.2024.110357_b5","doi-asserted-by":"crossref","unstructured":"Z. Liu, Y. Lin, Y. Cao, H. Hu, Y. Wei, Z. Zhang, S. Lin, B. Guo, Swin transformer: Hierarchical vision transformer using shifted windows, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 10012\u201310022.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"10.1016\/j.patcog.2024.110357_b6","series-title":"International Conference on Machine Learning","first-page":"12633","article-title":"Global context vision transformers","author":"Hatamizadeh","year":"2023"},{"key":"10.1016\/j.patcog.2024.110357_b7","series-title":"European Conference on Computer Vision","first-page":"459","article-title":"Maxvit: Multi-axis vision transformer","author":"Tu","year":"2022"},{"key":"10.1016\/j.patcog.2024.110357_b8","series-title":"European Conference on Computer Vision","first-page":"74","article-title":"Davit: Dual attention vision transformers","author":"Ding","year":"2022"},{"key":"10.1016\/j.patcog.2024.110357_b9","series-title":"Resnet in resnet: Generalizing residual architectures","author":"Targ","year":"2016"},{"key":"10.1016\/j.patcog.2024.110357_b10","doi-asserted-by":"crossref","unstructured":"H. Wu, B. Xiao, N. Codella, M. Liu, X. Dai, L. Yuan, L. Zhang, Cvt: Introducing convolutions to vision transformers, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 22\u201331.","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"10.1016\/j.patcog.2024.110357_b11","doi-asserted-by":"crossref","unstructured":"C.-F.R. Chen, Q. Fan, R. Panda, Crossvit: Cross-attention multi-scale vision transformer for image classification, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 357\u2013366.","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"10.1016\/j.patcog.2024.110357_b12","doi-asserted-by":"crossref","unstructured":"L. Yuan, Y. Chen, T. Wang, W. Yu, Y. Shi, Z.-H. Jiang, F.E. Tay, J. Feng, S. Yan, Tokens-to-token vit: Training vision transformers from scratch on imagenet, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 558\u2013567.","DOI":"10.1109\/ICCV48922.2021.00060"},{"key":"10.1016\/j.patcog.2024.110357_b13","doi-asserted-by":"crossref","unstructured":"X. Mao, G. Qi, Y. Chen, X. Li, R. Duan, S. Ye, Y. He, H. Xue, Towards robust vision transformer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 12042\u201312051.","DOI":"10.1109\/CVPR52688.2022.01173"},{"key":"10.1016\/j.patcog.2024.110357_b14","series-title":"Conditional positional encodings for vision transformers","author":"Chu","year":"2021"},{"key":"10.1016\/j.patcog.2024.110357_b15","doi-asserted-by":"crossref","unstructured":"P. Zhang, X. Dai, J. Yang, B. Xiao, L. Yuan, L. Zhang, J. Gao, Multi-scale vision longformer: A new vision transformer for high-resolution image encoding, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 2998\u20133008.","DOI":"10.1109\/ICCV48922.2021.00299"},{"issue":"3","key":"10.1016\/j.patcog.2024.110357_b16","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","article-title":"Pvt v2: Improved baselines with pyramid vision transformer","volume":"8","author":"Wang","year":"2022","journal-title":"Comput. Vis. Media"},{"key":"10.1016\/j.patcog.2024.110357_b17","doi-asserted-by":"crossref","unstructured":"W. Yu, M. Luo, P. Zhou, C. Si, Y. Zhou, X. Wang, J. Feng, S. Yan, Metaformer is actually what you need for vision, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 10819\u201310829.","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"10.1016\/j.patcog.2024.110357_b18","series-title":"Focal self-attention for local-global interactions in vision transformers","author":"Yang","year":"2021"},{"key":"10.1016\/j.patcog.2024.110357_b19","doi-asserted-by":"crossref","unstructured":"Y. Lee, J. Kim, J. Willette, S.J. Hwang, Mpvit: Multi-path vision transformer for dense prediction, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 7287\u20137296.","DOI":"10.1109\/CVPR52688.2022.00714"},{"key":"10.1016\/j.patcog.2024.110357_b20","series-title":"Pyramidtnt: Improved transformer-in-transformer baselines with pyramid architecture","author":"Han","year":"2022"},{"key":"10.1016\/j.patcog.2024.110357_b21","doi-asserted-by":"crossref","DOI":"10.1109\/TPAMI.2023.3268446","article-title":"Dual vision transformer","author":"Yao","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.110357_b22","unstructured":"A. Brock, S. De, S.L. Smith, Characterizing signal propagation to close the performance gap in unnormalized ResNets, in: 9th International Conference on Learning Representations, ICLR, 2021."},{"key":"10.1016\/j.patcog.2024.110357_b23","doi-asserted-by":"crossref","unstructured":"Z. Liu, H. Mao, C.-Y. Wu, C. Feichtenhofer, T. Darrell, S. Xie, A convnet for the 2020s, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 11976\u201311986.","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"10.1016\/j.patcog.2024.110357_b24","series-title":"Shuffle transformer: Rethinking spatial shuffle for vision transformer","author":"Huang","year":"2021"},{"key":"10.1016\/j.patcog.2024.110357_b25","doi-asserted-by":"crossref","unstructured":"X. Dong, J. Bao, D. Chen, W. Zhang, N. Yu, L. Yuan, D. Chen, B. Guo, Cswin transformer: A general vision transformer backbone with cross-shaped windows, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 12124\u201312134.","DOI":"10.1109\/CVPR52688.2022.01181"},{"key":"10.1016\/j.patcog.2024.110357_b26","article-title":"Uniformer: Unifying convolution and self-attention for visual recognition","author":"Li","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.patcog.2024.110357_b27","series-title":"European Conference on Computer Vision","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.patcog.2024.110357_b28","doi-asserted-by":"crossref","unstructured":"K. He, G. Gkioxari, P. Doll\u00e1r, R. Girshick, Mask r-cnn, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 2961\u20132969.","DOI":"10.1109\/ICCV.2017.322"},{"key":"10.1016\/j.patcog.2024.110357_b29","doi-asserted-by":"crossref","unstructured":"Y. Zhou, O. Tuzel, Voxelnet: End-to-end learning for point cloud based 3d object detection, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 4490\u20134499.","DOI":"10.1109\/CVPR.2018.00472"},{"key":"10.1016\/j.patcog.2024.110357_b30","doi-asserted-by":"crossref","unstructured":"A.H. Lang, S. Vora, H. Caesar, L. Zhou, J. Yang, O. Beijbom, Pointpillars: Fast encoders for object detection from point clouds, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 12697\u201312705.","DOI":"10.1109\/CVPR.2019.01298"},{"key":"10.1016\/j.patcog.2024.110357_b31","doi-asserted-by":"crossref","unstructured":"T. Yin, X. Zhou, P. Krahenbuhl, Center-based 3d object detection and tracking, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 11784\u201311793.","DOI":"10.1109\/CVPR46437.2021.01161"},{"issue":"10","key":"10.1016\/j.patcog.2024.110357_b32","doi-asserted-by":"crossref","first-page":"3337","DOI":"10.3390\/s18103337","article-title":"Second: Sparsely embedded convolutional detection","volume":"18","author":"Yan","year":"2018","journal-title":"Sensors"},{"key":"10.1016\/j.patcog.2024.110357_b33","doi-asserted-by":"crossref","unstructured":"H. Caesar, V. Bankiti, A.H. Lang, S. Vora, V.E. Liong, Q. Xu, A. Krishnan, Y. Pan, G. Baldan, O. Beijbom, nuscenes: A multimodal dataset for autonomous driving, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 11621\u201311631.","DOI":"10.1109\/CVPR42600.2020.01164"},{"issue":"11","key":"10.1016\/j.patcog.2024.110357_b34","doi-asserted-by":"crossref","first-page":"1231","DOI":"10.1177\/0278364913491297","article-title":"Vision meets robotics: The kitti dataset","volume":"32","author":"Geiger","year":"2013","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.patcog.2024.110357_b35","unstructured":"I. Loshchilov, F. Hutter, Decoupled Weight Decay Regularization, in: International Conference on Learning Representations, 2018."},{"key":"10.1016\/j.patcog.2024.110357_b36","series-title":"European Conference on Computer Vision","first-page":"649","article-title":"Colorful image colorization","author":"Zhang","year":"2016"},{"key":"10.1016\/j.patcog.2024.110357_b37","unstructured":"J.-W. Su, H.-K. Chu, J.-B. Huang, Instance-aware image colorization, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 7968\u20137977."},{"key":"10.1016\/j.patcog.2024.110357_b38","doi-asserted-by":"crossref","unstructured":"Y. Wu, X. Wang, Y. Li, H. Zhang, X. Zhao, Y. Shan, Towards vivid and diverse image colorization with generative color prior, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 14377\u201314386.","DOI":"10.1109\/ICCV48922.2021.01411"},{"key":"10.1016\/j.patcog.2024.110357_b39","series-title":"European Conference on Computer Vision","first-page":"350","article-title":"Bigcolor: colorization using a generative color prior for natural images","author":"Kim","year":"2022"},{"key":"10.1016\/j.patcog.2024.110357_b40","series-title":"European Conference on Computer Vision","first-page":"20","article-title":"ColorFormer: Image colorization via color memory assisted hybrid-attention transformer","author":"Ji","year":"2022"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324001080?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320324001080?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,3,12]],"date-time":"2024-03-12T01:20:30Z","timestamp":1710206430000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320324001080"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6]]},"references-count":40,"alternative-id":["S0031320324001080"],"URL":"http:\/\/dx.doi.org\/10.1016\/j.patcog.2024.110357","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2024,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Efficient image analysis with triple attention vision transformer","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2024.110357","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier Ltd. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"110357"}}