{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T12:03:29Z","timestamp":1781006609812,"version":"3.54.1"},"reference-count":64,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100003968","name":"Iran National Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003968","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.knosys.2026.116082","type":"journal-article","created":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T23:57:08Z","timestamp":1777161428000},"page":"116082","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Mult-Pool Self Attention: a lightweight attention with linear complexity"],"prefix":"10.1016","volume":"345","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7938-5382","authenticated-orcid":false,"given":"Seyed Mohammad","family":"Hatefi","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0509-6219","authenticated-orcid":false,"given":"Hossein","family":"Mahvash Mohammadi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3218-9787","authenticated-orcid":false,"given":"Hamidreza","family":"Baradaran Kashani","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2026.116082_bib0001","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"Swin Transformer: hierarchical vision Transformer using shifted Windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.knosys.2026.116082_bib0002","series-title":"European Conference on Computer Vision","article-title":"EdgeViTs: competing light-weight CNNs on mobile devices with vision transformers","author":"Pan","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0003","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"CMT: convolutional neural networks meet vision transformers","author":"Guo","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0004","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"Rethinking vision transformers for MobileNet size and speed","author":"Li","year":"2023"},{"key":"10.1016\/j.knosys.2026.116082_bib0005","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Mobile-former: bridging MobileNet and Transformer","author":"Chen","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0006","unstructured":"S. Mehta and M. Rastegari, \"Separable self-attention for Mobile vision transformers,\" arXiv preprint arXiv:2206.02680, 2022."},{"key":"10.1016\/j.knosys.2026.116082_bib0007","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"SwiftFormer: efficient additive attention for transformer-based real-time mobile vision applications","author":"Shaker","year":"2023"},{"key":"10.1016\/j.knosys.2026.116082_bib0008","unstructured":"T. Zhang, L. Li, Y. Zhou, W. Liu, C. Qian, J.N. Hwang and X. Ji, \"CAS-ViT: convolutional additive self-attention vision transformers for efficient mobile applications,\" arXiv preprint arXiv:2408.03703, 2024."},{"key":"10.1016\/j.knosys.2026.116082_bib0009","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"MetaFormer is actually what you need for vision","author":"Yu","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0010","series-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition","article-title":"ImageNet: a large-scale hierarchical image database","author":"Deng","year":"2009"},{"key":"10.1016\/j.knosys.2026.116082_bib0011","unstructured":"A.G. Howard, M. Zhu, B. Chen, D. Kalenichenko, W. Wang, T. Weyand, M. Andreetto and H. Adam, \"MobileNets: efficient convolutional neural networks for mobile vision applications,\" arXiv preprint arXiv:1704.04861, 2017."},{"key":"10.1016\/j.knosys.2026.116082_bib0012","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"MobileNetV2: inverted residuals and linear bottlenecks","author":"Sandler","year":"2018"},{"key":"10.1016\/j.knosys.2026.116082_bib0013","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"Searching for MobileNetV3","author":"Howard","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0014","series-title":"European Conference on Computer Vision","article-title":"MobileNetV4: universal models for the mobile ecosystem","author":"Qin","year":"2024"},{"key":"10.1016\/j.knosys.2026.116082_bib0015","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"ShuffleNet: an extremely efficient convolutional neural network for mobile devices","author":"Zhang","year":"2018"},{"key":"10.1016\/j.knosys.2026.116082_bib0016","series-title":"European Conference on Computer Vision","article-title":"ShuffleNet V2: practical guidelines for efficient CNN architecture design","author":"Ma","year":"2018"},{"key":"10.1016\/j.knosys.2026.116082_bib0017","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"MnasNet: platform-aware neural architecture search for mobile","author":"Tan","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0018","series-title":"International Conference on Machine Learning","article-title":"EfficientNet: rethinking model scaling for convolutional neural networks","author":"Tan","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0019","series-title":"International Conference on Machine Learning","article-title":"EfficientNetV2: smaller models and faster training","author":"Tan","year":"2021"},{"key":"10.1016\/j.knosys.2026.116082_bib0020","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"GhostNet: more features from cheap operations","author":"Han","year":"2020"},{"key":"10.1016\/j.knosys.2026.116082_bib0021","first-page":"9969","article-title":"GhostNetV2: enhance cheap operation with long-range attention","author":"Tang","year":"2022","journal-title":"Adv. Neural. Inf. Process Syst."},{"key":"10.1016\/j.knosys.2026.116082_bib0022","unstructured":"Z. Liu, Z. Hao, K. Han, Y. Tang and Y. Wang, \"GhostNetV3: exploring the training strategies for compact models,\" arXiv preprint arXiv:2404.11202, 2024."},{"key":"10.1016\/j.knosys.2026.116082_bib0023","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"InceptionNeXt: when Inception meets ConvNeXt","author":"Yu","year":"2024"},{"key":"10.1016\/j.knosys.2026.116082_bib0024","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"RepViT: revisiting mobile CNN from ViT perspective","author":"Wang","year":"2024"},{"key":"10.1016\/j.knosys.2026.116082_bib0025","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, S. Gelly, J. Uszkoreit and N. Houlsby, \"An image is worth 16x16 words: transformers for image recognition at scale,\" arXiv preprint arXiv:2010.11929, 2020."},{"key":"10.1016\/j.knosys.2026.116082_bib0026","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"Pyramid vision transformer: a versatile backbone for dense prediction without convolutions","author":"Wang","year":"2021"},{"issue":"3","key":"10.1016\/j.knosys.2026.116082_bib0027","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","article-title":"PVTv2: improved baselines with Pyramid vision transformer","volume":"8","author":"Wang","year":"2022","journal-title":"Comput. Vis. Media"},{"key":"10.1016\/j.knosys.2026.116082_bib0028","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Swin Transformer V2: scaling up capacity and resolution","author":"Liu","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0029","unstructured":"S. Mehta and M. Rastegari, \"MobileViT: light-weight, general-purpose, and mobile-friendly Vision transformer,\" arXiv preprint arXiv:2110.02178, 2021."},{"issue":"2","key":"10.1016\/j.knosys.2026.116082_bib0030","doi-asserted-by":"crossref","first-page":"896","DOI":"10.1109\/TPAMI.2023.3329173","article-title":"MetaFormer baselines for vision","volume":"46","author":"Yu","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116082_bib0031","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"FastViT: a fast hybrid vision transformer using structural reparameterization","author":"Vasu","year":"2023"},{"key":"10.1016\/j.knosys.2026.116082_bib0032","first-page":"12934","article-title":"EfficientFormer: vision transformers at MobileNet speed","volume":"35","author":"Li","year":"2022","journal-title":"Adv. Neural. Inf. Process Syst."},{"key":"10.1016\/j.knosys.2026.116082_bib0033","series-title":"European Conference on Computer Vision","article-title":"EdgeNeXt: efficiently amalgamated CNN-transformer architecture for Mobile vision applications","author":"Maaz","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0034","series-title":"2023 IEEE\/CVF International Conference on Computer Vision","article-title":"Rethinking mobile block for efficient attention-based models","author":"Zhang","year":"2023"},{"issue":"11","key":"10.1016\/j.knosys.2026.116082_bib0035","doi-asserted-by":"crossref","first-page":"10560","DOI":"10.1109\/TPAMI.2025.3596776","article-title":"EMOv2: pushing 5M vision model frontier","volume":"47","author":"Zhang","year":"2025","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116082_bib0036","unstructured":"T. Huang, L. Huang, S. You, F. Wang, C. Qian and C. Xu, \"LightViT: towards light-weight convolution-free vision transformers,\" arXiv preprint arXiv:2207.05557, 2022."},{"key":"10.1016\/j.knosys.2026.116082_bib0037","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Lite vision transformer with enhanced self-attention","author":"Yang","year":"2022"},{"key":"10.1016\/j.knosys.2026.116082_bib0038","unstructured":"Y. Zhao, H. Tang, Y. Jiang, Y. A and Q. Wu, \"Lightweight vision transformer with cross feature attention,\" arXiv preprint arXiv:2207.07268, 2022."},{"issue":"9","key":"10.1016\/j.knosys.2026.116082_bib0039","doi-asserted-by":"crossref","first-page":"11120","DOI":"10.1109\/TPAMI.2023.3265499","article-title":"PSLT: a light-weight vision transformer with ladder self-attention and progressive shift","volume":"45","author":"Wu","year":"2023","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116082_bib0040","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"EfficientViT: memory efficient vision transformer with cascaded group attention","author":"Liu","year":"2023"},{"key":"10.1016\/j.knosys.2026.116082_bib0041","unstructured":"J. Li, X. Xia, W. Li, H. Li, X. Wang, X. Xiao, R. Wang, M. Zheng and X. Pan, \"Next-ViT: next generation vision transformer for efficient deployment in realistic industrial scenarios,\" arXiv preprint arXiv:2207.05501, 2022."},{"key":"10.1016\/j.knosys.2026.116082_bib0042","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural. Inf. Process Syst."},{"key":"10.1016\/j.knosys.2026.116082_bib0043","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","article-title":"Squeeze-and-excitation networks","author":"Hu","year":"2018"},{"key":"10.1016\/j.knosys.2026.116082_bib0044","series-title":"Proceedings of the European Conference on Computer Vision","article-title":"CBAM: convolutional block attention module","author":"Woo","year":"2018"},{"key":"10.1016\/j.knosys.2026.116082_bib0045","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops","article-title":"GCNet: non-local networks meet Squeeze-Excitation networks and beyond","author":"Cao","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0046","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Designing network Design Spaces","author":"Radosavovic","year":"2020"},{"key":"10.1016\/j.knosys.2026.116082_bib0047","series-title":"International Conference on Learning Representations","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0048","series-title":"International Conference on Learning Representations","article-title":"SGDR: stochastic gradient descent with warm restarts","author":"Loshchilov","year":"2017"},{"key":"10.1016\/j.knosys.2026.116082_bib0049","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","article-title":"Rethinking the inception architecture for computer vision","author":"Szegedy","year":"2016"},{"key":"10.1016\/j.knosys.2026.116082_bib0050","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","article-title":"Random erasing data augmentation","author":"Zhong","year":"2020"},{"key":"10.1016\/j.knosys.2026.116082_bib0051","unstructured":"H. Zhang, M. Cisse, Y.N. Dauphin and D. Lopez-Paz, \"mixup: beyond empirical risk minimization,\" arXiv preprint arXiv:1710.09412, 2017."},{"key":"10.1016\/j.knosys.2026.116082_bib0052","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","article-title":"CutMix: regularization strategy to train strong classifiers with localizable features","author":"Yun","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0053","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","article-title":"Randaugment: practical automated data augmentation with a reduced search space","author":"Cubuk","year":"2020"},{"issue":"4","key":"10.1016\/j.knosys.2026.116082_bib0054","doi-asserted-by":"crossref","first-page":"838","DOI":"10.1137\/0330046","article-title":"Acceleration of stochastic approximation by averaging","volume":"30","author":"Polyak","year":"1992","journal-title":"SIAM J. Control Optim."},{"key":"10.1016\/j.knosys.2026.116082_bib0055","article-title":"PyTorch: an imperative style, high-performance deep learning library","volume":"32","author":"Paszke","year":"2019","journal-title":"Adv. Neural. Inf. Process Syst."},{"key":"10.1016\/j.knosys.2026.116082_bib0056","unstructured":"R. Wightman, \"Pytorch image models,\" 2019. [Online]. Available: https:\/\/github.com\/huggingface\/pytorch-image-models. [Accessed 2025]."},{"key":"10.1016\/j.knosys.2026.116082_bib0057","series-title":"European Conference on Computer Vision","article-title":"Microsoft COCO: common objects in context","author":"Lin","year":"2014"},{"key":"10.1016\/j.knosys.2026.116082_bib0058","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","article-title":"Scene parsing through ADE20K dataset","author":"Zhou","year":"2017"},{"issue":"2","key":"10.1016\/j.knosys.2026.116082_bib0059","doi-asserted-by":"crossref","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","article-title":"Focal loss for dense object detection","volume":"42","author":"Lin","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116082_bib0060","series-title":"Proceedings of the IEEE International Conference on Computer Vision","article-title":"Mask R-CNN","author":"He","year":"2017"},{"key":"10.1016\/j.knosys.2026.116082_bib0061","unstructured":"K. Chen, J. Wang, J. Pang, Y. Cao, Y. Xiong, X. Li, S. Sun, W. Feng, Z. Liu, J. Xu, Z. Zhang, D. Cheng, C. Zhu, T. Cheng, Q. Zhao, B. Li, X. Lu, R. Zhu, Y. Wu, J. Dai, J. Wang, J. Shi, W. Ouyang, C.C. Loy and D. Lin, \"MMDetection: open MMLab detection toolbox and benchmark,\" arXiv preprint arXiv:1906.07155, 2019."},{"key":"10.1016\/j.knosys.2026.116082_bib0062","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","article-title":"Panoptic feature Pyramid networks","author":"Kirillov","year":"2019"},{"key":"10.1016\/j.knosys.2026.116082_bib0063","unstructured":"MMSegmentation Contributors, \"MMSegmentation: OpenMMLab semantic segmentation toolbox and benchmark,\" OpenMMLab, 2020. [Online]. Available: https:\/\/github.com\/open-mmlab\/mmsegmentation. [Accessed 2025]."},{"key":"10.1016\/j.knosys.2026.116082_bib0064","series-title":"International Conference on Machine Learning","article-title":"Training data-efficient image transformers & distillation through attention","author":"Touvron","year":"2021"}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008087?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008087?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T11:29:01Z","timestamp":1781004541000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126008087"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":64,"alternative-id":["S0950705126008087"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116082","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Mult-Pool Self Attention: a lightweight attention with linear complexity","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116082","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"116082"}}