{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,7]],"date-time":"2026-01-07T07:38:08Z","timestamp":1767771488475,"version":"3.28.0"},"reference-count":68,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T00:00:00Z","timestamp":1722297600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T00:00:00Z","timestamp":1722297600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Computing"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s00607-024-01328-4","type":"journal-article","created":{"date-parts":[[2024,7,30]],"date-time":"2024-07-30T18:32:12Z","timestamp":1722364332000},"page":"3255-3277","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Dynamic attention guider network"],"prefix":"10.1007","volume":"106","author":[{"given":"Chunguang","family":"Yue","sequence":"first","affiliation":[]},{"given":"Jinbao","family":"Li","sequence":"additional","affiliation":[]},{"given":"Qichen","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Donghuan","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,7,30]]},"reference":[{"key":"1328_CR1","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, et al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"1328_CR2","unstructured":"Cordts M, Omran M, Ramos S, Scharw\u00e4chter T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2015) The cityscapes dataset. In: CVPR workshop on the future of datasets in vision, vol. 2, p. 1"},{"key":"1328_CR3","doi-asserted-by":"crossref","unstructured":"Jain J, Singh A, Orlov N, Huang Z, Li J, Walton S, Shi H (2023) Semask: semantically masked transformers for semantic segmentation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 752\u2013761","DOI":"10.1109\/ICCVW60793.2023.00083"},{"key":"1328_CR4","first-page":"17864","volume":"34","author":"B Cheng","year":"2021","unstructured":"Cheng B, Schwing A, Kirillov A (2021) Per-pixel classification is not all you need for semantic segmentation. Adv Neural Inf Process Syst 34:17864\u201317875","journal-title":"Adv Neural Inf Process Syst"},{"key":"1328_CR5","doi-asserted-by":"crossref","unstructured":"Cheng B, Misra I, Schwing AG, Kirillov A, Girdhar R (2022) Masked-attention mask transformer for universal image segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 1290\u20131299","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"1328_CR6","doi-asserted-by":"crossref","unstructured":"Chen K, Pang J, Wang J, Xiong Y, Li X, Sun S, Feng W, Liu Z, Shi J, Ouyang W(2019) Hybrid task cascade for instance segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 
4974\u20134983","DOI":"10.1109\/CVPR.2019.00511"},{"key":"1328_CR7","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1328_CR8","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (VOC) challenge. Int J Comput Vision 88:303\u2013338","journal-title":"Int J Comput Vision"},{"key":"1328_CR9","doi-asserted-by":"crossref","unstructured":"Meng L, Li H, Chen B-C, Lan S, Wu Z, Jiang Y-G, Lim S-N (2022) Adavit: adaptive vision transformers for efficient image recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 12309\u201312318","DOI":"10.1109\/CVPR52688.2022.01199"},{"key":"1328_CR10","doi-asserted-by":"crossref","unstructured":"Chen M, Lin M, Li K, Shen Y, Wu Y, Chao F, Ji R (2023) CF-VIT: a general coarse-to-fine method for vision transformer. In: Proceedings of the AAAI conference on artificial intelligence, vol. 37, pp. 7042\u20137052","DOI":"10.1609\/aaai.v37i6.25860"},{"key":"1328_CR11","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"1328_CR12","doi-asserted-by":"crossref","unstructured":"Chen W, Du X, Yang F, Beyer L, Zhai X, Lin TY, Chen H, Li J, Song X, Wang Z (2022) A simple single-scale vision transformer for object detection and instance segmentation. In: European conference on computer vision, pp. 711\u2013727. Springer","DOI":"10.1007\/978-3-031-20080-9_41"},{"key":"1328_CR13","doi-asserted-by":"crossref","unstructured":"Wang J, Zhang S, Liu Y, Wu T, Yang Y, Liu X, Chen K, Luo P, Lin D (2023) Riformer: keep your vision backbone effective but removing token mixer. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14443\u201314452","DOI":"10.1109\/CVPR52729.2023.01388"},{"key":"1328_CR14","doi-asserted-by":"crossref","unstructured":"Zhu L, Wang X, Ke Z, Zhang W, Lau RW (2023) Biformer: vision transformer with bi-level routing attention. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10323\u201310333","DOI":"10.1109\/CVPR52729.2023.00995"},{"key":"1328_CR15","doi-asserted-by":"crossref","unstructured":"Srinivas A, Lin TY, Parmar N, Shlens J, Abbeel P, Vaswani A (2021) Bottleneck transformers for visual recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 16519\u201316529","DOI":"10.1109\/CVPR46437.2021.01625"},{"key":"1328_CR16","unstructured":"Zhou D, Yu Z, Xie E, Xiao C, Anandkumar A, Feng J, Alvarez JM (2022) Understanding the robustness in vision transformers. In: International conference on machine learning, pp. 27378\u201327394. 
PMLR"},{"key":"1328_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.110172","volume":"148","author":"X Xie","year":"2024","unstructured":"Xie X, Wu D, Xie M, Li Z (2024) Ghostformer: efficiently amalgamated CNN-transformer architecture for object detection. Pattern Recogn 148:110172","journal-title":"Pattern Recogn"},{"issue":"1","key":"1328_CR18","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/0010-0285(80)90005-5","volume":"12","author":"AM Treisman","year":"1980","unstructured":"Treisman AM, Gelade G (1980) A feature-integration theory of attention. Cogn Psychol 12(1):97\u2013136","journal-title":"Cogn Psychol"},{"key":"1328_CR19","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Fergus R (2014) Visualizing and understanding convolutional networks. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part I 13, pp. 818\u2013833 Springer","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"1328_CR20","doi-asserted-by":"crossref","unstructured":"Yang Z, Yang D, Dyer C, He X, Smola A, Hovy E (2016) Hierarchical attention networks for document classification. In: Proceedings of the 2016 conference of the North American chapter of the association for computational linguistics: human language technologies, pp. 1480\u20131489","DOI":"10.18653\/v1\/N16-1174"},{"key":"1328_CR21","doi-asserted-by":"crossref","unstructured":"Xia C, Wang X, Lv F, Hao X, Shi Y (2024) Vit-comer: vsion transformer with convolutional multi-scale feature interaction for dense predictions. arXiv preprint arXiv:2403.07392","DOI":"10.1109\/CVPR52733.2024.00525"},{"key":"1328_CR22","doi-asserted-by":"crossref","unstructured":"Lin W, Wu Z, Chen J, Huang J, Jin L (2023) Scale-aware modulation meet transformer. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6015\u20136026","DOI":"10.1109\/ICCV51070.2023.00553"},{"key":"1328_CR23","doi-asserted-by":"crossref","unstructured":"Wang Q, Wu B, Zhu P, Li P, Zuo W, Hu Q (2020) ECA-net: efficient channel attention for deep convolutional neural networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 11534\u201311542","DOI":"10.1109\/CVPR42600.2020.01155"},{"issue":"3","key":"1328_CR24","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","volume":"8","author":"M-H Guo","year":"2022","unstructured":"Guo M-H, Xu T-X, Liu J-J, Liu Z-N, Jiang P-T, Mu T-J, Zhang S-H, Martin RR, Cheng M-M, Hu S-M (2022) Attention mechanisms in computer vision: a survey. Comput visual media 8(3):331\u2013368","journal-title":"Comput visual media"},{"key":"1328_CR25","first-page":"261","volume":"30","author":"A Vaswani","year":"2017","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser \u0141, Olosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30:261\u2013272","journal-title":"Adv Neural Inf Process Syst"},{"key":"1328_CR26","unstructured":"Shen Z, Bello I, Vemulapalli R, Jia X, Chen CH (2020) Global self-attention networks for image recognition. arXiv preprint arXiv:2010.03019"},{"issue":"3","key":"1328_CR27","doi-asserted-by":"publisher","first-page":"774","DOI":"10.1287\/orsc.2014.0900","volume":"26","author":"J O\u2019Reilly","year":"2015","unstructured":"O\u2019Reilly J, Robinson SL, Berdahl JL, Banki S (2015) Is negative attention better than no attention? the comparative effects of ostracism and harassment at work. 
Organ Sci 26(3):774\u2013793","journal-title":"Organ Sci"},{"key":"1328_CR28","doi-asserted-by":"crossref","unstructured":"Huang Z, Wang X, Huang L, Huang C, Wei Y, Liu W (2019) Ccnet: criss-cross attention for semantic segmentation. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 603\u2013612","DOI":"10.1109\/ICCV.2019.00069"},{"key":"1328_CR29","unstructured":"Mnih V, Heess N, Graves A et al (2014) Recurrent models of visual attention. Adv Neural Inf Process Syst 27"},{"key":"1328_CR30","unstructured":"Jaderberg M, Simonyan K, Zisserman A et al (2015) Spatial transformer networks. Adv Neural Inf Process Syst 28"},{"key":"1328_CR31","unstructured":"Srivastava RK, Greff K, Schmidhuber J (2015) Highway networks. arXiv preprint arXiv:1505.00387"},{"key":"1328_CR32","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1328_CR33","doi-asserted-by":"crossref","unstructured":"Wang X, Girshick R, Gupta A, He K (2018) Non-local neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7794\u20137803","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1328_CR34","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee JY, Kweon IS (2018) Cbam: convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp. 3\u201319","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1328_CR35","doi-asserted-by":"crossref","unstructured":"Fu J, Liu J, Tian H, Li Y, Bao Y, Fang Z, Lu H (2019) Dual attention network for scene segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 3146\u20133154","DOI":"10.1109\/CVPR.2019.00326"},{"key":"1328_CR36","unstructured":"Yu F, Koltun V (2015) Multi-scale context aggregation by dilated convolutions. arXiv preprint arXiv:1511.07122"},{"issue":"3","key":"1328_CR37","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2022) Pvt v2: improved baselines with pyramid vision transformer. Comput Visual Media 8(3):415\u2013424","journal-title":"Comput Visual Media"},{"key":"1328_CR38","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111939","volume":"296","author":"N Hoanh","year":"2024","unstructured":"Hoanh N, Pham TV (2024) Focus-attention approach in optimizing DETR for object detection from high-resolution images. Knowl-Based Syst 296:111939","journal-title":"Knowl-Based Syst"},{"key":"1328_CR39","doi-asserted-by":"crossref","unstructured":"Ren S, Zhou D, He S, Feng J, Wang X (2022) Shunted self-attention via multi-scale token aggregation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10853\u201310862","DOI":"10.1109\/CVPR52688.2022.01058"},{"key":"1328_CR40","doi-asserted-by":"crossref","unstructured":"Wu H, Xiao B, Codella N, Liu M, Dai X, Yuan L, Zhang L (2021) CVT: introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 
22\u201331","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"1328_CR41","doi-asserted-by":"crossref","unstructured":"Guo J, Han K, Wu H, Tang Y, Chen X, Wang Y, Xu C (2022) Cmt: convolutional neural networks meet vision transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 12175\u201312185","DOI":"10.1109\/CVPR52688.2022.01186"},{"key":"1328_CR42","unstructured":"Mehta S, Rastegari M (2021) Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer. arXiv preprint arXiv:2110.02178"},{"key":"1328_CR43","doi-asserted-by":"crossref","unstructured":"Liu X, Peng H, Zheng N, Yang Y, Hu H, Yuan Y (2023) Efficientvit: memory efficient vision transformer with cascaded group attention. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14420\u201314430","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"1328_CR44","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp. 248\u2013255. IEEE","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1328_CR45","doi-asserted-by":"crossref","unstructured":"Lin TY, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: Computer Vision\u2013ECCV 2014: 13th European Conference, Zurich, Switzerland, September 6-12, 2014, Proceedings, Part V 13, pp. 740\u2013755. Springer","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1328_CR46","doi-asserted-by":"crossref","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A (2017) Scene parsing through ade20k dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 633\u2013641","DOI":"10.1109\/CVPR.2017.544"},{"key":"1328_CR47","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"1328_CR48","unstructured":"Zhang H, Cisse M, Dauphin YN, Lopez-Paz D (2017) mixup: beyond empirical risk minimization. arXiv preprint arXiv:1710.09412"},{"key":"1328_CR49","doi-asserted-by":"crossref","unstructured":"Yun S, Han D, Oh SJ, Chun S, Choe J, Yoo Y (2019) Cutmix: regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 6023\u20136032","DOI":"10.1109\/ICCV.2019.00612"},{"key":"1328_CR50","doi-asserted-by":"crossref","unstructured":"Zhong Z, Zheng L, Kang G, Li S, Yang Y (2020) Random erasing data augmentation. In: Proceedings of the AAAI conference on artificial intelligence, vol. 34, pp. 13001\u201313008","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"1328_CR51","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"},{"key":"1328_CR52","unstructured":"Loshchilov I, Hutter F (2017) Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101"},{"key":"1328_CR53","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 
770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"1328_CR54","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X, Fan DP, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 568\u2013578","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"1328_CR55","unstructured":"Hou Q, Lu CZ, Cheng MM, Feng J (2022) Conv2former: a simple transformer-style convnet for visual recognition. arXiv preprint arXiv:2211.11943"},{"issue":"4","key":"1328_CR56","doi-asserted-by":"publisher","first-page":"733","DOI":"10.1007\/s41095-023-0364-2","volume":"9","author":"M-H Guo","year":"2023","unstructured":"Guo M-H, Lu C-Z, Liu Z-N, Cheng M-M, Hu S-M (2023) Visual attention network. Comput Visual Media 9(4):733\u2013752","journal-title":"Comput Visual Media"},{"key":"1328_CR57","unstructured":"Yang J, Li C, Zhang P, Dai X, Xiao B, Yuan L, Gao J (2021) Focal self-attention for local-global interactions in vision transformers. arXiv preprint arXiv:2107.00641"},{"key":"1328_CR58","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2021) Training data-efficient image transformers & distillation through attention. In: International conference on machine learning, pp. 10347\u201310357. PMLR"},{"key":"1328_CR59","doi-asserted-by":"crossref","unstructured":"Selvaraju RR, Cogswell M, Das A, Vedantam R, Parikh D, Batra D (2017) Grad-cam: visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE international conference on computer vision, pp. 618\u2013626","DOI":"10.1109\/ICCV.2017.74"},{"key":"1328_CR60","first-page":"4203","volume":"35","author":"J Yang","year":"2022","unstructured":"Yang J, Li C, Dai X, Gao J (2022) Focal modulation networks. Adv Neural Inf Process Syst 35:4203\u20134217","journal-title":"Adv Neural Inf Process Syst"},{"key":"1328_CR61","doi-asserted-by":"crossref","unstructured":"Wang W, Dai J, Chen Z, Huang Z, Li Z, Zhu X, Hu X, Lu T, Lu L, Li H (2023) Internimage: exploring large-scale vision foundation models with deformable convolutions. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 14408\u201314419","DOI":"10.1109\/CVPR52729.2023.01385"},{"key":"1328_CR62","first-page":"9355","volume":"34","author":"X Chu","year":"2021","unstructured":"Chu X, Tian Z, Wang Y, Zhang B, Ren H, Wei X, Xia H, Shen C (2021) Twins: revisiting the design of spatial attention in vision transformers. Adv Neural Inf Process Syst 34:9355\u20139366","journal-title":"Adv Neural Inf Process Syst"},{"key":"1328_CR63","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp. 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"issue":"5","key":"1328_CR64","doi-asserted-by":"publisher","first-page":"1483","DOI":"10.1109\/TPAMI.2019.2956516","volume":"43","author":"Z Cai","year":"2019","unstructured":"Cai Z, Vasconcelos N (2019) Cascade R-CNN: high quality object detection and instance segmentation. IEEE Trans Pattern Anal Mach Intell 43(5):1483\u20131498","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"1328_CR65","doi-asserted-by":"crossref","unstructured":"Lin TY, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. 
In: Proceedings of the IEEE international conference on computer vision, pp. 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"1328_CR66","unstructured":"Chen K, Wang J, Pang J, Cao Y, Xiong Y, Li X, Sun S, Feng W, Liu Z, Xu J et al (2019) Mmdetection: open MMLab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155"},{"key":"1328_CR67","unstructured":"Huang Z, Ben Y, Luo G, Cheng P, Yu G, Fu B (2021) Shuffle transformer: rethinking spatial shuffle for vision transformer. arXiv preprint arXiv:2106.03650"},{"key":"1328_CR68","doi-asserted-by":"crossref","unstructured":"Kirillov A, Girshick R, He K, Doll\u00e1r P (2019) Panoptic feature pyramid networks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 6399\u20136408","DOI":"10.1109\/CVPR.2019.00656"}],"container-title":["Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-024-01328-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00607-024-01328-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00607-024-01328-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T05:21:19Z","timestamp":1732512079000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00607-024-01328-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,30]]},"references-count":68,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["1328"],"URL":"https:\/\/doi.org\/10.1007\/s00607-024-01328-4","relation":{},"ISSN":["0010-485X","1436-5057"],"issn-type":[{"type":"print","value":"0010-485X"},{"type":"electronic","value":"1436-5057"}],"subject":[],"published":{"date-parts":[[2024,7,30]]},"assertion":[{"value":"30 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 July 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors have no conflicts of interest that are relevant to the content of this article.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}
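
The record above is the standard envelope returned by the public Crossref REST API for a single work: a "status"/"message-type" wrapper around a "message" object that carries the bibliographic fields (title, author, DOI, volume, page, reference, and so on). As a minimal sketch of how such a record can be fetched and read, assuming network access to api.crossref.org; the script and the mailto contact address in the User-Agent are illustrative placeholders, not part of the record:

import json
import urllib.request

# DOI taken from the record above.
DOI = "10.1007/s00607-024-01328-4"

# Crossref asks callers in its "polite" pool to identify themselves with a
# mailto address in the User-Agent; the address below is a placeholder.
req = urllib.request.Request(
    f"https://api.crossref.org/works/{DOI}",
    headers={"User-Agent": "example-client/0.1 (mailto:you@example.org)"},
)

with urllib.request.urlopen(req) as resp:
    # Same shape as above: {"status": "ok", "message-type": "work", "message": {...}}
    envelope = json.load(resp)

work = envelope["message"]
print(work["title"][0])                                    # Dynamic attention guider network
print(work["container-title"][0], work["volume"], work["issue"], work["page"])
print(", ".join(f'{a["given"]} {a["family"]}' for a in work["author"]))
print(work["references-count"], "references")              # 68

Note that list-valued fields such as "title" and "container-title" hold lists even when there is a single value, and each entry in "reference" may carry either structured fields (DOI, volume, first-page) or only the free-text "unstructured" citation string, so consumers should handle both cases.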