{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T13:58:02Z","timestamp":1780581482379,"version":"3.54.1"},"reference-count":55,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100014173","name":"Health and Family Planning Commission of Shenzhen Municipality","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100014173","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100014173","name":"Health and Family Planning Commission of Shenzhen Municipality","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100014173","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Biomedical Signal Processing and Control"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.bspc.2026.110587","type":"journal-article","created":{"date-parts":[[2026,5,14]],"date-time":"2026-05-14T12:18:27Z","timestamp":1778761107000},"page":"110587","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":2,"special_numbering":"PB","title":["ACM-UNet: Adaptive integration of CNNs and Mamba for efficient medical image segmentation"],"prefix":"10.1016","volume":"123","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3294-5725","authenticated-orcid":false,"given":"Jing","family":"Huang","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yongkang","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuhan","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0554-689X","authenticated-orcid":false,"given":"Zhitao","family":"Dai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cheng","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qiying","family":"Lai","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.bspc.2026.110587_b1","series-title":"Medical Image Computing and Computer-Assisted Intervention\u2013MICCAI 2015: 18th International Conference, Munich, Germany, October 5-9, 2015, Proceedings, Part III 18","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"Ronneberger","year":"2015"},{"key":"10.1016\/j.bspc.2026.110587_b2","first-page":"3","article-title":"Unet++: A nested u-net architecture for medical image segmentation","author":"Zhou","year":"2018"},{"key":"10.1016\/j.bspc.2026.110587_b3","series-title":"Transunet: Transformers make strong encoders for medical image segmentation","author":"Chen","year":"2021"},{"key":"10.1016\/j.bspc.2026.110587_b4","series-title":"European Conference on Computer Vision","first-page":"205","article-title":"Swin-unet: Unet-like pure transformer for medical image segmentation","author":"Cao","year":"2022"},{"key":"10.1016\/j.bspc.2026.110587_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.compmedimag.2024.102470","article-title":"Cis-unet: Multi-class segmentation of the aorta in computed tomography angiography via context-aware shifted window self-attention","volume":"118","author":"Imran","year":"2024","journal-title":"Comput. Med. Imaging Graph."},{"key":"10.1016\/j.bspc.2026.110587_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.compmedimag.2024.102370","article-title":"Mef-unet: An end-to-end ultrasound image segmentation algorithm based on multi-scale feature extraction and fusion","volume":"114","author":"Xu","year":"2024","journal-title":"Comput. Med. Imaging Graph."},{"key":"10.1016\/j.bspc.2026.110587_b7","series-title":"Vm-unet: Vision mamba unet for medical image segmentation","author":"Ruan","year":"2024"},{"key":"10.1016\/j.bspc.2026.110587_b8","series-title":"Advancing pose-guided image synthesis with progressive conditional diffusion models","author":"Shen","year":"2023"},{"key":"10.1016\/j.bspc.2026.110587_b9","doi-asserted-by":"crossref","first-page":"6246","DOI":"10.52202\/079017-0202","article-title":"Imagpose: A unified conditional framework for pose-guided person generation","volume":"37","author":"Shen","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.bspc.2026.110587_b10","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence","first-page":"6795","article-title":"Imagdressing-v1: Customizable virtual dressing","author":"Shen","year":"2025"},{"key":"10.1016\/j.bspc.2026.110587_b11","series-title":"Imaggarment-1: Fine-grained garment generation for controllable fashion design","author":"Shen","year":"2025"},{"key":"10.1016\/j.bspc.2026.110587_b12","series-title":"Long-term talkingface generation via motion-prior conditional diffusion model","author":"Shen","year":"2025"},{"key":"10.1016\/j.bspc.2026.110587_b13","series-title":"Semantic image segmentation with deep convolutional nets and fully connected crfs","author":"Chen","year":"2014"},{"key":"10.1016\/j.bspc.2026.110587_b14","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs","volume":"40","author":"Chen","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.bspc.2026.110587_b15","series-title":"Rethinking atrous convolution for semantic image segmentation","author":"Chen","year":"2017"},{"key":"10.1016\/j.bspc.2026.110587_b16","doi-asserted-by":"crossref","unstructured":"L.C. Chen, Y. Zhu, G. Papandreou, F. Schroff, H. Adam, Encoder-decoder with atrous separable convolution for semantic image segmentation, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 801\u2013818.","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"10.1016\/j.bspc.2026.110587_b17","doi-asserted-by":"crossref","unstructured":"J. Dai, H. Qi, Y. Xiong, Y. Li, G. Zhang, H. Hu, Y. Wei, Deformable convolutional networks, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 764\u2013773.","DOI":"10.1109\/ICCV.2017.89"},{"key":"10.1016\/j.bspc.2026.110587_b18","series-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"10.1016\/j.bspc.2026.110587_b19","article-title":"Condconv: Conditionally parameterized convolutions for efficient inference","volume":"32","author":"Yang","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.bspc.2026.110587_b20","doi-asserted-by":"crossref","unstructured":"Y. Chen, X. Dai, M. Liu, D. Chen, L. Yuan, Z. Liu, Dynamic convolution: Attention over convolution kernels, in: 2020 IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2019, pp. 11027\u201311036.","DOI":"10.1109\/CVPR42600.2020.01104"},{"key":"10.1016\/j.bspc.2026.110587_b21","series-title":"Mixconv: Mixed depthwise convolutional kernels","author":"Tan","year":"2019"},{"key":"10.1016\/j.bspc.2026.110587_b22","first-page":"9202","article-title":"Pinwheel-shaped convolution and scale-based dynamic loss for infrared small target detection","volume":"39","author":"Yang","year":"2025","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"10.1016\/j.bspc.2026.110587_b23","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.bspc.2026.110587_b24","series-title":"Mamba: Linear-time sequence modeling with selective state spaces","author":"Gu","year":"2023"},{"key":"10.1016\/j.bspc.2026.110587_b25","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10012","article-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.bspc.2026.110587_b26","first-page":"103031","article-title":"Vmamba: Visual state space model","volume":"37","author":"Liu","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.bspc.2026.110587_b27","series-title":"Transfuse: Fusing transformers and cnns for medical image segmentation","author":"Zhang","year":"2021"},{"key":"10.1016\/j.bspc.2026.110587_b28","series-title":"Hc-mamba: Vision mamba with hybrid convolutional techniques for medical image segmentation","author":"Xu","year":"2024"},{"key":"10.1016\/j.bspc.2026.110587_b29","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2024.107154","article-title":"Pformer: An efficient cnn-transformer hybrid network with content-driven p-attention for 3d medical image segmentation","volume":"101","author":"Gao","year":"2025","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.bspc.2026.110587_b30","doi-asserted-by":"crossref","first-page":"7821","DOI":"10.3390\/app15147821","article-title":"A u-shaped architecture based on hybrid cnn and mamba for medical image segmentation","volume":"15","author":"Ma","year":"2025","journal-title":"Appl. Sci."},{"key":"10.1016\/j.bspc.2026.110587_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2025.108525","article-title":"Wtcm-unet: A hybrid cnn-ssm framework combining wavelet transform for medical image segmentation","volume":"112","author":"Gan","year":"2026","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.bspc.2026.110587_b32","doi-asserted-by":"crossref","unstructured":"K. He, X. Zhang, S. Ren, J. Sun, Deep residual learning for image recognition, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"10.1016\/j.bspc.2026.110587_b33","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.bspc.2026.110587_b34","series-title":"Efficiently modeling long sequences with structured state spaces","author":"Gu","year":"2021"},{"key":"10.1016\/j.bspc.2026.110587_b35","series-title":"On the parameterization and initialization of diagonal state space models","author":"Gu","year":"2022"},{"key":"10.1016\/j.bspc.2026.110587_b36","series-title":"Vision mamba: Efficient visual representation learning with bidirectional state space model","author":"Zhu","year":"2024"},{"key":"10.1016\/j.bspc.2026.110587_b37","doi-asserted-by":"crossref","DOI":"10.1016\/j.inffus.2025.103019","article-title":"Multi-scale convolutional attention frequency-enhanced transformer network for medical image segmentation","volume":"119","author":"Yan","year":"2025","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.bspc.2026.110587_b38","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2026.109566","article-title":"Wavelet attention fusion for semi-supervised ultrasound segmentation","volume":"117","author":"Wu","year":"2026","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.bspc.2026.110587_b39","series-title":"European Conference on Computer Vision","first-page":"363","article-title":"Wavelet convolutions for large receptive fields","author":"Finder","year":"2024"},{"key":"10.1016\/j.bspc.2026.110587_b40","series-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"248","article-title":"Imagenet: A large-scale hierarchical image database","author":"Deng","year":"2009"},{"key":"10.1016\/j.bspc.2026.110587_b41","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2025.105483","article-title":"A spatial-frequency domain multi-branch decoder method for real-time semantic segmentation","volume":"156","author":"Deng","year":"2025","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.bspc.2026.110587_b42","unstructured":"B. Landman, Z. Xu, J. Igelsias, M. Styner, T. Langerak, A. Klein, Miccai multi-atlas labeling beyond the cranial vault\u2013workshop and challenge, in: Proc. MICCAI Multi-Atlas Labeling beyond Cranial Vault\u2014Workshop Challenge, Munich, Germany, 2015, p. 12."},{"key":"10.1016\/j.bspc.2026.110587_b43","doi-asserted-by":"crossref","first-page":"2514","DOI":"10.1109\/TMI.2018.2837502","article-title":"Deep learning techniques for automatic mri cardiac multi-structures segmentation and diagnosis: Is the problem solved?","volume":"37","author":"Bernard","year":"2018","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.bspc.2026.110587_b44","series-title":"2023 IEEE International Conference on Systems, Man, and Cybernetics","first-page":"5273","article-title":"Thyroid nodule classification in ultrasound videos by combining 3d cnn and video transformer","author":"Huang","year":"2023"},{"key":"10.1016\/j.bspc.2026.110587_b45","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","first-page":"615","article-title":"Swin-umamba: Mamba-based unet with imagenet-based pretraining","author":"Liu","year":"2024"},{"key":"10.1016\/j.bspc.2026.110587_b46","first-page":"1","article-title":"Mixformer: A mixed cnn\u2013transformer backbone for medical image segmentation","volume":"74","author":"Liu","year":"2025","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.bspc.2026.110587_b47","doi-asserted-by":"crossref","unstructured":"M.M. Rahman, R. Marculescu, Medical image segmentation via cascaded attention decoding, in: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, 2023b, pp. 6222\u20136231.","DOI":"10.1109\/WACV56688.2023.00616"},{"key":"10.1016\/j.bspc.2026.110587_b48","series-title":"Chinese Conference on Pattern Recognition and Computer Vision","first-page":"42","article-title":"Levit-unet: Make faster encoders with transformer for medical image segmentation","author":"Xu","year":"2023"},{"key":"10.1016\/j.bspc.2026.110587_b49","series-title":"International Workshop on Predictive Intelligence in Medicine","first-page":"83","article-title":"Dae-former: Dual attention-guided efficient transformer for medical image segmentation","author":"Azad","year":"2023"},{"key":"10.1016\/j.bspc.2026.110587_b50","series-title":"Scaleformer: revisiting the transformer-based backbones from a scale-wise perspective for medical image segmentation","author":"Huang","year":"2022"},{"key":"10.1016\/j.bspc.2026.110587_b51","doi-asserted-by":"crossref","unstructured":"R. Azad, L. Niggemeier, M. Huttemann, A. Kazerouni, E.K. Aghdam, Y. Velichko, U. Bagci, D. Merhof, Beyond self-attention: Deformable large kernel attention for medical image segmentation, in: 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV, 2023b, pp. 1276\u20131286.","DOI":"10.1109\/WACV57701.2024.00132"},{"key":"10.1016\/j.bspc.2026.110587_b52","doi-asserted-by":"crossref","unstructured":"M.M. Rahman, R. Marculescu, G-cascade: Efficient cascaded graph convolutional decoding for 2d medical image segmentation, in: 2024 IEEE\/CVF Winter Conference on Applications of Computer Vision, WACV, 2023a, pp. 7713\u20137722.","DOI":"10.1109\/WACV57701.2024.00755"},{"key":"10.1016\/j.bspc.2026.110587_b53","doi-asserted-by":"crossref","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","article-title":"Pvt v2: Improved baselines with pyramid vision transformer","volume":"8","author":"Wang","year":"2022","journal-title":"Comput. Vis. Media"},{"key":"10.1016\/j.bspc.2026.110587_b54","series-title":"Mask2former for video instance segmentation","author":"Cheng","year":"2021"},{"key":"10.1016\/j.bspc.2026.110587_b55","doi-asserted-by":"crossref","DOI":"10.1109\/TCSVT.2025.3563411","article-title":"Vivim: a video vision mamba for ultrasound video segmentation","author":"Yang","year":"2025","journal-title":"IEEE Trans. Circuits Syst. Video Technol."}],"container-title":["Biomedical Signal Processing and Control"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1746809426011419?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1746809426011419?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,29]],"date-time":"2026-05-29T20:16:34Z","timestamp":1780085794000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1746809426011419"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":55,"alternative-id":["S1746809426011419"],"URL":"https:\/\/doi.org\/10.1016\/j.bspc.2026.110587","relation":{},"ISSN":["1746-8094"],"issn-type":[{"value":"1746-8094","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"ACM-UNet: Adaptive integration of CNNs and Mamba for efficient medical image segmentation","name":"articletitle","label":"Article Title"},{"value":"Biomedical Signal Processing and Control","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.bspc.2026.110587","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"110587"}}