{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T23:26:47Z","timestamp":1770334007004,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Major Technological Innovation Project of Hangzhou","award":["No. 2022AIZD0147"],"award-info":[{"award-number":["No. 2022AIZD0147"]}]},{"name":"the National Key Research and Development Project","award":["No. 2022YFC2504605"],"award-info":[{"award-number":["No. 2022YFC2504605"]}]},{"name":"Japanese Ministry for Education, Science, Culture and Sports","award":["No. 20KK0234, No. 21H03470 and No. 20K21821"],"award-info":[{"award-number":["No. 20KK0234, No. 21H03470 and No. 20K21821"]}]},{"name":"Zhejiang Provincial Natural Science Foundation of China","award":["No. LZ22F020012"],"award-info":[{"award-number":["No. LZ22F020012"]}]},{"name":"Major Scientific Research Project of Zhejiang Lab","award":["No. 2020ND8AD01"],"award-info":[{"award-number":["No. 2020ND8AD01"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3611821","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:12Z","timestamp":1698391632000},"page":"5214-5222","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":7,"title":["Semi-Supervised Convolutional Vision Transformer with Bi-Level Uncertainty Estimation for Medical Image Segmentation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8987-350X","authenticated-orcid":false,"given":"Huimin","family":"Huang","sequence":"first","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9569-269X","authenticated-orcid":false,"given":"Yawen","family":"Huang","sequence":"additional","affiliation":[{"name":"Jarvis Research Center, Tencent YouTu Lab, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5106-5512","authenticated-orcid":false,"given":"Shiao","family":"Xie","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4098-588X","authenticated-orcid":false,"given":"Lanfen","family":"Lin","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8167-5354","authenticated-orcid":false,"given":"Tong","family":"Ruofeng","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5952-0188","authenticated-orcid":false,"given":"Yen-wei","family":"Chen","sequence":"additional","affiliation":[{"name":"Ritsumeikan University, Kyoto, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8076-2619","authenticated-orcid":false,"given":"Yuexiang","family":"Li","sequence":"additional","affiliation":[{"name":"Jarvis Research Center, Tencent YouTu Lab, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2195-2847","authenticated-orcid":false,"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[{"name":"Jarvis Research Center, Tencent YouTu Lab, Shenzhen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"30","article-title":"Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results","author":"Tarvainen A.","year":"2017","unstructured":"A.Tarvainen and H.Valpola. Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. Advances in Neural Information Processing Systems, 30, 2017.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-66185-8_29"},{"key":"e_1_3_2_1_3_1","volume-title":"An embarrassingly simple consistency regularization method for semi-supervised medical image segmentation. arXiv preprint arXiv:2202.00677","author":"Basak H.","year":"2022","unstructured":"H. Basak, R. Bhattacharya, R. Hussain, and A. Chatterjee. An embarrassingly simple consistency regularization method for semi-supervised medical image segmentation. arXiv preprint arXiv:2202.00677, 2022."},{"key":"e_1_3_2_1_4_1","volume-title":"Deep learning techniques for automatic mri cardiac multi-structures segmentation and diagnosis: is the problem solved? IEEE transactions on medical imaging, 37(11):2514--2525","author":"Bernard O.","year":"2018","unstructured":"O. Bernard, A. Lalande, C. Zotti, F. Cervenansky, X. Yang, P. Heng, I. Cetin, K. Lekadir, O. Camara, and M. Ballester. Deep learning techniques for automatic mri cardiac multi-structures segmentation and diagnosis: is the problem solved? IEEE transactions on medical imaging, 37(11):2514--2525, 2018."},{"key":"e_1_3_2_1_5_1","volume-title":"Swin-unet: Unet-like pure transformer for medical image segmentation. arXiv preprint arXiv:2105.05537","author":"Cao H.","year":"2021","unstructured":"H. Cao, Y. Wang, J. Chen, D. Jiang, X. Zhang, Q. Tian, and M. Wang. Swin-unet: Unet-like pure transformer for medical image segmentation. arXiv preprint arXiv:2105.05537, 2021."},{"key":"e_1_3_2_1_6_1","first-page":"431","volume-title":"Uncertainty aware temporal-ensembling model for semisupervised abusmass segmentation","author":"Cao Xuyang","year":"2020","unstructured":"Xuyang Cao, Houjin Chen, Yanfeng Li, Yahui Peng, Shu Wang, and Lin Cheng. Uncertainty aware temporal-ensembling model for semisupervised abusmass segmentation. volume 40, pages 431--443. IEEE, 2020."},{"key":"e_1_3_2_1_7_1","volume-title":"Transunet: Transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306","author":"Chen Jieneng","year":"2021","unstructured":"Jieneng Chen, Yongyi Lu, Qihang Yu, Xiangde Luo, Ehsan Adeli, Yan Wang, Le Lu, Alan L Yuille, and Yuyin Zhou. Transunet: Transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306, 2021."},{"key":"e_1_3_2_1_8_1","first-page":"834","volume-title":"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs","author":"Chen Liang-Chieh","year":"2017","unstructured":"Liang-Chieh Chen, George Papandreou, Iasonas Kokkinos, Kevin Murphy, and Alan L Yuille. Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. volume 40, pages 834--848. IEEE, 2017."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00264"},{"key":"e_1_3_2_1_10_1","volume-title":"Skin lesion analysis toward melanoma detection 2018: A challenge hosted by the international skin imaging collaboration (isic). arXiv preprint arXiv:1902.03368","author":"Codella Noel","year":"2019","unstructured":"Noel Codella, Veronica Rotemberg, Philipp Tschandl, M Emre Celebi, Stephen Dusza, David Gutman, Brian Helba, Aadi Kalloo, Konstantinos Liopyris, Michael Marchetti, et al. Skin lesion analysis toward melanoma detection 2018: A challenge hosted by the international skin imaging collaboration (isic). arXiv preprint arXiv:1902.03368, 2019."},{"key":"e_1_3_2_1_11_1","first-page":"1","volume-title":"Neural Computing and Applications","author":"Dan Jun","year":"2022","unstructured":"Jun Dan, Tao Jin, Hao Chi, Shunjie Dong, and Yixuan Shen. Uncertainty-guided joint unbalanced optimal transport for unsupervised domain adaptation. Neural Computing and Applications, pages 1--17, 2022."},{"key":"e_1_3_2_1_12_1","volume-title":"An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929","author":"Dosovitskiy A.","year":"2020","unstructured":"A. Dosovitskiy, L. Beyer, A. Kolesnikov, D. Weissenborn, X. Zhai, T. Unterthiner, M. Dehghani, M. Minderer, G. Heigold, and S. Gelly. An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, 2020."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59710-8_55"},{"key":"e_1_3_2_1_14_1","first-page":"1055","volume-title":"IEEE International Conference on Acoustics, Speech and Signal Processing","author":"Huang Huimin","year":"2020","unstructured":"Huimin Huang, Lanfen Lin, Ruofeng Tong, Hongjie Hu, Qiaowei Zhang, Yutaro Iwamoto, Xianhua Han, Yen-Wei Chen, and Jian Wu. U-net 3: A full-scale connected u-net for medical image segmentation. In IEEE International Conference on Acoustics, Speech and Signal Processing, pages 1055--1059. IEEE, 2020."},{"key":"e_1_3_2_1_15_1","volume-title":"Temporal ensembling for semi-supervised learning. arXiv preprint arXiv:1610.02242","author":"Laine Samuli","year":"2016","unstructured":"Samuli Laine and Timo Aila. Temporal ensembling for semi-supervised learning. arXiv preprint arXiv:1610.02242, 2016."},{"key":"e_1_3_2_1_16_1","volume-title":"Semi-supervised skin lesion segmentation via transformation consistent self-ensembling model. arXiv preprint arXiv:1808.03887","author":"Li Xiaomeng","year":"2018","unstructured":"Xiaomeng Li, Lequan Yu, Hao Chen, Chi-Wing Fu, and Pheng-Ann Heng. Semi-supervised skin lesion segmentation via transformation consistent self-ensembling model. arXiv preprint arXiv:1808.03887, 2018."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2017.07.005"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"e_1_3_2_1_19_1","volume-title":"Semi-supervised medical image segmentation via cross teaching between cnn and transformer. arXiv preprint arXiv:2112.04894","author":"Luo X.","year":"2021","unstructured":"X. Luo, M. Hu, T.Song, G. Wang, and S. Zhang. Semi-supervised medical image segmentation via cross teaching between cnn and transformer. arXiv preprint arXiv:2112.04894, 2021."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87196-3_30"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00937-3_43"},{"key":"e_1_3_2_1_22_1","volume-title":"Matthew Lee, Mattias Heinrich, Kazunari Misawa, Kensaku Mori, Steven McDonagh, Nils Y Hammerla","author":"Oktay Ozan","year":"1804","unstructured":"Ozan Oktay, Jo Schlemper, Loic Le Folgoc, Matthew Lee, Mattias Heinrich, Kazunari Misawa, Kensaku Mori, Steven McDonagh, Nils Y Hammerla, Bernhard Kainz, et al. Attention u-net: Learning where to look for the pancreas. arXiv preprint arXiv:1804.03999, 2018."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01269"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01269"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00042"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_9"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_28_1","first-page":"30","article-title":"Attention is all you need","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in Neural Information Processing Systems, 30, 2017.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_29_1","volume-title":"Interpolation consistency training for semi-supervised learning. arXiv preprint arXiv:1903.03825","author":"Verma V.","year":"2019","unstructured":"V. Verma, K. Kawaguchi, A. Lamb, J. Kannala, Y. Bengio, and D. Lopez-Paz. Interpolation consistency training for semi-supervised learning. arXiv preprint arXiv:1903.03825, 2019."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00262"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59710-8_53"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-12053-4_37"},{"key":"e_1_3_2_1_33_1","volume-title":"Exploring smoothness and class-separation for semi-supervised medical image segmentation. arXiv preprint arXiv:2203.01324","author":"Wu Y.","year":"2022","unstructured":"Y. Wu, Z. Wu, Q. Wu, Z. Ge, and J. Cai. Exploring smoothness and class-separation for semi-supervised medical image segmentation. arXiv preprint arXiv:2203.01324, 2022."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093608"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00411"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32245-8_67"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-66179-7_47"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32226-7_17"},{"key":"e_1_3_2_1_39_1","volume-title":"Transformer-cnn cohort: semi-supervised semantic segmentation by the best of both students. arXiv preprint arXiv:2209.02178","author":"Zheng Xu","year":"2022","unstructured":"Xu Zheng, Yunhao Luo, Hao Wang, Chong Fu, and Lin Wang. Transformer-cnn cohort: semi-supervised semantic segmentation by the best of both students. arXiv preprint arXiv:2209.02178, 2022."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2019.00020"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2019.2959609"}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611821","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3611821","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T23:55:33Z","timestamp":1755820533000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3611821"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":41,"alternative-id":["10.1145\/3581783.3611821","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3611821","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}