{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T07:00:12Z","timestamp":1774508412544,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,10,26]],"date-time":"2023-10-26T00:00:00Z","timestamp":1698278400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,10,26]]},"DOI":"10.1145\/3581783.3612056","type":"proceedings-article","created":{"date-parts":[[2023,10,27]],"date-time":"2023-10-27T07:27:30Z","timestamp":1698391650000},"page":"3384-3393","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":29,"title":["ScribbleVC: Scribble-supervised Medical Image Segmentation with Vision-Class Embedding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3839-0611","authenticated-orcid":false,"given":"Zihan","family":"Li","sequence":"first","affiliation":[{"name":"Xiamen University &amp; University of Washington, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8929-020X","authenticated-orcid":false,"given":"Yuan","family":"Zheng","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8574-0005","authenticated-orcid":false,"given":"Xiangde","family":"Luo","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6389-7428","authenticated-orcid":false,"given":"Dandan","family":"Shan","sequence":"additional","affiliation":[{"name":"Xiamen University, Xiamen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9996-6870","authenticated-orcid":false,"given":"Qingqi","family":"Hong","sequence":"additional","affiliation":[{"name":"Xiamen University &amp; Hong Kong Centre for Cerebro-Cardiovascular Health Engineering, Xiamen, China"}]}],"member":"320","published-online":{"date-parts":[[2023,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Segnet: a deep convolutional encoder-decoder architecture for image segmentation","author":"Badrinarayanan Vijay","unstructured":"Vijay Badrinarayanan, Alex Kendall, and Roberto Cipolla. 2017. Segnet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE transactions on pattern analysis and machine intelligence, 39, 12, 2481--2495."},{"key":"e_1_3_2_1_2_1","volume-title":"International Workshop on Statistical Atlases and Computational Models of the Heart. Springer, 111--119","author":"Baumgartner Christian F","year":"2017","unstructured":"Christian F Baumgartner, Lisa M Koch, Marc Pollefeys, and Ender Konukoglu. 2017. An exploration of 2d and 3d deep learning techniques for cardiac mr image segmentation. In International Workshop on Statistical Atlases and Computational Models of the Heart. Springer, 111--119."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Olivier Bernard et al. 2018. Deep learning techniques for automatic mri cardiac multi-structures segmentation and diagnosis: is the problem solved? IEEE transactions on medical imaging 37 11 2514--2525.","DOI":"10.1109\/TMI.2018.2837502"},{"key":"e_1_3_2_1_4_1","volume-title":"Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support","author":"Can Yigit B","unstructured":"Yigit B Can, Krishna Chaitanya, Basil Mustafa, Lisa M Koch, Ender Konukoglu, and Christian F Baumgartner. 2018. Learning to segment medical images with scribble-supervision alone. In Deep Learning in Medical Image Analysis and Multimodal Learning for Clinical Decision Support. Springer, 236--244."},{"key":"e_1_3_2_1_5_1","unstructured":"Hu Cao Yueyue Wang Joy Chen Dongsheng Jiang Xiaopeng Zhang Qi Tian and Manning Wang. 2021. Swin-unet: unet-like pure transformer for medical image segmentation. arXiv preprint arXiv:2105.05537."},{"key":"e_1_3_2_1_6_1","unstructured":"Jieneng Chen Yongyi Lu Qihang Yu Xiangde Luo Ehsan Adeli Yan Wang Le Lu Alan L Yuille and Yuyin Zhou. 2021. Transunet: transformers make strong encoders for medical image segmentation. arXiv preprint arXiv:2102.04306."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46723-8_49"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10278-013-9622-7"},{"key":"e_1_3_2_1_9_1","unstructured":"Terrance DeVries and Graham W Taylor. 2017. Improved regularization of convolutional neural networks with cutout. arXiv preprint arXiv:1708.04552."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2021.3059453"},{"key":"e_1_3_2_1_11_1","volume-title":"Random walks for image segmentation","author":"Grady Leo","unstructured":"Leo Grady. 2006. Random walks for image segmentation. IEEE transactions on pattern analysis and machine intelligence, 28, 11, 1768--1783."},{"key":"e_1_3_2_1_12_1","unstructured":"Yves Grandvalet and Yoshua Bengio. 2004. Semi-supervised learning by entropy minimization. Advances in neural information processing systems 17."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00181"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2023.3280646"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01001"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"e_1_3_2_1_17_1","volume-title":"Jens Petersen, and Klaus H Maier-Hein.","author":"Isensee Fabian","year":"2021","unstructured":"Fabian Isensee, Paul F Jaeger, Simon AA Kohl, Jens Petersen, and Klaus H Maier-Hein. 2021. Nnu-net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18, 2, 203--211."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-32248-9_20"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2941265"},{"key":"e_1_3_2_1_20_1","unstructured":"Jang-Hyun Kim Wonho Choo Hosan Jeong and Hyun Oh Song. 2021. Co-mixup: saliency guided joint mixup with supermodular diversity. arXiv preprint arXiv:2102.03065."},{"key":"e_1_3_2_1_21_1","volume-title":"International Conference on Machine Learning. PMLR, 5275--5285","author":"Kim Jang-Hyun","year":"2020","unstructured":"Jang-Hyun Kim, Wonho Choo, and Hyun Oh Song. 2020. Puzzle mix: exploiting saliency and local statistics for optimal mixup. In International Conference on Machine Learning. PMLR, 5275--5285."},{"key":"e_1_3_2_1_22_1","unstructured":"Alexander Kirillov et al. 2023. Segment anything. arXiv preprint arXiv:2304.02643."},{"key":"e_1_3_2_1_23_1","volume-title":"Post-dae: anatomically plausible segmentation via post-processing with denoising autoencoders","author":"Larrazabal Agostina J","unstructured":"Agostina J Larrazabal, C\u00e9sar Mart?nez, Ben Glocker, and Enzo Ferrante. 2020. Post-dae: anatomically plausible segmentation via post-processing with denoising autoencoders. IEEE transactions on medical imaging, 39, 12, 3813--3820."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59710-8_2"},{"key":"e_1_3_2_1_25_1","first-page":"6","article-title":"Chatdoctor: a medical chat model fine-tuned on a large language model meta-ai (llama) using medical domain knowledge","volume":"15","author":"Li Yunxiang","year":"2023","unstructured":"Yunxiang Li, Zihan Li, Kai Zhang, Ruilong Dan, Steve Jiang, and You Zhang. 2023. Chatdoctor: a medical chat model fine-tuned on a large language model meta-ai (llama) using medical domain knowledge. Cureus, 15, 6.","journal-title":"Cureus"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-15937-4_65"},{"key":"e_1_3_2_1_27_1","article-title":"Lvit: language meets vision transformer in medical image segmentation","author":"Li Zihan","year":"2023","unstructured":"Zihan Li, Yunxiang Li, Qingde Li, Puyang Wang, Dazhou Guo, Le Lu, Dakai Jin, You Zhang, and Qingqi Hong. 2023. Lvit: language meets vision transformer in medical image segmentation. IEEE Transactions on Medical Imaging.","journal-title":"IEEE Transactions on Medical Imaging."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.344"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Xiaoming Liu Quan Yuan Yaozong Gao Kelei He Shuo Wang Xiao Tang Jinshan Tang and Dinggang Shen. 2022. Weakly supervised segmentation of covid19 infection with scribble annotation on ct images. Pattern recognition 122 108341.","DOI":"10.1016\/j.patcog.2021.108341"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00754"},{"key":"e_1_3_2_1_31_1","unstructured":"Xiangde Luo Minhao Hu Wenjun Liao Shuwei Zhai Tao Song Guotai Wang and Shaoting Zhang. 2022. Scribble-supervised medical image segmentation via dual-branch network and dynamically mixed pseudo labels supervision. arXiv preprint arXiv:2203.02106."},{"key":"e_1_3_2_1_32_1","volume-title":"International Conference on Medical Imaging with Deep Learning. PMLR, 820--833","author":"Luo Xiangde","year":"2022","unstructured":"Xiangde Luo, Minhao Hu, Tao Song, Guotai Wang, and Shaoting Zhang. 2022. Semi-supervised medical image segmentation via cross teaching between cnn and transformer. In International Conference on Medical Imaging with Deep Learning. PMLR, 820--833."},{"key":"e_1_3_2_1_33_1","first-page":"318","article-title":"Efficient semi-supervised gross target volume of nasopharyngeal carcinoma segmentation via uncertainty rectified pyramid consistency","volume":"2021","author":"Luo Xiangde","year":"2021","unstructured":"Xiangde Luo, Wenjun Liao, Jieneng Chen, Tao Song, Yinan Chen, Shichuan Zhang, Nianyong Chen, Guotai Wang, and Shaoting Zhang. 2021. Efficient semi-supervised gross target volume of nasopharyngeal carcinoma segmentation via uncertainty rectified pyramid consistency. In Medical Image Computing and Computer Assisted Intervention - MICCAI 2021, 318--329.","journal-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00331"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Fausto Milletari Nassir Navab and Seyed-Ahmad Ahmadi. 2016. V-net: fully convolutional neural networks for volumetric medical image segmentation. In 2016 fourth international conference on 3D vision (3DV). Ieee 565--571.","DOI":"10.1109\/3DV.2016.79"},{"key":"e_1_3_2_1_36_1","volume-title":"M\u00e1rio AT Figueiredo, and Arlindo L Oliveira","author":"Monteiro Miguel","year":"2018","unstructured":"Miguel Monteiro, M\u00e1rio AT Figueiredo, and Arlindo L Oliveira. 2018. Conditional random fields as recurrent neural networks for 3d medical imaging segmentation. arXiv preprint arXiv:1807.07464."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Yehui Qiu Zihan Li Yining Wang Pei Dong Dijia Wu Xinnian Yang Qingqi Hong and Dinggang Shen. 2023. Corsegrec: a topology-preserving scheme for extracting fully-connected coronary arteries from ct angiography. In MICCAI.","DOI":"10.1007\/978-3-031-43898-1_64"},{"key":"e_1_3_2_1_38_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Alec","unstructured":"Alec Radford et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096683"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02007"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2021.3069634"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.541"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00443"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Yiqing Wang et al. 2023. Swinmm: masked multi-view with swin transformers for 3d medical image segmentation. In MICCAI.","DOI":"10.1007\/978-3-031-43898-1_47"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ymeth.2022.10.005"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2021.3089702"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01187"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00612"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"crossref","unstructured":"Hongyi Zhang Moustapha Cisse Yann N Dauphin and David Lopez-Paz. 2017. Mixup: beyond empirical risk minimization. arXiv preprint arXiv:1710.09412.","DOI":"10.1007\/978-1-4899-7687-1_79"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01136"},{"key":"e_1_3_2_1_54_1","unstructured":"Pengyi Zhang et al. 2020. Accl: adversarial constrained-cnn loss for weakly supervised medical image segmentation. arXiv preprint arXiv:2005.00328."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.179"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1038\/s42256-021-00425-9"},{"key":"e_1_3_2_1_57_1","volume-title":"Nima Tajbakhsh, and Jianming Liang.","author":"Zhou Zongwei","year":"2018","unstructured":"Zongwei Zhou, Md Mahfuzur Rahman Siddiquee, Nima Tajbakhsh, and Jianming Liang. 2018. Unet: a nested u-net architecture for medical image segmentation. In Deep learning in medical image analysis and multimodal learning for clinical decision support. Springer, 3--11."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46723-8_67"},{"key":"e_1_3_2_1_59_1","volume-title":"Multivariate mixture model for myocardial segmentation combining multi-source images","author":"Zhuang Xiahai","unstructured":"Xiahai Zhuang. 2018. Multivariate mixture model for myocardial segmentation combining multi-source images. IEEE transactions on pattern analysis and machine intelligence, 41, 12, 2933--2946."}],"event":{"name":"MM '23: The 31st ACM International Conference on Multimedia","location":"Ottawa ON Canada","acronym":"MM '23","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 31st ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612056","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3581783.3612056","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:02:41Z","timestamp":1755820961000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3581783.3612056"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,26]]},"references-count":59,"alternative-id":["10.1145\/3581783.3612056","10.1145\/3581783"],"URL":"https:\/\/doi.org\/10.1145\/3581783.3612056","relation":{},"subject":[],"published":{"date-parts":[[2023,10,26]]},"assertion":[{"value":"2023-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}