{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:46:51Z","timestamp":1765309611079,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":58,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62475072"],"award-info":[{"award-number":["62475072"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["22S31905800"],"award-info":[{"award-number":["22S31905800"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Science and Technology Commission of Shanghai Municipality","award":["22DZ2229004"],"award-info":[{"award-number":["22DZ2229004"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755262","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:26:38Z","timestamp":1761377198000},"page":"8018-8027","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Multi-Slide Visual-Language Feature Fusion for Placental Disease Classification"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7443-3403","authenticated-orcid":false,"given":"Hang","family":"Guo","sequence":"first","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9987-5661","authenticated-orcid":false,"given":"Qing","family":"Zhang","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7609-3731","authenticated-orcid":false,"given":"Zixuan","family":"Gao","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4681-0431","authenticated-orcid":false,"given":"Siyuan","family":"Yang","sequence":"additional","affiliation":[{"name":"Nanyang Technological University, Singapore, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6408-2863","authenticated-orcid":false,"given":"Shulin","family":"Peng","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1052-118X","authenticated-orcid":false,"given":"Xiang","family":"Tao","sequence":"additional","affiliation":[{"name":"Obstetrics and Gynecology Hospital of Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7109-3826","authenticated-orcid":false,"given":"Ting","family":"Yu","sequence":"additional","affiliation":[{"name":"Obstetrics and Gynecology Hospital of Fudan University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1592-9627","authenticated-orcid":false,"given":"Yan","family":"Wang","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5063-8801","authenticated-orcid":false,"given":"Qingli","family":"Li","sequence":"additional","affiliation":[{"name":"East China Normal University, Shanghai, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41568-021-00408-3"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3337549"},{"key":"e_1_3_2_2_3_1","volume-title":"European Conference on Computer Vision. Springer, 401-417","author":"Chen Pingyi","year":"2024","unstructured":"Pingyi Chen, Chenglu Zhu, Sunyi Zheng, Honglin Li, and Lin Yang. 2024b. Wsi-vqa: Interpreting whole slide images by generative visual question answering. In European Conference on Computer Vision. Springer, 401-417."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01567"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-02857-3"},{"key":"e_1_3_2_2_6_1","volume-title":"Heounjeong Go, Soo Jeong Nam, and Sang Hyun Park.","author":"Chikontwe Philip","year":"2024","unstructured":"Philip Chikontwe, Meejeong Kim, Jaehoon Jeong, Hyun Jung Sung, Heounjeong Go, Soo Jeong Nam, and Sang Hyun Park. 2024. FR-MIL: Distribution re-calibration based multiple instance learning with transformer for whole slide image classification. IEEE Transactions on Medical Imaging (2024)."},{"key":"e_1_3_2_2_7_1","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171-4186."},{"key":"e_1_3_2_2_8_1","unstructured":"Tong Ding Sophia J Wagner Andrew H Song Richard J Chen Ming Y Lu Andrew Zhang Anurag J Vaidya Guillaume Jaume Muhammad Shaban Ahrong Kim et al. 2024. Multimodal whole slide foundation model for pathology. arXiv preprint arXiv:2411.19666 (2024)."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681534"},{"key":"e_1_3_2_2_10_1","first-page":"594","article-title":"Structured State Space Models for Multiple Instance Learning in Digital Pathology","volume":"2023","author":"Fillioux Leo","year":"2023","unstructured":"Leo Fillioux, Joseph Boyd, Maria Vakalopoulou, Paul-Henry Courn\u00e8de, and Stergios Christodoulidis. 2023. Structured State Space Models for Multiple Instance Learning in Digital Pathology. In Medical Image Computing and Computer Assisted Intervention - MICCAI 2023. 594-604.","journal-title":"Medical Image Computing and Computer Assisted Intervention - MICCAI"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.placenta.2023.03.003"},{"key":"e_1_3_2_2_12_1","volume-title":"Unleash the Power of State Space Model for Whole Slide Image with Local Aware Scanning and Importance Resampling","author":"Huang Yanyan","year":"2024","unstructured":"Yanyan Huang, Weiqin Zhao, Yu Fu, Lingting Zhu, and Lequan Yu. 2024. Unleash the Power of State Space Model for Whole Slide Image with Local Aware Scanning and Importance Resampling. IEEE Transactions on Medical Imaging (2024)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01952"},{"key":"e_1_3_2_2_14_1","volume-title":"International conference on machine learning. PMLR, 2127-2136","author":"Ilse Maximilian","year":"2018","unstructured":"Maximilian Ilse, Jakub Tomczak, and Max Welling. 2018. Attention-based deep multiple instance learning. In International conference on machine learning. PMLR, 2127-2136."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.4103\/2153-3539.186902"},{"key":"e_1_3_2_2_16_1","volume-title":"HistoQC: an open-source quality control tool for digital pathology slides. JCO clinical cancer informatics","author":"Janowczyk Andrew","year":"2019","unstructured":"Andrew Janowczyk, Ren Zuo, Hannah Gilmore, Michael Feldman, and Anant Madabhushi. 2019. HistoQC: an open-source quality control tool for digital pathology slides. JCO clinical cancer informatics, Vol. 3 (2019), 1-7."},{"key":"e_1_3_2_2_17_1","volume-title":"Chulhong Kim, and Yosep Chong.","author":"Kim Hyeongsub","year":"2021","unstructured":"Hyeongsub Kim, Hongjoon Yoon, Nishant Thakur, Gyoyeon Hwang, Eun Jung Lee, Chulhong Kim, and Yosep Chong. 2021. Deep learning-based histopathological segmentation for whole slide images of colorectal cancer in a compressed domain. Scientific reports, Vol. 11, 1 (2021), 22520."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01409"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-43904-9_7"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01083"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87237-3_51"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01076"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3688801"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103088"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72083-3_34"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3680882"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41591-024-02856-4"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01893"},{"key":"e_1_3_2_2_30_1","volume-title":"Tiffany Y Chen, Richard J Chen, Matteo Barbieri, and Faisal Mahmood.","author":"Lu Ming Y","year":"2021","unstructured":"Ming Y Lu, Drew FK Williamson, Tiffany Y Chen, Richard J Chen, Matteo Barbieri, and Faisal Mahmood. 2021. Data-efficient and weakly supervised computational pathology on whole-slide images. Nature biomedical engineering, Vol. 5, 6 (2021), 555-570."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41374-021-00579-5"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.62037\/cjer.2024.04.01.03"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1002\/jmri.28787"},{"key":"e_1_3_2_2_34_1","first-page":"67551","article-title":"The rise of ai language pathologists: Exploring two-level prompt learning for few-shot weakly-supervised whole slide image classification","volume":"36","author":"Qu Linhao","year":"2023","unstructured":"Linhao Qu, Kexue Fu, Manning Wang, Zhijian Song, et al., 2023. The rise of ai language pathologists: Exploring two-level prompt learning for few-shot weakly-supervised whole slide image classification. Advances in Neural Information Processing Systems, Vol. 36 (2023), 67551-67564.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_35_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_2_36_1","volume-title":"Transmil: Transformer based correlated multiple instance learning for whole slide image classification. Advances in neural information processing systems","author":"Shao Zhuchen","year":"2021","unstructured":"Zhuchen Shao, Hao Bian, Yang Chen, Yifeng Wang, Jian Zhang, Xiangyang Ji, et al., 2021. Transmil: Transformer based correlated multiple instance learning for whole slide image classification. Advances in neural information processing systems, Vol. 34 (2021), 2136-2147."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i2.25315"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102113"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.media.2024.103294"},{"key":"e_1_3_2_2_40_1","volume-title":"CoD-MIL: Chain-of-Diagnosis Prompting Multiple Instance Learning for Whole Slide Image Classification","author":"Shi Jiangbo","year":"2024","unstructured":"Jiangbo Shi, Chen Li, Tieliang Gong, Chunbao Wang, and Huazhu Fu. 2024b. CoD-MIL: Chain-of-Diagnosis Prompting Multiple Instance Learning for Whole Slide Image Classification. IEEE Transactions on Medical Imaging (2024)."},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01069"},{"key":"e_1_3_2_2_42_1","volume-title":"A low-rank matching attention based cross-modal feature fusion method for conversational emotion recognition","author":"Shou Yuntao","year":"2024","unstructured":"Yuntao Shou, Huan Liu, Xiangyong Cao, Deyu Meng, and Bo Dong. 2024. A low-rank matching attention based cross-modal feature fusion method for conversational emotion recognition. IEEE Transactions on Affective Computing (2024)."},{"volume-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","author":"Shu Tong","key":"e_1_3_2_2_43_1","unstructured":"Tong Shu, Jun Shi, Dongdong Sun, Zhiguo Jiang, and Yushan Zheng. 2024. SlideGCD: Slide-Based Graph Collaborative Training with Knowledge Distillation for Whole Slide Image Classification. In International Conference on Medical Image Computing and Computer-Assisted Intervention. Springer, 470-480."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01099"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1007\/s43032-023-01344-3"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.5114\/wo.2014.47136"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-024-46986-2"},{"key":"e_1_3_2_2_48_1","volume-title":"Virchow: A million-slide digital pathology foundation model. arXiv preprint arXiv:2309.07778","author":"Vorontsov Eugene","year":"2023","unstructured":"Eugene Vorontsov, Alican Bozkurt, Adam Casson, George Shaikovski, Michal Zelechowski, Siqi Liu, Kristen Severson, Eric Zimmermann, James Hall, Neil Tenenholtz, et al., 2023. Virchow: A million-slide digital pathology foundation model. arXiv preprint arXiv:2309.07778 (2023)."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"crossref","unstructured":"Eugene Vorontsov Alican Bozkurt Adam Casson George Shaikovski Michal Zelechowski Kristen Severson Eric Zimmermann James Hall Neil Tenenholtz Nicolo Fusi et al. 2024. A foundation model for clinical-grade computational pathology and rare cancers detection. Nature medicine Vol. 30 10 (2024) 2924-2935.","DOI":"10.1038\/s41591-024-03141-0"},{"key":"e_1_3_2_2_50_1","volume-title":"Pan-cancer Histopathology WSI Pre-training with Position-aware Masked Autoencoder","author":"Wu Kun","year":"2024","unstructured":"Kun Wu, Zhiguo Jiang, Kunming Tang, Jun Shi, Fengying Xie, Wei Wang, Haibo Wu, and Yushan Zheng. 2024. Pan-cancer Histopathology WSI Pre-training with Position-aware Masked Autoencoder. IEEE Transactions on Medical Imaging (2024)."},{"key":"e_1_3_2_2_51_1","unstructured":"Jinxi Xiang Xiyue Wang Xiaoming Zhang Yinghua Xi Feyisope Eweje Yijiang Chen Yuchen Li Colin Bergstrom Matthew Gopaulchan Ted Kim et al. 2025. A vision-language foundation model for precision oncology. Nature (2025) 1-10."},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","unstructured":"Ling Xitong Lei Yuanyuan Li Jiawen Cheng Junru Huang Wenting Guan Tian Guan Jian and He Yonghong. 2025. Camelyon. doi:10.57760\/sciencedb.16442","DOI":"10.57760\/sciencedb.16442"},{"key":"e_1_3_2_2_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72083-3_28"},{"key":"e_1_3_2_2_54_1","first-page":"3452176","article-title":"Deep Learning Algorithm-Based Ultrasound Image Information in Diagnosis and Treatment of Pernicious Placenta Previa","volume":"2022","author":"Yang Xiao","year":"2022","unstructured":"Xiao Yang, Zheng Chen, and Xiaozhou Jia. 2022. Deep Learning Algorithm-Based Ultrasound Image Information in Diagnosis and Treatment of Pernicious Placenta Previa. Computational and Mathematical Methods in Medicine, Vol. 2022, 1 (2022), 3452176.","journal-title":"Computational and Mathematical Methods in Medicine"},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01824"},{"key":"e_1_3_2_2_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2023.3264781"},{"key":"e_1_3_2_2_57_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72083-3_35"},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBIOM.2024.3384704"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755262","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:43:49Z","timestamp":1765309429000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755262"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":58,"alternative-id":["10.1145\/3746027.3755262","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755262","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}