{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T16:44:40Z","timestamp":1779381880044,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,10,17]],"date-time":"2021-10-17T00:00:00Z","timestamp":1634428800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"BJNSF","award":["4202033"],"award-info":[{"award-number":["4202033"]}]},{"name":"the Pharmaceutical Collaborative Innovation Research Project of Beijing Science and Technology Commission","award":["Z191100007719002"],"award-info":[{"award-number":["Z191100007719002"]}]},{"name":"BJNSFC Haidian Original Innovation Joint Fund","award":["19L2062"],"award-info":[{"award-number":["19L2062"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,10,17]]},"DOI":"10.1145\/3474085.3475418","type":"proceedings-article","created":{"date-parts":[[2021,10,18]],"date-time":"2021-10-18T06:35:51Z","timestamp":1634538951000},"page":"2474-2482","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":43,"title":["Multi-Modal Multi-Instance Learning for Retinal Disease Recognition"],"prefix":"10.1145","author":[{"given":"Xirong","family":"Li","sequence":"first","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yang","family":"Zhou","sequence":"additional","affiliation":[{"name":"Beijing Visionary Intelligence Ltd., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jie","family":"Wang","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hailan","family":"Lin","sequence":"additional","affiliation":[{"name":"Renmin University of China, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianchun","family":"Zhao","sequence":"additional","affiliation":[{"name":"Beijing Visionary Intelligence Ltd., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Dayong","family":"Ding","sequence":"additional","affiliation":[{"name":"Beijing Visionary Intelligence Ltd., Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Weihong","family":"Yu","sequence":"additional","affiliation":[{"name":"Peking Union Medical College Hospital, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Youxin","family":"Chen","sequence":"additional","affiliation":[{"name":"Peking Union Medical College Hospital, Beijing, China"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2021,10,17]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba , Jamie Ryan Kiros, and Geoffrey E Hinton . 2016 . Layer normalization. arXiv preprint arXiv:1607.06450 (2016). Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer normalization. arXiv preprint arXiv:1607.06450 (2016)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1001\/jamaophthalmol.2017.3782"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"crossref","unstructured":"Joao Carreira and Andrew Zisserman. 2017. Quo Vadis Action Recognition? A New Model and the Kinetics Dataset. In CVPR.  Joao Carreira and Andrew Zisserman. 2017. Quo Vadis Action Recognition? A New Model and the Kinetics Dataset. In CVPR.","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Hila Chefer Shir Gur and Lior Wolf. 2021. Transformer Interpretability Beyond Attention Visualization. In CVPR.  Hila Chefer Shir Gur and Lior Wolf. 2021. Transformer Interpretability Beyond Attention Visualization. In CVPR.","DOI":"10.1109\/CVPR46437.2021.00084"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413869"},{"key":"e_1_3_2_1_6_1","unstructured":"Siying Dai Leiting Chen Ting Lei Chuan Zhou and Yang Wen. 2020. Automatic Detection Of Pathological Myopia And High Myopia On Fundus Images. In ICME.  Siying Dai Leiting Chen Ting Lei Chuan Zhou and Yang Wen. 2020. Automatic Detection Of Pathological Myopia And High Myopia On Fundus Images. In ICME."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"crossref","unstructured":"Jeffrey De Fauw Joseph R Ledsam Bernardino Romera-Paredes Stanislav Nikolov Nenad Tomasev Sam Blackwell Harry Askham Xavier Glorot Brendan O'Donoghue Daniel Visentin etal 2018. Clinically applicable deep learning for diagnosis and referral in retinal disease. Nature medicine Vol. 24 9 (2018) 1342--1350.  Jeffrey De Fauw Joseph R Ledsam Bernardino Romera-Paredes Stanislav Nikolov Nenad Tomasev Sam Blackwell Harry Askham Xavier Glorot Brendan O'Donoghue Daniel Visentin et al. 2018. Clinically applicable deep learning for diagnosis and referral in retinal disease. Nature medicine Vol. 24 9 (2018) 1342--1350.","DOI":"10.1038\/s41591-018-0107-6"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ophtha.2017.02.008"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413658"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1001\/jama.2016.17216"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"crossref","unstructured":"Kensho Hara Hirokatsu Kataoka and Yutaka Satoh. 2018. Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?. In CVPR.  Kensho Hara Hirokatsu Kataoka and Yutaka Satoh. 2018. Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs and ImageNet?. In CVPR.","DOI":"10.1109\/CVPR.2018.00685"},{"key":"e_1_3_2_1_12_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR.  Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cell.2018.02.010"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.oret.2016.12.009"},{"key":"e_1_3_2_1_15_1","volume-title":"Development and evaluation of a deep learning model for the detection of multiple fundus diseases based on color fundus photography. British Journal of Ophthalmology","author":"Li Bing","year":"2021","unstructured":"Bing Li , Huan Chen , Bilei Zhang , Mingzhen Yuan , Xuemin Jin , Bo Lei , Jie Xu , Wei Gu , David Wong , Xixi He , Hao Wang , Dayong Ding , Xirong Li , Weihong Yu , and Youxin Chen . 2021. Development and evaluation of a deep learning model for the detection of multiple fundus diseases based on color fundus photography. British Journal of Ophthalmology ( 2021 ). Bing Li, Huan Chen, Bilei Zhang, Mingzhen Yuan, Xuemin Jin, Bo Lei, Jie Xu, Wei Gu, David Wong, Xixi He, Hao Wang, Dayong Ding, Xirong Li, Weihong Yu, and Youxin Chen. 2021. Development and evaluation of a deep learning model for the detection of multiple fundus diseases based on color fundus photography. British Journal of Ophthalmology (2021)."},{"key":"e_1_3_2_1_16_1","unstructured":"Xirong Li Wencui Wan Yang Zhou Jianchun Zhao Qijie Wei Junbo Rong Pengyi Zhou Limin Xu Lijuan Lang Yuying Liu Chengzhi Niu Dayong Ding and Xuemin Jin. 2020. Deep Multiple Instance Learning with Spatial Attention for ROP Case Classification Instance Selection and Abnormality Localization. In ICPR.  Xirong Li Wencui Wan Yang Zhou Jianchun Zhao Qijie Wei Junbo Rong Pengyi Zhou Limin Xu Lijuan Lang Yuying Liu Chengzhi Niu Dayong Ding and Xuemin Jin. 2020. Deep Multiple Instance Learning with Spatial Attention for ROP Case Classification Instance Selection and Abnormality Localization. In ICPR."},{"key":"e_1_3_2_1_17_1","unstructured":"Ilse Maximilian Tomczak Jakub and Welling Max. 2018. Attention-based Deep Multiple Instance Learning. In ICML.  Ilse Maximilian Tomczak Jakub and Welling Max. 2018. Attention-based Deep Multiple Instance Learning. In ICML."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"key":"e_1_3_2_1_19_1","volume-title":"Super-convergence: Very fast training of neural networks using large learning rates. In Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications.","author":"Smith Leslie N","year":"2019","unstructured":"Leslie N Smith and Nicholay Topin . 2019 . Super-convergence: Very fast training of neural networks using large learning rates. In Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications. Leslie N Smith and Nicholay Topin. 2019. Super-convergence: Very fast training of neural networks using large learning rates. In Artificial Intelligence and Machine Learning for Multi-Domain Operations Applications."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.5555\/3295222.3295349"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Weisen Wang Zhiyan Xu Weihong Yu Jianchun Zhao Jingyuan Yang Feng He Zhikun Yang Di Chen Dayong Ding Youxin Chen and Xirong Li. 2019. Two-Stream CNN with Loose Pair Training for Multi-modal AMD Categorization. In MICCAI.  Weisen Wang Zhiyan Xu Weihong Yu Jianchun Zhao Jingyuan Yang Feng He Zhikun Yang Di Chen Dayong Ding Youxin Chen and Xirong Li. 2019. Two-Stream CNN with Loose Pair Training for Multi-modal AMD Categorization. In MICCAI.","DOI":"10.1007\/978-3-030-32239-7_18"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Xi Wei Tianzhu Zhang Yan Li Yongdong Zhang and Feng Wu. 2020. Multi-Modality Cross Attention Network for Image and Sentence Matching. In CVPR.  Xi Wei Tianzhu Zhang Yan Li Yongdong Zhang and Feng Wu. 2020. Multi-Modality Cross Attention Network for Image and Sentence Matching. In CVPR.","DOI":"10.1109\/CVPR42600.2020.01095"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"crossref","unstructured":"Jun Wu Yao Zhang Jie Wang Jianchun Zhao Dayong Ding Ningjiang Chen Lingling Wang Xuan Chen Chunhui Jiang Xuan Zou Xing Liu Hui Xiao Yuan Tian Zongjiang Shang Kaiwei Wang Xirong Li Gang Yang and Jianping Fan. 2020. AttenNet: Deep Attention Based Retinal Disease Classification in OCT Images. In MMM.  Jun Wu Yao Zhang Jie Wang Jianchun Zhao Dayong Ding Ningjiang Chen Lingling Wang Xuan Chen Chunhui Jiang Xuan Zou Xing Liu Hui Xiao Yuan Tian Zongjiang Shang Kaiwei Wang Xirong Li Gang Yang and Jianping Fan. 2020. AttenNet: Deep Attention Based Retinal Disease Classification in OCT Images. In MMM.","DOI":"10.1007\/978-3-030-37734-2_46"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"crossref","unstructured":"Gang Yang Fan Li Dayong Ding Jun Wu and Jie Xu. 2021. Automatic Diagnosis of Glaucoma on Color Fundus Images Using Adaptive Mask Deep Network. In MMM.  Gang Yang Fan Li Dayong Ding Jun Wu and Jie Xu. 2021. Automatic Diagnosis of Glaucoma on Color Fundus Images Using Adaptive Mask Deep Network. In MMM.","DOI":"10.1007\/978-3-030-67835-7_9"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2671188.2749406"}],"event":{"name":"MM '21: ACM Multimedia Conference","location":"Virtual Event China","acronym":"MM '21","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 29th ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475418","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3474085.3475418","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:32Z","timestamp":1750193312000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3474085.3475418"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,17]]},"references-count":25,"alternative-id":["10.1145\/3474085.3475418","10.1145\/3474085"],"URL":"https:\/\/doi.org\/10.1145\/3474085.3475418","relation":{},"subject":[],"published":{"date-parts":[[2021,10,17]]},"assertion":[{"value":"2021-10-17","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}