{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T17:42:44Z","timestamp":1775324564922,"version":"3.50.1"},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,1]],"date-time":"2023-12-01T00:00:00Z","timestamp":1701388800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2020AAA0109500\/2020AAA0109501"],"award-info":[{"award-number":["2020AAA0109500\/2020AAA0109501"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Med. Imaging"],"published-print":{"date-parts":[[2023,12]]},"DOI":"10.1109\/tmi.2023.3294980","type":"journal-article","created":{"date-parts":[[2023,7,13]],"date-time":"2023-07-13T17:24:34Z","timestamp":1689269074000},"page":"3579-3589","source":"Crossref","is-referenced-by-count":34,"title":["Improving Medical Vision-Language Contrastive Pretraining With Semantics-Aware Triage"],"prefix":"10.1109","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2165-245X","authenticated-orcid":false,"given":"Bo","family":"Liu","sequence":"first","affiliation":[{"name":"Department of Computing, The Hong Kong Polytechnic University, Hong Kong, SAR, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8399-7410","authenticated-orcid":false,"given":"Donghuan","family":"Lu","sequence":"additional","affiliation":[{"name":"Tencent Jarvis Laboratory, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5969-6987","authenticated-orcid":false,"given":"Dong","family":"Wei","sequence":"additional","affiliation":[{"name":"Tencent Jarvis Laboratory, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1118-9710","authenticated-orcid":false,"given":"Xian","family":"Wu","sequence":"additional","affiliation":[{"name":"Tencent Jarvis Laboratory, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8843-8685","authenticated-orcid":false,"given":"Yan","family":"Wang","sequence":"additional","affiliation":[{"name":"College of Computer Science, Sichuan University, Chengdu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5743-8861","authenticated-orcid":false,"given":"Yu","family":"Zhang","sequence":"additional","affiliation":[{"name":"Tencent HealthCare, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2195-2847","authenticated-orcid":false,"given":"Yefeng","family":"Zheng","sequence":"additional","affiliation":[{"name":"Tencent Jarvis Laboratory, Shenzhen, China"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"SIIM-ACR Pneumothorax Segmentation","year":"2019"},{"key":"ref2","first-page":"23716","article-title":"Flamingo: A visual language model for few-shot learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Alayrac"},{"key":"ref3","article-title":"Publicly available clinical BERT embeddings","author":"Alsentzer","year":"2019","journal-title":"arXiv:1904.03323"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20059-5_1"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.3390\/info11020125"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"ref7","first-page":"9912","article-title":"Unsupervised learning of visual features by contrasting cluster assignments","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Caron"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-59713-9_51"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00791"},{"issue":"12","key":"ref10","first-page":"2292","article-title":"Sinkhorn distances: Lightspeed computation of optimal transport","volume-title":"Proc. Int. Conf. Neural. Infor. Process. Syst. (NIPS)","volume":"26","author":"Cuturi"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01101"},{"key":"ref12","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00945"},{"key":"ref14","article-title":"VSE++: Improving visual-semantic embeddings with hard negatives","author":"Faghri","year":"2017","journal-title":"arXiv:1707.05612"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ISBI.2019.8759236"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00428"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref18","article-title":"Unsupervised multimodal representation learning across medical images and reports","volume-title":"arXiv:1811.08615","author":"Hsu","year":"2018"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00391"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00106"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1240"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-019-0322-0"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1038\/sdata.2016.35"},{"key":"ref25","first-page":"18661","article-title":"Supervised contrastive learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Khosla"},{"key":"ref26","article-title":"Unifying visual-semantic embeddings with multimodal neural language models","author":"Kiros","year":"2014","journal-title":"arXiv:1411.2539"},{"key":"ref27","first-page":"12888","article-title":"BLIP: Bootstrapping language-image pre-training for unified vision-language understanding and generation","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Li"},{"key":"ref28","article-title":"Prototypical contrastive learning of unsupervised representations","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Li"},{"key":"ref29","article-title":"Supervision exists everywhere: A data efficient contrastive language-image pre-training paradigm","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Li"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i10.17034"},{"key":"ref31","article-title":"Joint learning of localized representations from medical images and reports","volume-title":"arXiv:2112.02889","author":"M\u00fcller","year":"2021"},{"key":"ref32","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford"},{"key":"ref33","article-title":"Transfusion: Understanding transfer learning for medical imaging","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Raghu"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-24574-4_28"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1148\/ryai.2019180041"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00919-9_29"},{"key":"ref38","article-title":"Representation learning with contrastive predictive coding","author":"van den Oord","year":"2018","journal-title":"arXiv:1807.03748"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.3389\/fncom.2019.00056"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.369"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00943"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1145\/3233547.3233573"},{"key":"ref44","article-title":"Contrastive learning of medical visual representations from paired images and text","author":"Zhang","year":"2020","journal-title":"arXiv:2010.00747"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7005"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1002\/mp.13300"}],"container-title":["IEEE Transactions on Medical Imaging"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/42\/10336247\/10182304.pdf?arnumber=10182304","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,4,13]],"date-time":"2024-04-13T04:13:24Z","timestamp":1712981604000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10182304\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12]]},"references-count":46,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tmi.2023.3294980","relation":{},"ISSN":["0278-0062","1558-254X"],"issn-type":[{"value":"0278-0062","type":"print"},{"value":"1558-254X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,12]]}}}