{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:21:21Z","timestamp":1750220481402,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":39,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,3,1]],"date-time":"2021-03-01T00:00:00Z","timestamp":1614556800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NSF","award":["1122374"],"award-info":[{"award-number":["1122374"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,3,3]]},"DOI":"10.1145\/3442188.3445909","type":"proceedings-article","created":{"date-parts":[[2021,2,25]],"date-time":"2021-02-25T01:45:48Z","timestamp":1614217548000},"page":"458-465","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["A Pilot Study in Surveying Clinical Judgments to Evaluate Radiology Report Generation"],"prefix":"10.1145","author":[{"given":"William","family":"Boag","sequence":"first","affiliation":[{"name":"MIT, USA"}]},{"given":"Hassan","family":"Kan\u00e9","sequence":"additional","affiliation":[{"name":"WL Research, USA"}]},{"given":"Saumya","family":"Rawat","sequence":"additional","affiliation":[{"name":"MIT, USA"}]},{"given":"Jesse","family":"Wei","sequence":"additional","affiliation":[{"name":"Beth Israel Deaconess Medical Center, Department of Radiology, USA"}]},{"given":"Alexander","family":"Goehler","sequence":"additional","affiliation":[{"name":"Beth Israel Deaconess Medical Center, Department of Radiology, USA"}]}],"member":"320","published-online":{"date-parts":[[2021,3]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72."},{"key":"e_1_3_2_1_2_1","volume-title":"MUTT: Metric Unit TesTing for Language Generation Tasks. In ACL (Berlin, Germany).","author":"Boag William","year":"2016","unstructured":"William Boag, Renan Campos, Kate Saenko, and Anna Rumshisky. 2016. MUTT: Metric Unit TesTing for Language Generation Tasks. In ACL (Berlin, Germany)."},{"key":"e_1_3_2_1_3_1","volume-title":"Baselines for Chest X-Ray Report Generation. Machine Learning for Health workshop at NeurIPS.","author":"Boag William","year":"2019","unstructured":"William Boag, Tzu-Ming Harry Hsu, Matthew McDermott, Gabriela Berner, Emily Alsentzer, Wei-Hung Weng, Peter Szolovits, and Marzyeh Ghassemi. 2019. Baselines for Chest X-Ray Report Generation. Machine Learning for Health workshop at NeurIPS."},{"key":"e_1_3_2_1_4_1","volume-title":"Padchest: A large chest x-ray image dataset with multi-label annotated reports. arXiv preprint arXiv:1901.07441","author":"Bustos Aurelia","year":"2019","unstructured":"Aurelia Bustos, Antonio Pertusa, Jose-Maria Salinas, and Maria de la Iglesia-Vay\u00e1. 2019. Padchest: A large chest x-ray image dataset with multi-label annotated reports. arXiv preprint arXiv:1901.07441 (2019)."},{"key":"e_1_3_2_1_5_1","volume-title":"11th Conference of the European Chapter of the Association for Computational Linguistics.","author":"Callison-Burch Chris","year":"2006","unstructured":"Chris Callison-Burch, Miles Osborne, and Philipp Koehn. 2006. Re-evaluation the role of bleu in machine translation research. In 11th Conference of the European Chapter of the Association for Computational Linguistics."},{"key":"e_1_3_2_1_6_1","unstructured":"Jeanne Sternlicht Chall and Edgar Dale. 1995. Readability revisited: the new Dale-Chall readability formula."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1264"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1093\/jamia\/ocv080"},{"key":"e_1_3_2_1_9_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","unstructured":"Rudolph Flesch. 1948. A new readability yardstick. 221--233 pages. https:\/\/doi.org\/10.1037\/h0057532","DOI":"10.1037\/h0057532"},{"key":"e_1_3_2_1_11_1","volume-title":"Producing radiologist-quality reports for interpretable artificial intelligence. arXiv preprint arXiv:1806.00340","author":"Gale William","year":"2018","unstructured":"William Gale, Luke Oakden-Rayner, Gustavo Carneiro, Andrew P Bradley, and Lyle J Palmer. 2018. Producing radiologist-quality reports for interpretable artificial intelligence. arXiv preprint arXiv:1806.00340 (2018)."},{"key":"e_1_3_2_1_12_1","unstructured":"Robert Gunning. 1968. The Technique of Clear Writing."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00138-020-01060-x"},{"key":"e_1_3_2_1_14_1","volume-title":"Unsupervised multimodal representation learning across medical images and reports. arXiv preprint arXiv:1811.08615","author":"Harry Hsu Tzu-Ming","year":"2018","unstructured":"Tzu-Ming Harry Hsu, Wei-Hung Weng, Willie Boag, Matthew McDermott, and Peter Szolovits. 2018. Unsupervised multimodal representation learning across medical images and reports. arXiv preprint arXiv:1811.08615 (2018)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301590"},{"key":"e_1_3_2_1_16_1","volume-title":"MIMIC-CXR: A large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042 1, 2","author":"Johnson Alistair EW","year":"2019","unstructured":"Alistair EW Johnson, Tom J Pollard, Seth Berkowitz, Nathaniel R Greenbaum, Matthew P Lungren, Chih-ying Deng, Roger G Mark, and Steven Horng. 2019. MIMIC-CXR: A large publicly available database of labeled chest radiographs. arXiv preprint arXiv:1901.07042 1, 2 (2019)."},{"key":"e_1_3_2_1_17_1","unstructured":"Hassan Kan\u00e9 Yusuf Kocyigit Pelkins Ajanoh Ali Abdalla and Mohamed Coulibali. 2019. Towards Neural Similarity Evaluator. (2019)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2017.2788044"},{"key":"e_1_3_2_1_19_1","volume-title":"Deep learning for medical image segmentation. arXiv preprint arXiv:1505.02000","author":"Lai Matthew","year":"2015","unstructured":"Matthew Lai. 2015. Deep learning for medical image segmentation. arXiv preprint arXiv:1505.02000 (2015)."},{"key":"e_1_3_2_1_20_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the 4th Machine Learning for Healthcare Conference (Proceedings of Machine Learning Research","volume":"269","author":"Liu Guanxiong","year":"2019","unstructured":"Guanxiong Liu, Tzu-Ming Harry Hsu, Matthew McDermott, Willie Boag, Wei-Hung Weng, Peter Szolovits, and Marzyeh Ghassemi. 2019. Clinically Accurate Chest X-Ray Report Generation. In Proceedings of the 4th Machine Learning for Healthcare Conference (Proceedings of Machine Learning Research, Vol. 106). PMLR, Ann Arbor, Michigan, 249--269. http:\/\/proceedings.mlr.press\/v106\/liu19a.html"},{"volume-title":"Proceedings of the second conference on machine translation. 589--597","year":"2017","key":"e_1_3_2_1_22_1","unstructured":"Chi-kiu Lo. 2017. MEANT 2.0: Accurate semantic MT evaluation for any output language. In Proceedings of the second conference on machine translation. 589--597."},{"volume-title":"Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1). 507--513","year":"2019","key":"e_1_3_2_1_23_1","unstructured":"Chi-kiu Lo. 2019. YiSi-A unified semantic MT quality evaluation and estimation metric for languages with different levels of available resources. In Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1). 507--513."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-4768"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of LREC","author":"Marelli M.","year":"2014","unstructured":"M. Marelli, S. Menini, M. Baroni, L. Bentivogli, R. Bernardi, and R. Zamparelli. 2014. A SICK cure for the evaluation of compositional distributional semantic models. In Proceedings of LREC 2014. 216--223."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00928-1_51"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_28_1","volume-title":"Chexnet: Radiologist-level pneumonia detection on chest x-rays with deep learning. arXiv preprint arXiv:1711.05225","author":"Rajpurkar Pranav","year":"2017","unstructured":"Pranav Rajpurkar, Jeremy Irvin, Kaylie Zhu, Brandon Yang, Hershel Mehta, Tony Duan, Daisy Ding, Aarti Bagul, Curtis Langlotz, Katie Shpanskaya, et al. 2017. Chexnet: Radiologist-level pneumonia detection on chest x-rays with deep learning. arXiv preprint arXiv:1711.05225 (2017)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00322"},{"key":"e_1_3_2_1_30_1","volume-title":"Proceedings of the 5th Machine Learning for Healthcare Conference (Proceedings of Machine Learning Research","author":"Saleh Shems","year":"2020","unstructured":"Shems Saleh, William Boag, Lauren Erdman, and Tristan Naumann. 2020. Clinical Collabsheets: 53 Questions to Guide a Clinical Collaboration. In Proceedings of the 5th Machine Learning for Healthcare Conference (Proceedings of Machine Learning Research, Vol. 126). PMLR."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-6456"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.274"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3354"},{"key":"e_1_3_2_1_34_1","volume-title":"Bleu is not suitable for the evaluation of text simplification. arXiv preprint arXiv:1810.05995","author":"Sulem Elior","year":"2018","unstructured":"Elior Sulem, Omri Abend, and Ari Rappoport. 2018. Bleu is not suitable for the evaluation of text simplification. arXiv preprint arXiv:1810.05995 (2018)."},{"key":"e_1_3_2_1_35_1","unstructured":"Richard H. Thaler and howpublished = Penguin Books year = 2009 isbn = 9780143115267 Cass R. Sunstein title = Nudge: Improving Decisions About Health Wealth and Happiness Revised Expanded Edition. [n.d.]."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1148\/radiol.2019182716"},{"key":"e_1_3_2_1_38_1","volume-title":"Andrew L Beam, and Isaac S Kohane","author":"Yu Kun-Hsing","year":"2018","unstructured":"Kun-Hsing Yu, Andrew L Beam, and Isaac S Kohane. 2018. Artificial intelligence in healthcare. Nature biomedical engineering 2, 10 (2018), 719--731."},{"key":"e_1_3_2_1_39_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675 (2019)."}],"event":{"name":"FAccT '21: 2021 ACM Conference on Fairness, Accountability, and Transparency","sponsor":["ACM Association for Computing Machinery"],"location":"Virtual Event Canada","acronym":"FAccT '21"},"container-title":["Proceedings of the 2021 ACM Conference on Fairness, Accountability, and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3442188.3445909","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3442188.3445909","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3442188.3445909","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:48:57Z","timestamp":1750193337000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3442188.3445909"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,3]]},"references-count":39,"alternative-id":["10.1145\/3442188.3445909","10.1145\/3442188"],"URL":"https:\/\/doi.org\/10.1145\/3442188.3445909","relation":{},"subject":[],"published":{"date-parts":[[2021,3]]},"assertion":[{"value":"2021-03-01","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}