{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,16]],"date-time":"2026-02-16T15:39:49Z","timestamp":1771256389154,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":36,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,13]],"date-time":"2024-12-13T00:00:00Z","timestamp":1734048000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,13]]},"DOI":"10.1145\/3711542.3711580","type":"proceedings-article","created":{"date-parts":[[2025,4,14]],"date-time":"2025-04-14T04:46:56Z","timestamp":1744606016000},"page":"18-23","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Evidence Extraction for Automated Medical Coding: Preliminary Evaluation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4255-5445","authenticated-orcid":false,"given":"Xiaorui","family":"Jiang","sequence":"first","affiliation":[{"name":"Information School, The University of Sheffield, Sheffield, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0588-1974","authenticated-orcid":false,"given":"Kulsoom","family":"Khan","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Institute of Business Administration, Karachi, Pakistan"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1935-5552","authenticated-orcid":false,"given":"Sumithra Thinakara","family":"Vasantha","sequence":"additional","affiliation":[{"name":"Coventry University, Coventry, United Kingdom"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4458-1594","authenticated-orcid":false,"given":"Sajjad","family":"Haider","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Institute of Business Administration, Karachi, Pakistan"}]}],"member":"320","published-online":{"date-parts":[[2025,4,13]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.130"},{"key":"e_1_3_3_1_3_2","volume-title":"Language Models are Unsupervised Multitask Learners","year":"2019","unstructured":"Alec Radford and Jeffrey Wu and Rewon Child and David Luan and Dario Amodei and Ilya Sutskever 2019. Language Models are Unsupervised Multitask Learners. Retrieved Feb 16, 2025 from https:\/\/cdn.openai.com\/better-language-models\/language_models_are_unsupervised_multitask_learners.pdf"},{"key":"e_1_3_3_1_4_2","series-title":"(CHIL \u201920 Workshop)","volume-title":"Proceedings of the Workshop of the 2020 ACM Conference on Health, Inference, and Learning","author":"Altosaar Kexin Huang\u00a0Jaan","year":"2020","unstructured":"Kexin Huang\u00a0Jaan Altosaar and Rajesh Ranganath. 2020. ClinicalBERT: Modeling Clinical Notes and Predicting Hospital Readmission. In Proceedings of the Workshop of the 2020 ACM Conference on Health, Inference, and Learning(CHIL \u201920 Workshop). https:\/\/arxiv.org\/abs\/1904.05342"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Aitziber Atutxa Arantza\u00a0D\u00edaz de Ilarraza Koldo Gojenola Maite Oronoz and Olatz\u00a0Perez de Vi\u00f1aspre. 2019. Interpretable deep learning to map diagnostic texts to ICD-10 codes. Int. J. Med. Inform. 129 (Sept. 2019) 49\u201359. https:\/\/doi.org\/10.1016\/j.ijmedinf.2019.05.015","DOI":"10.1016\/j.ijmedinf.2019.05.015"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1371"},{"key":"e_1_3_3_1_7_2","series-title":"(DG4H \u201923 @ NeurIPS \u201923)","volume-title":"Proceedings of the Workshop on Deep Generative Models for Health at the the 36th International Conference on Neural Information Processing Systems","author":"Boyle Joseph","year":"2019","unstructured":"Joseph Boyle, Antanas Kascenas, Pat Lok, Maria Liakata, and Alison O\u2019Neil. 2019. SciBERT: A Pretrained Language Model for Scientific Text. In Proceedings of the Workshop on Deep Generative Models for Health at the the 36th International Conference on Neural Information Processing Systems(DG4H \u201923 @ NeurIPS \u201923). https:\/\/openreview.net\/forum?id=mqnR8rGWkn"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1145\/3529372.3530922"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.acl-long.416"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"crossref","unstructured":"Hang Dong Mat\u00fa\u0161 Falis William Whiteley Beatrice Alex Joshua Matterson Shaoxiong Ji Jiaoyan Chen and Honghan Wu. 2022. Automated clinical coding: what why and where we are? npj Digit. Med. 5 Article number\u00a0159 (Oct. 2022) 8\u00a0pages. https:\/\/doi.org\/10.1038\/s41746-022-00705-7","DOI":"10.1038\/s41746-022-00705-7"},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Jingcheng Du Qingyu Chen Yifan Peng Yang Xiang Cui Tao and Zhiyong Lu. 2019. ML-Net: multi-label classification of biomedical texts with deep neural networks. J. Am. Med. Inform. Assn. 26 11 (Nov. 2019) 1279\u20131285. https:\/\/doi.org\/10.1093\/jamia\/ocz085","DOI":"10.1093\/jamia\/ocz085"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Andres Duque Hermenegildo Fabregat Lourdes Araujo and Juan Martinez-Romo. 2021. A keyphrase-based approach for interpretable ICD-10 code classification of Spanish medical reports. Artif. Intell. Med. 121 Article 102177 (Nov. 2021). https:\/\/doi.org\/10.1016\/j.artmed.2021.102177","DOI":"10.1016\/j.artmed.2021.102177"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"crossref","unstructured":"Mat\u00fa\u0161 Falis Aryo\u00a0Pradipta Gema Hang Dong Luke Daines Siddharth Basetti Michael Holder Rose\u00a0S Penfold Alexandra Birch and Beatrice Alex. 2024. Can GPT-3.5 generate and code discharge summaries? J. Am. Med. Inform. Assn. 31 10 (Nov. 2024) 2284\u20132293. https:\/\/doi.org\/10.1093\/jamia\/ocae132","DOI":"10.1093\/jamia\/ocae132"},{"key":"e_1_3_3_1_14_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.clinicalnlp-1.2"},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Shaoxiong Ji Matti H\u00f6ltt\u00e4 and Pekka Marttinen. 2021. Does the magic of BERT apply to medical code assignment? A quantitative study. Comput. Biol. Med. 139 Article 104998 (Dec. 2021) 7\u00a0pages. https:\/\/doi.org\/10.1016\/j.compbiomed.2021.104998","DOI":"10.1016\/j.compbiomed.2021.104998"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Shaoxiong Ji Xiaobo Li Wei Sun Hang Dong Ara Taalas Yijia Zhang Honghan Wu Esa Pitk\u00e4nen and Pekka Marttinen. 2024. A Unified Review of Deep Learning for Automated Medical Coding. ACM Comput. Sur. 56 2 Article No.\u00a036 (Oct. 2024) 41\u00a0pages. https:\/\/doi.org\/10.1145\/3664615","DOI":"10.1145\/3664615"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1349"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"crossref","unstructured":"Alistair\u00a0E.W. Johnson Tom\u00a0J. Pollard Lu Shen Li wei H.\u00a0Lehman Mengling Feng Mohammad Ghassemi Benjamin Moody Peter Szolovits Leo\u00a0Anthony Celi and Roger\u00a0G. Mark. 2015. MIMIC-III a freely accessible critical care database. Sci. Data 3 Article number\u00a0160035 (May 2015) 8\u00a0pages. https:\/\/doi.org\/10.1038\/sdata.2016.35","DOI":"10.1038\/sdata.2016.35"},{"key":"e_1_3_3_1_19_2","series-title":"(ML4H \u201921)","first-page":"196","volume-title":"Proceedings of the 6th Machine Learning for Healthcare Conference","author":"Kim Byung-Hak","year":"2021","unstructured":"Byung-Hak Kim and Varun Ganapathi. 2021. Read, Attend, and Code: Pushing the Limits of Medical Codes Prediction from Clinical Notes by Machines. In Proceedings of the 6th Machine Learning for Healthcare Conference(ML4H \u201921). PMLR, 196\u2013208. https:\/\/proceedings.mlr.press\/v149\/kim21a.html"},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3477314.3506983"},{"key":"e_1_3_3_1_21_2","series-title":"(ICML \u201915)","first-page":"957","volume-title":"Proceedings of the 32nd International Conference on Machine Learning","author":"Kusner Matt\u00a0J.","year":"2015","unstructured":"Matt\u00a0J. Kusner, Yu Sun, Nicholas\u00a0I. Kolkin, and Kilian Q.Weinberger. 2015. From Word Embeddings to Document Distances. In Proceedings of the 32nd International Conference on Machine Learning(ICML \u201915). PMLR, 957\u2013966. https:\/\/proceedings.mlr.press\/v37\/kusnerb15.html"},{"key":"e_1_3_3_1_22_2","unstructured":"Simon\u00a0A. Lee and Timothy Lindsey. 2024. Can Large Language Models abstract Medical Coded Language. arxiv:https:\/\/arXiv.org\/abs\/2403.10822\u00a0[cs.CL]"},{"key":"e_1_3_3_1_23_2","unstructured":"Rumeng Li Xun Wang and Hong Yu. 2024. Exploring LLM Multi-Agents for ICD Coding. arxiv:https:\/\/arXiv.org\/abs\/2406.15363\u00a0[cs.CL]"},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"crossref","unstructured":"Tsung-Yi Lin Priya Goyal Ross Girshick Kaiming He and Piotr Doll\u00e1r. 2018. Focal Loss for Dense Object Detection. IEEE T. Pattern Anal. 42 2 (Feb. 2018) 318\u2013327. https:\/\/doi.org\/10.1109\/tpami.2018.2858826","DOI":"10.1109\/TPAMI.2018.2858826"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Leibo Liu Oscar Perez-Concha Anthony Nguyen Vicki Bennett and Louisa Jorm. 2023. Automated ICD coding using extreme multi-label long text transformer-based models. Artif. Intell. Med. 144 Article 104323 (Oct. 2023) 10\u00a0pages. https:\/\/doi.org\/10.1016\/j.jbi.2023.104323","DOI":"10.1016\/j.artmed.2023.102662"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Guillermo L\u00f3pez-Garc\u00eda Jos\u00e9\u00a0M. Jerez Nuria Ribelles Emilio Alba and Francisco\u00a0J. Veredas. 2023. Explainable clinical coding with in-domain adapted transformers. J. Biomed. Inform. 139 Article 102662 (March 2023) 10\u00a0pages. https:\/\/doi.org\/10.1016\/j.jbi.2023.104323","DOI":"10.1016\/j.jbi.2023.104323"},{"key":"e_1_3_3_1_27_2","series-title":"(IberLEF \u201920)","first-page":"303","volume-title":"Proceedings of the Iberian Languages Evaluation Forum","author":"Miranda-Escalada Antonio","year":"2020","unstructured":"Antonio Miranda-Escalada, Eul\u00e0lia Farr\u00e9a, and Martin Krallinger. 2020. Named Entity Recognition, Concept Normalization and Clinical Coding: Overview of the Cantemist Track for Cancer Text Mining in Spanish, Corpus, Guidelines, Methods and Results. In Proceedings of the Iberian Languages Evaluation Forum(IberLEF \u201920). CEUR, 303\u2013323. https:\/\/ceur-ws.org\/Vol-2664\/capitel_overview.pdf"},{"key":"e_1_3_3_1_28_2","series-title":"(CLEF \u201920)","volume-title":"Proceedings of the CLEF eHealth Evaluation Lab 2020","author":"Miranda-Escalada Antonio","year":"2020","unstructured":"Antonio Miranda-Escalada, Aitor Gonzalez-Agirre, Jordi Armengol-Estap\u00e9, and Martin Krallinger. 2020. Overview of automatic clinical coding: annotations, guidelines, and solutions for non-English clinical cases at CodiEsp track of eHealth CLEF 2020. In Proceedings of the CLEF eHealth Evaluation Lab 2020(CLEF \u201920). CEUR. https:\/\/ceur-ws.org\/Vol-2664\/capitel_overview.pdf"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"crossref","unstructured":"Laila Rasmy Yang Xiang Ziqian Xie Cui Tao and Degui Zhi. 2021. Med-BERT: pretrained contextualized embeddings on large-scale structured electronic health records for disease prediction. NPJ Digit. Med. 4 Article 86 (May 2021). https:\/\/doi.org\/10.1038\/s41746-021-00455-y","DOI":"10.1038\/s41746-021-00455-y"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Karan Singhal Shekoofeh Azizi Tao Tu S.\u00a0Sara Mahdavi Jason Wei Hyung\u00a0Won Chung Nathan Scales Ajay Tanwani Heather Cole-Lewis Stephen Pfohl Perry Payne Martin Seneviratne Paul Gamble Chris Kelly Abubakr Babiker Nathanael Sch\u00e4rli Aakanksha Chowdhery Philip Mansfield Dina Demner-Fushman Blaise\u00a0Ag\u00fcera y Arcas Dale Webster Greg\u00a0S. Corrado Yossi Matias Katherine Chou Juraj Gottweis Nenad Tomasev Yun Liu Alvin Rajkomar Joelle Barral Christopher Semturs Alan Karthikesalingam and Vivek Natarajan. 2023. Large language models encode clinical knowledge. Nature 620 (Aug. 2023) 172\u2013180. https:\/\/doi.org\/10.1038\/s41586-023-06291-2","DOI":"10.1038\/s41586-023-06291-2"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"crossref","unstructured":"Ali Soroush Benjamin\u00a0S. Glicksberg Eyal Zimlichman Yiftach Barash Robert Freeman Alexander\u00a0W. Charney Girish\u00a0N Nadkarni and Eyal Klang. 2024. Large Language Models Are Poor Medical Coders \u2014 Benchmarking of Medical Code Querying. NEJM AI 1 5 (April 2024). https:\/\/doi.org\/10.1056\/AIdbp2300040","DOI":"10.1056\/AIdbp2300040"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Kaushik\u00a0P. Venkatesh Marium\u00a0M. Raza and Joseph\u00a0C. Kvedar. 2023. Automating the overburdened clinical coding system: challenges and next steps. npj Digit. Med. 6 Article number\u00a016 (Feb. 2023) 2\u00a0pages. https:\/\/doi.org\/10.1038\/s41746-023-00768-0","DOI":"10.1038\/s41746-023-00768-0"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/461"},{"key":"e_1_3_3_1_34_2","doi-asserted-by":"crossref","unstructured":"Guangyu Wang Xiaohong Liu Zhen Ying Guoxing Yang Zhiwei Chen Zhiwen Liu Min Zhang Hongmei Yan Yuxing Lu Yuanxu Gao Kanmin Xue Xiaoying Li and Ying Chen. 2023. Optimized glycemic control of type 2 diabetes with reinforcement learning: a proof-of-concept trial. Nat. Med. 29 (Sept. 2023) 2633\u20132642. https:\/\/doi.org\/10.1038\/s41591-023-02552-9","DOI":"10.1038\/s41591-023-02552-9"},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Yuzhou Wu Zhigang Chen Xin Yao Xuechen Chen Zeren Zhou and Jinkai Xue. 2022. JAN: Joint Attention Networks for Automatic ICD Coding. IEEE J. Biomed. Health 26 10 (July 2022) 5235\u20135246. https:\/\/doi.org\/10.1109\/JBHI.2022.3189404","DOI":"10.1109\/JBHI.2022.3189404"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1098"},{"key":"e_1_3_3_1_37_2","series-title":"(ML4H \u201923)","volume-title":"Proceedings of of the 2023 Machine Learning for Health symposium","author":"Yang Zhichao","year":"2023","unstructured":"Zhichao Yang, Sanjit\u00a0Singh Batra, Joel Stremmel, and Eran Halperin. 2023. Surpassing GPT-4 Medical Coding with a Two-Stage Approach. In Proceedings of of the 2023 Machine Learning for Health symposium(ML4H \u201923). 19\u00a0pages. https:\/\/arxiv.org\/abs\/2311.13735"}],"event":{"name":"NLPIR 2024: 2024 8th International Conference on Natural Language Processing and Information Retrieval","location":"Okayama Japan","acronym":"NLPIR 2024"},"container-title":["Proceedings of the 2024 8th International Conference on Natural Language Processing and Information Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711542.3711580","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711542.3711580","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:29Z","timestamp":1750295909000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711542.3711580"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,13]]},"references-count":36,"alternative-id":["10.1145\/3711542.3711580","10.1145\/3711542"],"URL":"https:\/\/doi.org\/10.1145\/3711542.3711580","relation":{},"subject":[],"published":{"date-parts":[[2024,12,13]]},"assertion":[{"value":"2025-04-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}