{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T17:24:26Z","timestamp":1777656266420,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":63,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,10,27]],"date-time":"2025-10-27T00:00:00Z","timestamp":1761523200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["OIA-1946391,2223793"],"award-info":[{"award-number":["OIA-1946391,2223793"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000002","name":"National Institutes of Health","doi-asserted-by":"publisher","award":["1R01CA277739-01"],"award-info":[{"award-number":["1R01CA277739-01"]}],"id":[{"id":"10.13039\/100000002","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755039","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T05:47:42Z","timestamp":1761371262000},"page":"257-266","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Interpreting Radiologist's Intention from Eye Movements in Chest X-ray Diagnosis"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3170-4142","authenticated-orcid":false,"given":"Trong-Thang","family":"Pham","sequence":"first","affiliation":[{"name":"University of Arkansas, Fayetteville, AR, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1449-211X","authenticated-orcid":false,"given":"Anh","family":"Nguyen","sequence":"additional","affiliation":[{"name":"University of Liverpool, Liverpool, United Kingdom"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2571-5865","authenticated-orcid":false,"given":"Zhigang","family":"Deng","sequence":"additional","affiliation":[{"name":"University of Houston, Houston, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1005-0995","authenticated-orcid":false,"given":"Carol C.","family":"Wu","sequence":"additional","affiliation":[{"name":"The University of Texas MD Anderson Cancer Center, Houston, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7280-2182","authenticated-orcid":false,"given":"Hien","family":"Nguyen","sequence":"additional","affiliation":[{"name":"University of Houston, Houston, TX, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2571-0511","authenticated-orcid":false,"given":"Ngan","family":"Le","sequence":"additional","affiliation":[{"name":"University of Arkansas, Fayetteville, AR, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00625"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00346"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0066169"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16437-8_66"},{"key":"e_1_3_2_2_5_1","volume-title":"a dataset of reports and eye-tracking data for localization of abnormalities in chest x-rays. Scientific data","author":"Lanfredi Ricardo Bigolin","year":"2022","unstructured":"Ricardo Bigolin Lanfredi, Mingyuan Zhang, William F Auffermann, Jessica Chan, Phuong-Anh T Duong, Vivek Srikumar, Trafton Drew, Joyce D Schroeder, and Tolga Tasdizen. 2022. REFLACX, a dataset of reports and eye-tracking data for localization of abnormalities in chest x-rays. Scientific data, Vol. 9, 1 (2022), 350."},{"key":"e_1_3_2_2_6_1","volume-title":"Predicting Visual Attention in Graphic Design Documents","author":"Chakraborty Souradeep","year":"2022","unstructured":"Souradeep Chakraborty and otherss. 2022. Predicting Visual Attention in Graphic Design Documents. IEEE Transactions on Multimedia (TMM) (2022)."},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_6"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00265"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01073"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02402"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02402"},{"key":"e_1_3_2_2_12_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops. 5031-5040","author":"Yupei","unstructured":"Yupei Chen et al., 2022. Characterizing Target-Absent Human Attention. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops. 5031-5040."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2018\/89"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00135"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2851672"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379337.3415825"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3125324"},{"key":"e_1_3_2_2_19_1","volume-title":"Deep residual learning for image recognition. arXiv e-prints. arXiv preprint arXiv:1512.03385","author":"He Kaiming","year":"2015","unstructured":"Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun. 2015. Deep residual learning for image recognition. arXiv e-prints. arXiv preprint arXiv:1512.03385, Vol. 10 (2015)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.3390\/make6020048"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.38"},{"key":"e_1_3_2_2_23_1","first-page":"590","volume-title":"Proceedings of the AAAI conference on artificial intelligence","volume":"33","author":"Jeremy","unstructured":"Jeremy Irvin et al., 2019. Chexpert: A large chest radiograph dataset with uncertainty labels and expert comparison. In Proceedings of the AAAI conference on artificial intelligence, Vol. 33. 590-597."},{"key":"e_1_3_2_2_24_1","volume-title":"Bruce","author":"Jia Sen","year":"2020","unstructured":"Sen Jia and Neil D. B. Bruce. 2020. EML-NET:An Expandable Multi-Layer NETwork for Saliency Prediction. Image and Vision Computing (2020)."},{"key":"e_1_3_2_2_25_1","first-page":"1","article-title":"Creation and validation of a chest X-ray dataset with eye-tracking and report dictation for AI development","volume":"8","author":"Alexandros Karargyris","year":"2021","unstructured":"Alexandros Karargyris et al., 2021. Creation and validation of a chest X-ray dataset with eye-tracking and report dictation for AI development. Scientific Data, Vol. 8, 1 (2021), 1-18.","journal-title":"Scientific Data"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW60836.2024.00034"},{"key":"e_1_3_2_2_27_1","volume-title":"Wallis","author":"K\u00fcmmerer Matthias","year":"2022","unstructured":"Matthias K\u00fcmmerer, Matthias Bethge, and Thomas S. A. Wallis. 2022. DeepGaze III: Modeling free-viewing human scanpaths with deep learning. Journal of Vision (JoV) (2022)."},{"key":"e_1_3_2_2_28_1","volume-title":"DeepGaze II: Reading fixations from deep features trained on object recognition. arXiv preprint arXiv:1610.01563","author":"K\u00fcmmerer Matthias","year":"2016","unstructured":"Matthias K\u00fcmmerer, Thomas S. A. Wallis, and Matthias Bethge. 2016. DeepGaze II: Reading fixations from deep features trained on object recognition. arXiv preprint arXiv:1610.01563 (2016)."},{"key":"e_1_3_2_2_29_1","volume-title":"Controllable Large Image Synthesis with Diffusion Models in Infinite Dimensions. In European Conference on Computer Vision. Springer, 385-401","author":"Le Minh-Quan","year":"2024","unstructured":"Minh-Quan Le, Alexandros Graikos, Srikar Yellapragada, Rajarsi Gupta, Joel Saltz, and Dimitris Samaras. 2024. Controllable Large Image Synthesis with Diffusion Models in Infinite Dimensions. In European Conference on Computer Vision. Springer, 385-401."},{"key":"e_1_3_2_2_30_1","volume-title":"The Thirteenth International Conference on Learning Representations.","author":"Le Minh-Quan","year":"2025","unstructured":"Minh-Quan Le, Gaurav Mittal, Tianjian Meng, A S M Iftekhar, Vishwas Suryanarayanan, Barun Patra, Dimitris Samaras, and Mei Chen. 2025. Hummingbird: High Fidelity Image Generation via Multimodal Context Alignment. In The Thirteenth International Conference on Learning Representations."},{"key":"e_1_3_2_2_31_1","unstructured":"Peizhao Li Junfeng He Gang Li Rachit Bhargava Shaolei Shen Nachiappan Valliappan Youwei Liang Hongxiang Gu Venky Ramachandran Golnaz Farhadi Yang Li Kai J Kohlhoff and Vidhya Navalpakkam. 2023. UniAR: Unifying Human Attention and Response Prediction on Visual Content. arXiv preprint arXiv:2312.10175 (2023)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00865"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.106"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-87589-3_44"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.01073"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMI.2020.2995518"},{"key":"e_1_3_2_2_37_1","volume-title":"Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101","author":"Loshchilov I","year":"2017","unstructured":"I Loshchilov. 2017. Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)."},{"key":"e_1_3_2_2_38_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Sounak","unstructured":"Sounak Mondal et al., 2023. Gazeformer: Scalable, Effective and Fast Prediction of Goal-Directed Human Attention.. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_39_1","volume-title":"Sandra Costa Sousa, Chun Ouyang, Anderson Maciel, Andrew Duchowski, Joaquim Jorge, and Catarina Moreira.","author":"Neves Jos\u00e9","year":"2024","unstructured":"Jos\u00e9 Neves, Chihcheng Hsieh, Isabel Blanco Nobre, Sandra Costa Sousa, Chun Ouyang, Anderson Maciel, Andrew Duchowski, Joaquim Jorge, and Catarina Moreira. 2024. Shedding light on ai in radiology: A systematic review and taxonomy of eye gaze-driven interpretability in deep learning. European Journal of Radiology (2024), 111341."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.01581"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00108"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW60836.2024.00025"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/JBHI.2024.3422168"},{"key":"e_1_3_2_2_44_1","volume-title":"Tien-Phat Nguyen, Khoa Vo, Minh Tran, Ngoc Son Nguyen, Cuong Tran Van, Yuki Ikebe, et al.","author":"Pham Trong-Thang","year":"2025","unstructured":"Trong-Thang Pham, Akash Awasthi, Saba Khan, Esteban Duran Marti, Tien-Phat Nguyen, Khoa Vo, Minh Tran, Ngoc Son Nguyen, Cuong Tran Van, Yuki Ikebe, et al., 2025a. CT-ScanGaze: A Dataset and Baselines for 3D Volumetric Scanpath Modeling. arXiv preprint arXiv:2507.12591 (2025)."},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00767"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.artmed.2024.103054"},{"key":"e_1_3_2_2_47_1","volume-title":"FG-CXR: A Radiologist-Aligned Gaze Dataset for Enhancing Interpretability in Chest X-Ray Report Generation. ACCV","author":"Pham Trong Thang","year":"2024","unstructured":"Trong Thang Pham, Ngoc-Vuong Ho, Nhat-Tan Bui, Thinh Phan, Patel Brijesh, Donald Adjeroh, Gianfranco Doretto, Anh Nguyen, Carol C. Wu, Hien Nguyen, and Ngan Le. 2024b. FG-CXR: A Radiologist-Aligned Gaze Dataset for Enhancing Interpretability in Chest X-Ray Report Generation. ACCV (2024)."},{"key":"e_1_3_2_2_48_1","volume-title":"GazeSearch: Radiology Findings Search Benchmark. arXiv preprint arXiv:2411.05780","author":"Pham Trong Thang","year":"2024","unstructured":"Trong Thang Pham, Tien-Phat Nguyen, Yuki Ikebe, Akash Awasthi, Zhigang Deng, Carol C Wu, Hien Nguyen, and Ngan Le. 2024c. GazeSearch: Radiology Findings Search Benchmark. arXiv preprint arXiv:2411.05780 (2024)."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3607822.3616408"},{"key":"e_1_3_2_2_50_1","volume-title":"Chexnet: Radiologist-level pneumonia detection on chest x-rays with deep learning. arXiv preprint arXiv:1711.05225","author":"Rajpurkar Pranav","year":"2017","unstructured":"Pranav Rajpurkar, Jeremy Irvin, Kaylie Zhu, Brandon Yang, Hershel Mehta, Tony Duan, Daisy Ding, Aarti Bagul, Curtis Langlotz, Katie Shpanskaya, et al., 2017. Chexnet: Radiologist-level pneumonia detection on chest x-rays with deep learning. arXiv preprint arXiv:1711.05225 (2017)."},{"key":"e_1_3_2_2_51_1","volume-title":"Lungren","author":"Smit Akshay","year":"2020","unstructured":"Akshay Smit, Saahil Jain, Pranav Rajpurkar, Anuj Pareek, Andrew Y. Ng, and Matthew P. Lungren. 2020. CheXbert: Combining Automatic Labelers and Expert Annotations for Accurate Radiology Report Labeling Using BERT. arXiv:2004.09167 [cs.CL]"},{"key":"e_1_3_2_2_52_1","volume-title":"Visual Scanpath Prediction using IOR-ROI Recurrent Mixture Density Network","author":"Sun Wanjie","year":"2019","unstructured":"Wanjie Sun, Zhenzhong Chen, and Feng Wu. 2019. Visual Scanpath Prediction using IOR-ROI Recurrent Mixture Density Network. IEEE Transactions on Pattern Analysis and Machine Intelligence (IEEE TPAMI) (2019)."},{"key":"e_1_3_2_2_53_1","volume-title":"SwinCheX: Multi-label classification on chest X-ray images with transformers. arXiv preprint arXiv:2206.04246","author":"Taslimi Sina","year":"2022","unstructured":"Sina Taslimi, Soroush Taslimi, Nima Fathi, Mohammadreza Salehi, and Mohammad Hossein Rohban. 2022. SwinCheX: Multi-label classification on chest X-ray images with transformers. arXiv preprint arXiv:2206.04246 (2022)."},{"key":"e_1_3_2_2_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00362"},{"key":"e_1_3_2_2_55_1","volume-title":"Attention is all you need. Advances in Neural Information Processing Systems","author":"Vaswani A","year":"2017","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_2_2_56_1","first-page":"33536","article-title":"Multi-granularity cross-modal alignment for generalized medical visual representation learning","volume":"35","author":"Wang Fuying","year":"2022","unstructured":"Fuying Wang, Yuyin Zhou, Shujun Wang, Varut Vardhanabhuti, and Lequan Yu. 2022. Multi-granularity cross-modal alignment for generalized medical visual representation learning. Advances in Neural Information Processing Systems, Vol. 35 (2022), 33536-33549.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_57_1","unstructured":"Joy T Wu Nkechinyere N Agu Ismini Lourentzou Arjun Sharma Joseph A Paguio Jasper S Yao Edward C Dee William Mitchell Satyananda Kashyap Andrea Giovannini et al. 2021. Chest imagenome dataset for clinical reasoning. arXiv preprint arXiv:2108.00316 (2021)."},{"key":"e_1_3_2_2_58_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403172"},{"key":"e_1_3_2_2_59_1","doi-asserted-by":"publisher","DOI":"10.1145\/3233547.3233573"},{"key":"e_1_3_2_2_60_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR).","author":"Zhibo","unstructured":"Zhibo Yang et al., 2020. Predicting Goal-directed Human Attention Using Inverse Reinforcement Learning.. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)."},{"key":"e_1_3_2_2_61_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_4"},{"key":"e_1_3_2_2_62_1","volume-title":"Predicting Human Attention using Computational Attention. arXiv preprint arXiv:2303.09383v2","author":"Yang Zhibo","year":"2023","unstructured":"Zhibo Yang, Sounak Mondal, Seoyoung Ahn, Gregory Zelinsky, Minh Hoai, and Dimitris Samaras. 2023. Predicting Human Attention using Computational Attention. arXiv preprint arXiv:2303.09383v2 (2023)."},{"key":"e_1_3_2_2_63_1","volume-title":"Weakly supervised medical diagnosis and localization from multiple resolutions. arXiv preprint arXiv:1803.07703","author":"Yao Li","year":"2018","unstructured":"Li Yao, Jordan Prosky, Eric Poblenz, Ben Covington, and Kevin Lyman. 2018. Weakly supervised medical diagnosis and localization from multiple resolutions. arXiv preprint arXiv:1803.07703 (2018)."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755039","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755039","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T19:17:01Z","timestamp":1765307821000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755039"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":63,"alternative-id":["10.1145\/3746027.3755039","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755039","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}