{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T12:08:28Z","timestamp":1779970108224,"version":"3.53.1"},"reference-count":58,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,12,1]],"date-time":"2026-12-01T00:00:00Z","timestamp":1796083200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100013064","name":"Key Research and Development Program of Jiangxi Province","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013064","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004479","name":"Jiangxi Provincial Natural Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100004479","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Displays"],"published-print":{"date-parts":[[2026,12]]},"DOI":"10.1016\/j.displa.2026.103550","type":"journal-article","created":{"date-parts":[[2026,5,25]],"date-time":"2026-05-25T16:06:36Z","timestamp":1779725196000},"page":"103550","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Towards clinician-like reasoning: Stage-aware gaze-guided heterogeneous network for medical image recognition"],"prefix":"10.1016","volume":"95","author":[{"given":"Boyang","family":"Liu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guangli","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yuxing","family":"Zou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ruiyang","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xinjiong","family":"Zhou","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8375-0039","authenticated-orcid":false,"given":"Hongbin","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jingqin","family":"Lv","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Gongning","family":"Luo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Donghong","family":"Ji","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.displa.2026.103550_b1","article-title":"Recent advances and clinical applications of deep learning in medical image analysis","volume":"79","author":"Chen","year":"2021","journal-title":"Med. Image Anal."},{"key":"10.1016\/j.displa.2026.103550_b2","series-title":"2025 IEEE 22nd International Multi-Conference on Systems, Signals & Devices","first-page":"891","article-title":"AttCDCNet: Attention-enhanced chest disease classification using X-Ray images","author":"Khater","year":"2024"},{"key":"10.1016\/j.displa.2026.103550_b3","doi-asserted-by":"crossref","first-page":"2347","DOI":"10.1007\/s13042-023-02034-x","article-title":"BCT-OFD: bridging CNN and transformer via online feature distillation for COVID-19 image recognition","volume":"15","author":"Zhang","year":"2023","journal-title":"Int. J. Mach. Learn. Cybern."},{"key":"10.1016\/j.displa.2026.103550_b4","doi-asserted-by":"crossref","first-page":"31939","DOI":"10.1007\/s11042-024-20439-w","article-title":"Automated lesion detection in gastrointestinal endoscopic images: leveraging deep belief networks and genetic algorithm-based segmentation","volume":"84","author":"Alhajlah","year":"2024","journal-title":"Multim. Tools Appl."},{"key":"10.1016\/j.displa.2026.103550_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.compbiomed.2023.106683","article-title":"Deep learning based classification of multi-label chest X-ray images via dual-weighted metric loss","volume":"157","author":"Jin","year":"2023","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.displa.2026.103550_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2023.105937","article-title":"Transdd: A transformer-based dual-path decoder for improving the performance of thoracic diseases classification using chest X-ray","volume":"91","author":"Jiang","year":"2024","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.displa.2026.103550_b7","series-title":"Computer-aided diagnosis of thoracic diseases in chest X-rays using hybrid CNN-transformer architecture","author":"Singh","year":"2024"},{"key":"10.1016\/j.displa.2026.103550_b8","series-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"770","article-title":"Deep residual learning for image recognition","author":"He","year":"2015"},{"key":"10.1016\/j.displa.2026.103550_b9","series-title":"Inception-v4, inception-ResNet and the impact of residual connections on learning","author":"Szegedy","year":"2016"},{"key":"10.1016\/j.displa.2026.103550_b10","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7277","article-title":"MPViT: Multi-path vision transformer for dense prediction","author":"Lee","year":"2021"},{"key":"10.1016\/j.displa.2026.103550_b11","series-title":"TransFuse: Fusing transformers and CNNs for medical image segmentation","author":"Zhang","year":"2021"},{"key":"10.1016\/j.displa.2026.103550_b12","doi-asserted-by":"crossref","DOI":"10.3390\/diagnostics11081384","article-title":"TransMed: Transformers advance multi-modal medical image classification","volume":"11","author":"Dai","year":"2021","journal-title":"Diagnostics"},{"key":"10.1016\/j.displa.2026.103550_b13","series-title":"A hybrid fully convolutional CNN-transformer model for inherently interpretable medical image classification","author":"Djoumessi","year":"2025"},{"key":"10.1016\/j.displa.2026.103550_b14","article-title":"REFLACX, a dataset of reports and eye-tracking data for localization of abnormalities in chest x-rays","volume":"9","author":"Lanfredi","year":"2021","journal-title":"Sci. Data"},{"key":"10.1016\/j.displa.2026.103550_b15","article-title":"GlanceSeg: Real-time microaneurysm lesion segmentation with gaze-map-guided foundation model for early detection of diabetic retinopathy","volume":"PP","author":"Jiang","year":"2023","journal-title":"IEEE J. Biomed. Health Inform."},{"key":"10.1016\/j.displa.2026.103550_b16","doi-asserted-by":"crossref","first-page":"1688","DOI":"10.1109\/TMI.2022.3146973","article-title":"Follow my eye: Using gaze to supervise computer-aided diagnosis","volume":"41","author":"Wang","year":"2022","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.displa.2026.103550_b17","series-title":"Leveraging human selective attention for medical image analysis with limited training data","author":"Huang","year":"2021"},{"key":"10.1016\/j.displa.2026.103550_b18","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","article-title":"Mammo-net: Integrating gaze supervision and interactive information in multi-view mammogram classification","author":"Ji","year":"2023"},{"key":"10.1016\/j.displa.2026.103550_b19","doi-asserted-by":"crossref","first-page":"3121","DOI":"10.1007\/s40747-021-00474-y","article-title":"A federated approach for detecting the chest diseases using DenseNet for multi-label classification","volume":"8","author":"Priya","year":"2021","journal-title":"Complex Intell. Syst."},{"key":"10.1016\/j.displa.2026.103550_b20","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2261","article-title":"Densely connected convolutional networks","author":"Huang","year":"2016"},{"key":"10.1016\/j.displa.2026.103550_b21","doi-asserted-by":"crossref","first-page":"2455","DOI":"10.1109\/TCSVT.2021.3079900","article-title":"Multi-label chest X-Ray image classification via semantic similarity graph embedding","volume":"32","author":"Chen","year":"2022","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.displa.2026.103550_b22","doi-asserted-by":"crossref","DOI":"10.1186\/s12911-025-02966-0","article-title":"A novel network-level fused deep learning architecture with shallow neural network classifier for gastrointestinal cancer classification from wireless capsule endoscopy images","volume":"25","author":"Khan","year":"2025","journal-title":"BMC Med. Inform. Decis. Mak."},{"key":"10.1016\/j.displa.2026.103550_b23","doi-asserted-by":"crossref","DOI":"10.1007\/s13755-024-00290-x","article-title":"Spatial-attention ConvMixer architecture for classification and detection of gastrointestinal diseases using the kvasir dataset","volume":"12","author":"Demirba\u015f","year":"2024","journal-title":"Health Inf. Sci. Syst."},{"key":"10.1016\/j.displa.2026.103550_b24","doi-asserted-by":"crossref","first-page":"4452","DOI":"10.1109\/TCSVT.2023.3277462","article-title":"Vision transformer with hybrid shifted windows for gastrointestinal endoscopy image classification","volume":"33","author":"Wang","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.displa.2026.103550_b25","doi-asserted-by":"crossref","first-page":"3384","DOI":"10.1109\/TMI.2023.3287572","article-title":"Eye-gaze-guided vision transformer for rectifying shortcut learning","volume":"42","author":"Ma","year":"2022","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.displa.2026.103550_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2025.108047","article-title":"DOKD-MFR: Integrating dual online knowledge distillation with multi-scale feature refinement for pneumonia image recognition","volume":"110","author":"Li","year":"2025","journal-title":"Biomed. Signal Process. Control."},{"key":"10.1016\/j.displa.2026.103550_b27","doi-asserted-by":"crossref","DOI":"10.3389\/fonc.2025.1626785","article-title":"Explainable multi-view transformer framework with mutual learning for precision breast cancer pathology image classification","volume":"15","author":"Byeon","year":"2025","journal-title":"Front. Oncol."},{"key":"10.1016\/j.displa.2026.103550_b28","doi-asserted-by":"crossref","DOI":"10.1002\/mp.17977","article-title":"Joint enhancement of automatic chest x-ray diagnosis and radiological gaze prediction with multistage cooperative learning","volume":"52","author":"Qiu","year":"2025","journal-title":"Med. Phys."},{"key":"10.1016\/j.displa.2026.103550_b29","series-title":"International Conference on Medical Image Computing and Computer-Assisted Intervention","article-title":"Enhancing human-computer interaction in chest X-ray analysis using vision and language model with eye gaze patterns","author":"Kim","year":"2024"},{"key":"10.1016\/j.displa.2026.103550_b30","series-title":"2024 IEEE\/CVF Winter Conference on Applications of Computer Vision","first-page":"2183","article-title":"GazeGNN: A gaze-guided graph neural network for chest X-ray classification","author":"Wang","year":"2023"},{"key":"10.1016\/j.displa.2026.103550_b31","series-title":"Eye tracking guided deep multiple instance learning with dual cross-attention for fundus disease detection","author":"Jiang","year":"2023"},{"key":"10.1016\/j.displa.2026.103550_b32","series-title":"2016 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3485","article-title":"ProNet: Learning to propose object-specific boxes for cascaded neural networks","author":"Sun","year":"2015"},{"key":"10.1016\/j.displa.2026.103550_b33","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"936","article-title":"Feature pyramid networks for object detection","author":"Lin","year":"2016"},{"key":"10.1016\/j.displa.2026.103550_b34","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"DeepLab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs","volume":"40","author":"Chen","year":"2016","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.displa.2026.103550_b35","series-title":"2009 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"248","article-title":"ImageNet: A large-scale hierarchical image database","author":"Deng","year":"2009"},{"key":"10.1016\/j.displa.2026.103550_b36","series-title":"2021 IEEE\/CVF International Conference on Computer Vision","first-page":"9992","article-title":"Swin transformer: Hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"key":"10.1016\/j.displa.2026.103550_b37","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"11966","article-title":"A ConvNet for the 2020s","author":"Liu","year":"2022"},{"key":"10.1016\/j.displa.2026.103550_b38","series-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"10.1016\/j.displa.2026.103550_b39","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"3462","article-title":"Chestx-Ray8: Hospital-scale chest X-Ray database and benchmarks on weakly-supervised classification and localization of common thorax diseases","author":"Wang","year":"2017"},{"key":"10.1016\/j.displa.2026.103550_b40","doi-asserted-by":"crossref","first-page":"104319","DOI":"10.1016\/j.compbiomed.2021.104319","article-title":"Exploring the effect of image enhancement techniques on COVID-19 detection using chest X-ray images","volume":"132","author":"Rahman","year":"2020","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.displa.2026.103550_b41","doi-asserted-by":"crossref","DOI":"10.7717\/peerj-cs.2780","article-title":"Enhancing colorectal polyp classification using gaze-based attention networks","volume":"11","author":"Guo","year":"2025","journal-title":"PeerJ Comput. Sci."},{"key":"10.1016\/j.displa.2026.103550_b42","doi-asserted-by":"crossref","DOI":"10.1145\/3083187.3083212","article-title":"KVASIR: A multi-class image dataset for computer aided gastrointestinal disease detection","author":"Pogorelov","year":"2017","journal-title":"Proc. 8th ACM Multimed. Syst. Conf."},{"key":"10.1016\/j.displa.2026.103550_b43","doi-asserted-by":"crossref","first-page":"1990","DOI":"10.1109\/TMI.2022.3153322","article-title":"ImageGCN: Multi-relational image graph convolutional networks for disease identification with chest X-Rays","volume":"41","author":"Mao","year":"2019","journal-title":"IEEE Trans. Med. Imaging"},{"key":"10.1016\/j.displa.2026.103550_b44","doi-asserted-by":"crossref","first-page":"132","DOI":"10.1007\/s00530-024-01321-z","article-title":"Multi-label local awareness and global co-occurrence priori learning improve chest X-ray classification","volume":"30","author":"Wang","year":"2024","journal-title":"Multim. Syst."},{"key":"10.1016\/j.displa.2026.103550_b45","article-title":"Reconstruction-based approach for chest X-ray image segmentation and enhanced multi-label chest disease classification","volume":"165","author":"Chehade","year":"2025","journal-title":"Artif. Intell. Med."},{"key":"10.1016\/j.displa.2026.103550_b46","series-title":"2025 10th International Conference on Intelligent Computing and Signal Processing","first-page":"320","article-title":"Distilling label co-occurrence for chest X-Ray image classification","author":"Ding","year":"2025"},{"key":"10.1016\/j.displa.2026.103550_b47","series-title":"2021 IEEE Winter Conference on Applications of Computer Vision","first-page":"3138","article-title":"Rotate to attend: Convolutional triplet attention module","author":"Misra","year":"2020"},{"key":"10.1016\/j.displa.2026.103550_b48","series-title":"2021 IEEE\/CVF International Conference on Computer Vision","first-page":"82","article-title":"Asymmetric loss for multi-label classification","author":"Baruch","year":"2020"},{"key":"10.1016\/j.displa.2026.103550_b49","doi-asserted-by":"crossref","first-page":"5796","DOI":"10.1002\/int.22815","article-title":"Convolutional-capsule network for gastrointestinal endoscopy image classification","volume":"37","author":"Wang","year":"2022","journal-title":"Int. J. Intell. Syst."},{"key":"10.1016\/j.displa.2026.103550_b50","doi-asserted-by":"crossref","DOI":"10.3390\/bioengineering10070809","article-title":"GIT-net: An ensemble deep learning-based GI tract classification of endoscopic images","volume":"10","author":"Gunasekaran","year":"2023","journal-title":"Bioengineering"},{"key":"10.1016\/j.displa.2026.103550_b51","series-title":"Enhanced multi-class classification of gastrointestinal endoscopic images with interpretable deep learning model","author":"Kamble","year":"2025"},{"key":"10.1016\/j.displa.2026.103550_b52","doi-asserted-by":"crossref","DOI":"10.7717\/peerj-cs.2809","article-title":"Deep ensemble learning for gastrointestinal diagnosis using endoscopic image classification","volume":"11","author":"Siddiqui","year":"2025","journal-title":"PeerJ Comput. Sci."},{"key":"10.1016\/j.displa.2026.103550_b53","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"7132","article-title":"Squeeze-and-excitation networks","author":"Hu","year":"2017"},{"key":"10.1016\/j.displa.2026.103550_b54","series-title":"EfficientNet: Rethinking model scaling for convolutional neural networks","author":"Tan","year":"2019"},{"key":"10.1016\/j.displa.2026.103550_b55","series-title":"2019 IEEE\/CVF International Conference on Computer Vision","first-page":"1314","article-title":"Searching for MobileNetV3","author":"Howard","year":"2019"},{"key":"10.1016\/j.displa.2026.103550_b56","doi-asserted-by":"crossref","first-page":"103792","DOI":"10.1016\/j.compbiomed.2020.103792","article-title":"Automated detection of COVID-19 cases using deep neural networks with X-ray images","volume":"121","author":"Ozturk","year":"2020","journal-title":"Comput. Biol. Med."},{"key":"10.1016\/j.displa.2026.103550_b57","doi-asserted-by":"crossref","first-page":"1311","DOI":"10.1007\/s00521-020-05017-z","article-title":"A novel content-based image retrieval approach for classification using GLCM features and texture fused LBP variants","volume":"33","author":"Garg","year":"2020","journal-title":"Neural Comput. Appl."},{"key":"10.1016\/j.displa.2026.103550_b58","doi-asserted-by":"crossref","first-page":"2698","DOI":"10.1109\/TMI.2020.3042773","article-title":"Learning hierarchical attention for weakly-supervised chest X-Ray abnormality localization and diagnosis","volume":"40","author":"Ouyang","year":"2020","journal-title":"IEEE Trans. Med. Imaging"}],"container-title":["Displays"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0141938226002131?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0141938226002131?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,28]],"date-time":"2026-05-28T11:47:36Z","timestamp":1779968856000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0141938226002131"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,12]]},"references-count":58,"alternative-id":["S0141938226002131"],"URL":"https:\/\/doi.org\/10.1016\/j.displa.2026.103550","relation":{},"ISSN":["0141-9382"],"issn-type":[{"value":"0141-9382","type":"print"}],"subject":[],"published":{"date-parts":[[2026,12]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Towards clinician-like reasoning: Stage-aware gaze-guided heterogeneous network for medical image recognition","name":"articletitle","label":"Article Title"},{"value":"Displays","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.displa.2026.103550","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"103550"}}