{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T12:03:39Z","timestamp":1781006619868,"version":"3.54.1"},"reference-count":57,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100018571","name":"Specific Research Project of Guangxi for Research Bases and Talents","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100018571","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Knowledge-Based Systems"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.knosys.2026.116120","type":"journal-article","created":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T21:28:59Z","timestamp":1777930139000},"page":"116120","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":1,"special_numbering":"C","title":["GMGaze: MoE-based context-aware gaze estimation with CLIP and multiscale transformer"],"prefix":"10.1016","volume":"345","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-9660-4253","authenticated-orcid":false,"given":"Xinyuan","family":"Zhao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8640-8650","authenticated-orcid":false,"given":"Yihang","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3402-9576","authenticated-orcid":false,"given":"Ahmad","family":"Chaddad","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8428-3092","authenticated-orcid":false,"given":"Sarah A.","family":"Alkhodair","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Reem","family":"Kateb","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.knosys.2026.116120_b1","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.122363","article-title":"EG-net: Appearance-based eye gaze estimation using an efficient gaze network with attention mechanism","volume":"238","author":"Wu","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.knosys.2026.116120_b2","doi-asserted-by":"crossref","first-page":"293","DOI":"10.1016\/j.knosys.2016.07.038","article-title":"Appearance-based gaze estimation using deep features and random forest regression","volume":"110","author":"Wang","year":"2016","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.knosys.2026.116120_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2021.107630","article-title":"Self-calibrated driver gaze estimation via gaze pattern learning","volume":"235","author":"Yuan","year":"2022","journal-title":"Knowl.-Based Syst."},{"issue":"1","key":"10.1016\/j.knosys.2026.116120_b4","article-title":"Bio-inspired vision mimetics toward next-generation collision-avoidance automation","volume":"4","author":"Xu","year":"2023","journal-title":"Innov."},{"key":"10.1016\/j.knosys.2026.116120_b5","doi-asserted-by":"crossref","unstructured":"Xucong Zhang, Yusuke Sugano, Mario Fritz, Andreas Bulling, It\u2019s written all over your face: Full-face appearance-based gaze estimation, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2017, pp. 51\u201360.","DOI":"10.1109\/CVPRW.2017.284"},{"key":"10.1016\/j.knosys.2026.116120_b6","first-page":"10623","article-title":"A coarse-to-fine adaptive network for appearance-based gaze estimation","volume":"vol. 34","author":"Cheng","year":"2020"},{"key":"10.1016\/j.knosys.2026.116120_b7","series-title":"2022 26th International Conference on Pattern Recognition","first-page":"3341","article-title":"Gaze estimation using transformer","author":"Cheng","year":"2022"},{"key":"10.1016\/j.knosys.2026.116120_b8","series-title":"Gazeclip: Towards enhancing gaze estimation via text guidance","author":"Wang","year":"2023"},{"key":"10.1016\/j.knosys.2026.116120_b9","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.111244","article-title":"Collaborative contrastive learning for cross-domain gaze estimation","volume":"161","author":"Xia","year":"2025","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2026.116120_b10","first-page":"9780","article-title":"Gaze label alignment: Alleviating domain shift for gaze estimation","volume":"vol. 39","author":"Zeng","year":"2025"},{"key":"10.1016\/j.knosys.2026.116120_b11","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.125799","article-title":"Slippage-robust linear features for eye tracking","volume":"264","author":"Homavazir","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.knosys.2026.116120_b12","series-title":"GazeFormer-MoE: Context-aware gaze estimation via CLIP and moe transformer","author":"Zhao","year":"2026"},{"key":"10.1016\/j.knosys.2026.116120_b13","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2022.116894","article-title":"Eye gaze estimation: A survey on deep learning-based approaches","volume":"199","author":"Pathirana","year":"2022","journal-title":"Expert Syst. Appl."},{"issue":"12","key":"10.1016\/j.knosys.2026.116120_b14","doi-asserted-by":"crossref","first-page":"7509","DOI":"10.1109\/TPAMI.2024.3393571","article-title":"Appearance-based gaze estimation with deep learning: A review and benchmark","volume":"46","author":"Cheng","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116120_b15","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.neucom.2013.11.037","article-title":"Mimicking visual searching with integrated top down cues and low-level features","volume":"133","author":"Xu","year":"2014","journal-title":"Neurocomputing"},{"key":"10.1016\/j.knosys.2026.116120_b16","doi-asserted-by":"crossref","first-page":"23","DOI":"10.1016\/j.neucom.2018.09.093","article-title":"A bio-inspired motion sensitive model and its application to estimating human gaze positions under classified driving conditions","volume":"345","author":"Xu","year":"2019","journal-title":"Neurocomputing"},{"issue":"1","key":"10.1016\/j.knosys.2026.116120_b17","doi-asserted-by":"crossref","first-page":"162","DOI":"10.1109\/TPAMI.2017.2778103","article-title":"Mpiigaze: Real-world dataset and deep appearance-based gaze estimation","volume":"41","author":"Zhang","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116120_b18","first-page":"436","article-title":"Puregaze: Purifying gaze feature for generalizable gaze estimation","volume":"vol. 36","author":"Cheng","year":"2022"},{"key":"10.1016\/j.knosys.2026.116120_b19","doi-asserted-by":"crossref","unstructured":"Yiwei Bao, Yunfei Liu, Haofei Wang, Feng Lu, Generalizing gaze estimation with rotation consistency, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 4207\u20134216.","DOI":"10.1109\/CVPR52688.2022.00417"},{"key":"10.1016\/j.knosys.2026.116120_b20","unstructured":"Yunfei Liu, Ruicong Liu, Haofei Wang, Feng Lu, Generalizing gaze estimation with outlier-guided collaborative adaptation, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 3835\u20133844."},{"key":"10.1016\/j.knosys.2026.116120_b21","doi-asserted-by":"crossref","first-page":"3733","DOI":"10.1109\/TIP.2025.3575238","article-title":"\u2018Disengage AND integrate\u2019: Personalized causal network for gaze estimation","volume":"34","author":"Tian","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2026.116120_b22","series-title":"ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"Test time prompt tuning for domain adaptive gaze estimation","author":"Wang","year":"2025"},{"key":"10.1016\/j.knosys.2026.116120_b23","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124425","article-title":"Deep face profiler (DeFaP): Towards explicit, non-restrained, non-invasive, facial and gaze comprehension","volume":"254","author":"Khan","year":"2024","journal-title":"Expert Syst. Appl."},{"issue":"3","key":"10.1016\/j.knosys.2026.116120_b24","doi-asserted-by":"crossref","first-page":"854","DOI":"10.1007\/s11263-023-01879-7","article-title":"In the eye of transformer: Global\u2013local correlation for egocentric gaze estimation and beyond","volume":"132","author":"Lai","year":"2024","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.knosys.2026.116120_b25","doi-asserted-by":"crossref","unstructured":"Yihua Cheng, Feng Lu, DVGaze: Dual-View Gaze Estimation, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, ICCV, 2023, pp. 20632\u201320641.","DOI":"10.1109\/ICCV51070.2023.01886"},{"key":"10.1016\/j.knosys.2026.116120_b26","series-title":"CLIP-driven dual feature enhancing network for gaze estimation","first-page":"arXiv","author":"Zhang","year":"2025"},{"key":"10.1016\/j.knosys.2026.116120_b27","series-title":"ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"CR-CLIP: Image-text contrastive regression for generalized gaze estimation","author":"Zhu","year":"2025"},{"key":"10.1016\/j.knosys.2026.116120_b28","series-title":"Estimating or propagating gradients through stochastic neurons for conditional computation","author":"Bengio","year":"2013"},{"issue":"59","key":"10.1016\/j.knosys.2026.116120_b29","first-page":"1","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"Ganin","year":"2016","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.knosys.2026.116120_b30","series-title":"Proceedings of the ACM Symposium on Eye Tracking Research and Applications","first-page":"255","article-title":"EYEDIAP: A database for the development and evaluation of gaze estimation algorithms from RGB and RGB-D cameras","author":"Mora","year":"2014"},{"key":"10.1016\/j.knosys.2026.116120_b31","series-title":"Proceedings of the European Conference on Computer Vision","first-page":"365","article-title":"ETH-XGaze: A large scale dataset for gaze estimation under extreme head pose and gaze variation","author":"Zhang","year":"2020"},{"key":"10.1016\/j.knosys.2026.116120_b32","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"2176","article-title":"Gaze360: Physically unconstrained gaze estimation in the wild","author":"Kellnhofer","year":"2019"},{"key":"10.1016\/j.knosys.2026.116120_b33","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops","first-page":"2299","article-title":"It\u2019s written all over your face: Full-face appearance-based gaze estimation","author":"Zhang","year":"2017"},{"key":"10.1016\/j.knosys.2026.116120_b34","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.knosys.2026.116120_b35","series-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2017"},{"key":"10.1016\/j.knosys.2026.116120_b36","series-title":"Asian Conference on Computer Vision","first-page":"309","article-title":"Appearance-based gaze estimation using dilated-convolutions","author":"Chen","year":"2018"},{"key":"10.1016\/j.knosys.2026.116120_b37","series-title":"2020 25th International Conference on Pattern Recognition","first-page":"9936","article-title":"Adaptive feature fusion network for gaze tracking in mobile tablets","author":"Bao","year":"2021"},{"key":"10.1016\/j.knosys.2026.116120_b38","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2023.106994","article-title":"Attention-guided and fine-grained feature extraction from face images for gaze estimation","volume":"126","author":"Wu","year":"2023","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.knosys.2026.116120_b39","doi-asserted-by":"crossref","unstructured":"Lin Zhang, Yi Tian, Xiyun Wang, Wanru Xu, Yi Jin, Yaping Huang, Differential contrastive training for gaze estimation, in: Proceedings of the 33rd ACM International Conference on Multimedia, 2025, pp. 3477\u20133486.","DOI":"10.1145\/3746027.3755096"},{"key":"10.1016\/j.knosys.2026.116120_b40","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110441","article-title":"Appearance debiased gaze estimation via stochastic subject-wise adversarial learning","volume":"152","author":"Kim","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.knosys.2026.116120_b41","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.110494","article-title":"Democratizing eye-tracking? Appearance-based gaze estimation with improved attention branch","volume":"149","author":"Kuric","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.knosys.2026.116120_b42","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2024.104105","article-title":"Joint pyramidal perceptual attention and hierarchical consistency constraint for gaze estimation","volume":"248","author":"Xia","year":"2024","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.knosys.2026.116120_b43","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2025.111536","article-title":"ADGaze: Anisotropic Gaussian label distribution learning for fine-grained gaze estimation","volume":"164","author":"Li","year":"2025","journal-title":"Pattern Recognit."},{"issue":"1","key":"10.1016\/j.knosys.2026.116120_b44","doi-asserted-by":"crossref","first-page":"27135","DOI":"10.1038\/s41598-025-12466-w","article-title":"Nonlinear multi-head cross-attention network and programmable gradient information for gaze estimation","volume":"15","author":"Li","year":"2025","journal-title":"Sci. Rep."},{"key":"10.1016\/j.knosys.2026.116120_b45","doi-asserted-by":"crossref","DOI":"10.1016\/j.displa.2024.102878","article-title":"Frequency-spatial interaction network for gaze estimation","volume":"86","author":"Jia","year":"2025","journal-title":"Displays"},{"key":"10.1016\/j.knosys.2026.116120_b46","doi-asserted-by":"crossref","DOI":"10.1109\/THMS.2025.3553404","article-title":"Slyklatent: A learning framework for gaze estimation using deep facial feature learning","author":"Adebayo","year":"2025","journal-title":"IEEE Trans. Human-Machine Syst."},{"issue":"3","key":"10.1016\/j.knosys.2026.116120_b47","first-page":"115","article-title":"GazeSymCAT: A symmetric cross-attention transformer for robust gaze estimation under extreme head poses and gaze variations","volume":"12","author":"Zhong","year":"2025","journal-title":"J. Comput. Des. Eng."},{"key":"10.1016\/j.knosys.2026.116120_b48","doi-asserted-by":"crossref","DOI":"10.1109\/TIP.2025.3546465","article-title":"Iris geometric transformation guided deep appearance-based gaze estimation","author":"Nie","year":"2025","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.knosys.2026.116120_b49","series-title":"Omnigaze: Reward-inspired generalizable gaze estimation in the wild","author":"Qu","year":"2025"},{"key":"10.1016\/j.knosys.2026.116120_b50","doi-asserted-by":"crossref","unstructured":"Yaoming Wang, Yangzhou Jiang, Jin Li, Bingbing Ni, Wenrui Dai, Chenglin Li, Hongkai Xiong, Teng Li, Contrastive regression for domain adaptation on gaze estimation, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 19376\u201319385.","DOI":"10.1109\/CVPR52688.2022.01877"},{"key":"10.1016\/j.knosys.2026.116120_b51","series-title":"Jitter does matter: Adapting gaze estimation to new domains","author":"Liu","year":"2022"},{"key":"10.1016\/j.knosys.2026.116120_b52","doi-asserted-by":"crossref","unstructured":"Isack Lee, Jun-Seok Yun, Hee Hyeon Kim, Youngju Na, Seok Bong Yoo, Latentgaze: Cross-domain gaze estimation through gaze-aware analytic latent code manipulation, in: Proceedings of the Asian Conference on Computer Vision, 2022, pp. 3379\u20133395.","DOI":"10.1007\/978-3-031-26348-4_10"},{"key":"10.1016\/j.knosys.2026.116120_b53","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.111901","article-title":"GHR-2D: Gaze and head redirection via disentanglement and diffusion for gaze estimation","volume":"160","author":"Hu","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"5","key":"10.1016\/j.knosys.2026.116120_b54","doi-asserted-by":"crossref","first-page":"3707","DOI":"10.1109\/TPAMI.2023.3348528","article-title":"Pnp-ga+: Plug-and-play domain adaptation for gaze estimation using model variants","volume":"46","author":"Liu","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.knosys.2026.116120_b55","doi-asserted-by":"crossref","unstructured":"Tobias Fischer, Hyung Jin Chang, Yiannis Demiris, Rt-gene: Real-time eye gaze estimation in natural environments, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 334\u2013352.","DOI":"10.1007\/978-3-030-01249-6_21"},{"issue":"4","key":"10.1016\/j.knosys.2026.116120_b56","doi-asserted-by":"crossref","first-page":"1224","DOI":"10.3390\/s25041224","article-title":"GazeCapsNet: A lightweight gaze estimation framework","volume":"25","author":"Muksimova","year":"2025","journal-title":"Sensors"},{"issue":"9","key":"10.1016\/j.knosys.2026.116120_b57","doi-asserted-by":"crossref","first-page":"2337","DOI":"10.1007\/s11263-022-01653-1","article-title":"Learning to prompt for vision-language models","volume":"130","author":"Zhou","year":"2022","journal-title":"Int. J. Comput. Vis."}],"container-title":["Knowledge-Based Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008464?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0950705126008464?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,9]],"date-time":"2026-06-09T11:30:05Z","timestamp":1781004605000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0950705126008464"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":57,"alternative-id":["S0950705126008464"],"URL":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116120","relation":{},"ISSN":["0950-7051"],"issn-type":[{"value":"0950-7051","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"GMGaze: MoE-based context-aware gaze estimation with CLIP and multiscale transformer","name":"articletitle","label":"Article Title"},{"value":"Knowledge-Based Systems","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.knosys.2026.116120","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"116120"}}