{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T01:10:23Z","timestamp":1755825023430,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1145\/3731715.3733261","type":"proceedings-article","created":{"date-parts":[[2025,6,25]],"date-time":"2025-06-25T18:31:04Z","timestamp":1750876264000},"page":"833-841","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["A Generic Framework for Evaluating Gaze Representations for Gaze Estimation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-1967-5883","authenticated-orcid":false,"given":"Xinyu","family":"Lin","sequence":"first","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5534-7463","authenticated-orcid":false,"given":"Buyu","family":"Liu","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen), Shenzhen, Guangdong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8243-1127","authenticated-orcid":false,"given":"Suguo","family":"Zhu","sequence":"additional","affiliation":[{"name":"Hangzhou Dianzi University, Hangzhou, Zhejiang, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3766-3953","authenticated-orcid":false,"given":"Jun","family":"Bao","sequence":"additional","affiliation":[{"name":"Harbin Institute of Technology (Shenzhen), Shenzhen, Guangdong, China"}]}],"member":"320","published-online":{"date-parts":[[2025,6,30]]},"reference":[{"doi-asserted-by":"crossref","unstructured":"V\u00edtor Albiero Xingyu Chen Xi Yin Guan Pang and Tal Hassner. 2021. img2pose: Face Alignment and Detection via 6DoF Face Pose Estimation. In CVPR. https:\/\/arxiv.org\/abs\/2012.07791","key":"e_1_3_2_1_1_1","DOI":"10.1109\/CVPR46437.2021.00753"},{"key":"e_1_3_2_1_2_1","volume-title":"One algorithm to rule them all? An evaluation and discussion of ten eye movement event-detection algorithms. Behavior research methods 49","author":"Andersson Richard","year":"2017","unstructured":"Richard Andersson, Linnea Larsson, Kenneth Holmqvist, Martin Stridh, and Marcus Nystr\u00f6m. 2017. One algorithm to rule them all? An evaluation and discussion of ten eye movement event-detection algorithms. Behavior research methods 49 (2017), 616--637."},{"key":"e_1_3_2_1_3_1","volume-title":"Variability and development of a normative data base for saccadic eye movements. Investigative ophthalmology & visual science 21, 1","author":"Bahill AT","year":"1981","unstructured":"AT Bahill, A Brockenbrough, and BT Troost. 1981. Variability and development of a normative data base for saccadic eye movements. Investigative ophthalmology & visual science 21, 1 (1981), 116--125."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_4_1","DOI":"10.1109\/CVPR52733.2024.00141"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_5_1","DOI":"10.1007\/978-3-030-01264-9_7"},{"key":"e_1_3_2_1_6_1","volume-title":"Appearance-based Gaze Estimation With Deep Learning: A Review and Benchmark. arXiv preprint arXiv:2104.12668","author":"Cheng Yihua","year":"2021","unstructured":"Yihua Cheng, Haofei Wang, Yiwei Bao, and Feng Lu. 2021. Appearance-based Gaze Estimation With Deep Learning: A Review and Benchmark. arXiv preprint arXiv:2104.12668 (2021)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_7_1","DOI":"10.1007\/978-3-642-00296-0"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_8_1","DOI":"10.1007\/978-3-030-01249-6_21"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_9_1","DOI":"10.1145\/2578153.2578190"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_10_1","DOI":"10.1109\/CVPRW56347.2022.00548"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_11_1","DOI":"10.1109\/ICCVW.2019.00144"},{"doi-asserted-by":"crossref","unstructured":"Dan Witzner Hansen and Qiang Ji. 2009. In the eye of the beholder: A survey of models for eyes and gaze. IEEE transactions on pattern analysis and machine intelligence 32 3 (2009) 478--500.","key":"e_1_3_2_1_12_1","DOI":"10.1109\/TPAMI.2009.30"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_13_1","DOI":"10.1016\/j.procs.2014.08.194"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_14_1","DOI":"10.1109\/ICCVW.2019.00146"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_15_1","DOI":"10.1109\/CVPR42600.2020.00975"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_16_1","DOI":"10.1109\/CVPR.2016.90"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_17_1","DOI":"10.1109\/ICME55011.2023.00409"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_18_1","DOI":"10.1109\/CVPR.2017.243"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_19_1","DOI":"10.1109\/WACV48630.2021.00006"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_20_1","DOI":"10.1109\/ICCV.2019.00701"},{"key":"e_1_3_2_1_21_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_22_1","DOI":"10.1109\/CVPR.2016.239"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_23_1","DOI":"10.1109\/TNSRE.2023.3236886"},{"key":"e_1_3_2_1_24_1","volume-title":"An adaptive algorithm for fixation, saccade, and glissade detection in eyetracking data. Behavior research methods 42, 1","author":"Nystr\u00f6m Marcus","year":"2010","unstructured":"Marcus Nystr\u00f6m and Kenneth Holmqvist. 2010. An adaptive algorithm for fixation, saccade, and glissade detection in eyetracking data. Behavior research methods 42, 1 (2010), 188--204."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_25_1","DOI":"10.1007\/978-3-030-58610-2_44"},{"key":"e_1_3_2_1_26_1","volume-title":"Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, et al. 2019. Pytorch: An imperative style, high-performance deep learning library. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_27_1","volume-title":"Eye movements in reading and information processing: 20 years of research. Psychological bulletin 124, 3","author":"Rayner Keith","year":"1998","unstructured":"Keith Rayner. 1998. Eye movements in reading and information processing: 20 years of research. Psychological bulletin 124, 3 (1998), 372."},{"key":"e_1_3_2_1_28_1","volume-title":"U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention-MICCAI 2015: 18th international conference","author":"Ronneberger Olaf","year":"2015","unstructured":"Olaf Ronneberger, Philipp Fischer, and Thomas Brox. 2015. U-net: Convolutional networks for biomedical image segmentation. In Medical image computing and computer-assisted intervention-MICCAI 2015: 18th international conference, Munich, Germany, October 5-9, 2015, proceedings, part III 18. Springer, 234--241."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_29_1","DOI":"10.1145\/355017.355028"},{"key":"e_1_3_2_1_30_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and AndrewZisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_31_1","DOI":"10.1145\/2501988.2501994"},{"key":"e_1_3_2_1_32_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 1821--1828","author":"Sugano Yusuke","year":"2014","unstructured":"Yusuke Sugano, Yasuyuki Matsushita, and Yoichi Sato. 2014. Learning-bysynthesis for appearance-based 3d gaze estimation. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1821--1828."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_33_1","DOI":"10.1109\/ICCV48922.2021.00368"},{"key":"e_1_3_2_1_34_1","volume-title":"Hyung Jin Chang, and Xucong Zhang.","author":"Wang Yunhan","year":"2023","unstructured":"Yunhan Wang, Xiangwei Shi, Shalini De Mello, Hyung Jin Chang, and Xucong Zhang. 2023. Investigation of Architectures and Receptive Fields for Appearancebased Gaze Estimation. arXiv preprint arXiv:2308.09593 (2023)."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_35_1","DOI":"10.1109\/ISMAR-Adjunct51615.2020.00052"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_36_1","DOI":"10.1145\/2857491.2857492"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_37_1","DOI":"10.1109\/CVPR42600.2020.00734"},{"key":"e_1_3_2_1_38_1","volume-title":"Kyu Lee, Prashanth Venkatesh, Jaesik Park, Jihun Yu, and Hyun Soo Park.","author":"Yu Zhixuan","year":"2020","unstructured":"Zhixuan Yu, Jae Shin Yoon, In Kyu Lee, Prashanth Venkatesh, Jaesik Park, Jihun Yu, and Hyun Soo Park. 2020. Humbi: A large multiview dataset of human body expressions. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 2990--3000."},{"key":"e_1_3_2_1_39_1","volume-title":"Eth-xgaze: A large scale dataset for gaze estimation under extreme head pose and gaze variation. In Computer Vision-ECCV 2020: 16th European Conference","author":"Zhang Xucong","year":"2020","unstructured":"Xucong Zhang, Seonwook Park, Thabo Beeler, Derek Bradley, Siyu Tang, and Otmar Hilliges. 2020. Eth-xgaze: A large scale dataset for gaze estimation under extreme head pose and gaze variation. In Computer Vision-ECCV 2020: 16th European Conference, Glasgow, UK, August 23-28, 2020, Proceedings, Part V 16. Springer, 365--381."},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_40_1","DOI":"10.1145\/3204493.3204548"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_41_1","DOI":"10.1145\/3204493.3204548"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_42_1","DOI":"10.1109\/CVPR.2015.7299081"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_43_1","DOI":"10.1109\/CVPRW.2017.284"},{"doi-asserted-by":"publisher","key":"e_1_3_2_1_44_1","DOI":"10.1109\/TPAMI"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the IEEE International Conference on Computer Vision. 3143--3152","author":"Zhu Wangjiang","year":"2017","unstructured":"Wangjiang Zhu and Haoping Deng. 2017. Monocular free-head 3d gaze tracking with deep learning and geometry constraints. In Proceedings of the IEEE International Conference on Computer Vision. 3143--3152."}],"event":{"sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"acronym":"ICMR '25","name":"ICMR '25: International Conference on Multimedia Retrieval","location":"Chicago IL USA"},"container-title":["Proceedings of the 2025 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3731715.3733261","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T04:07:49Z","timestamp":1755749269000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3731715.3733261"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":45,"alternative-id":["10.1145\/3731715.3733261","10.1145\/3731715"],"URL":"https:\/\/doi.org\/10.1145\/3731715.3733261","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]},"assertion":[{"value":"2025-06-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}