{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:26:14Z","timestamp":1776083174577,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","funder":[{"name":"Baden-Wuerttemberg Stiftung","award":["AUTONOMOUS ROBOTICS project iAssistADL"],"award-info":[{"award-number":["AUTONOMOUS ROBOTICS project iAssistADL"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,10]]},"DOI":"10.1145\/3721238.3730692","type":"proceedings-article","created":{"date-parts":[[2025,7,23]],"date-time":"2025-07-23T08:40:47Z","timestamp":1753260047000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["HOIGaze: Gaze Estimation During Hand-Object Interactions in Extended Reality Exploiting Eye-Hand-Head Coordination"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5105-9753","authenticated-orcid":false,"given":"Zhiming","family":"Hu","sequence":"first","affiliation":[{"name":"University of Stuttgart, Stuttgart, Germany and The Hong Kong University of Science and Technology (Guangzhou), Guangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3480-6892","authenticated-orcid":false,"given":"Daniel","family":"Haeufle","sequence":"additional","affiliation":[{"name":"University of Tuebingen, Tuebingen, Germany and The Center for Bionic Intelligence Tuebingen Stuttgart, Stuttgart, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7768-8961","authenticated-orcid":false,"given":"Syn","family":"Schmitt","sequence":"additional","affiliation":[{"name":"University of Stuttgart, Stuttgart, Germany and The Center for Bionic Intelligence Tuebingen Stuttgart, Stuttgart, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6317-7303","authenticated-orcid":false,"given":"Andreas","family":"Bulling","sequence":"additional","affiliation":[{"name":"University of Stuttgart, Stuttgart, Germany"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,7,27]]},"reference":[{"key":"e_1_3_3_3_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/VR.2019.8797816"},{"key":"e_1_3_3_3_3_1","unstructured":"Prithviraj Banerjee Sindi Shkodrani Pierre Moulon Shreyas Hampali Fan Zhang Jade Fountain Edward Miller Selen Basol Richard Newcombe Robert Wang et\u00a0al. 2024. Introducing HOT3D: An Egocentric Dataset for 3D Hand and Object Tracking. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.09598 (2024)."},{"key":"e_1_3_3_3_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9982249"},{"key":"e_1_3_3_3_5_1","doi-asserted-by":"crossref","unstructured":"Andreas Bulling Jamie\u00a0A Ward Hans Gellersen and Gerhard Troster. 2010. Eye movement analysis for activity recognition using electrooculography. IEEE Transactions on Pattern Analysis and Machine Intelligence 33 4 (2010) 741\u2013753.","DOI":"10.1109\/TPAMI.2010.86"},{"key":"e_1_3_3_3_6_1","doi-asserted-by":"crossref","unstructured":"Antoine Coutrot Janet\u00a0H Hsiao and Antoni\u00a0B Chan. 2018. Scanpath modeling and classification with hidden Markov models. Behavior Research Methods 50 1 (2018) 362\u2013379.","DOI":"10.3758\/s13428-017-0876-8"},{"key":"e_1_3_3_3_7_1","doi-asserted-by":"crossref","unstructured":"Dima Damen Hazel Doughty Giovanni\u00a0Maria Farinella Antonino Furnari Evangelos Kazakos Jian Ma Davide Moltisanti Jonathan Munro Toby Perrett Will Price et\u00a0al. 2022. Rescaling egocentric vision: Collection pipeline and challenges for epic-kitchens-100. International Journal of Computer Vision (2022) 1\u201323.","DOI":"10.1007\/s11263-021-01531-2"},{"key":"e_1_3_3_3_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448018.3458008"},{"key":"e_1_3_3_3_9_1","doi-asserted-by":"crossref","unstructured":"Andrew\u00a0T Duchowski. 2018. Gaze-based interaction: a 30 year retrospective. Computers and Graphics 73 (2018) 59\u201369.","DOI":"10.1016\/j.cag.2018.04.002"},{"key":"e_1_3_3_3_10_1","doi-asserted-by":"crossref","unstructured":"Jacob Hadnett-Hunter George Nicolaou Eamonn O\u2019Neill and Michael Proulx. 2019. The effect of task on visual attention in interactive virtual environments. ACM Transactions on Applied Perception 16 3 (2019) 1\u201317.","DOI":"10.1145\/3352763"},{"key":"e_1_3_3_3_11_1","unstructured":"Austin Hale and Christoph Leuze. 2021. HoloYolo: Understand the real world by running object detection on the Hololens 2 and projecting the detection results in space. https:\/\/devpost.com\/software\/holoyolo."},{"key":"e_1_3_3_3_12_1","doi-asserted-by":"crossref","unstructured":"Zhiming Hu Andreas Bulling Sheng Li and Guoping Wang. 2021. FixationNet: forecasting eye fixations in task-oriented virtual environments. IEEE Transactions on Visualization and Computer Graphics 27 5 (2021) 2681\u20132690.","DOI":"10.1109\/TVCG.2021.3067779"},{"key":"e_1_3_3_3_13_1","unstructured":"Zhiming Hu Andreas Bulling Sheng Li and Guoping Wang. 2022. EHTask: recognizing user tasks from eye and head movements in immersive virtual reality. IEEE Transactions on Visualization and Computer Graphics (2022)."},{"key":"e_1_3_3_3_14_1","doi-asserted-by":"crossref","unstructured":"Zhiming Hu Sheng Li Congyi Zhang Kangrui Yi Guoping Wang and Dinesh Manocha. 2020. DGaze: CNN-based gaze prediction in dynamic scenes. IEEE Transactions on Visualization and Computer Graphics 26 5 (2020) 1902\u20131911.","DOI":"10.1109\/TVCG.2020.2973473"},{"key":"e_1_3_3_3_15_1","unstructured":"Zhiming Hu Jiahui Xu Syn Schmitt and Andreas Bulling. 2024a. Pose2Gaze: Eye-body Coordination during Daily Activities for Gaze Prediction from Full-body Poses. IEEE Transactions on Visualization and Computer Graphics (2024)."},{"key":"e_1_3_3_3_16_1","unstructured":"Zhiming Hu Zheming Yin Daniel Haeufle Syn Schmitt and Andreas Bulling. 2024b. HOIMotion: Forecasting Human Motion During Human-Object Interactions Using Egocentric 3D Object Bounding Boxes. IEEE Transactions on Visualization and Computer Graphics (2024)."},{"key":"e_1_3_3_3_17_1","doi-asserted-by":"crossref","unstructured":"Zhiming Hu Congyi Zhang Sheng Li Guoping Wang and Dinesh Manocha. 2019. SGaze: a data-driven eye-head coordination model for realtime gaze prediction. IEEE Transactions on Visualization and Computer Graphics 25 5 (2019) 2002\u20132010.","DOI":"10.1109\/TVCG.2019.2899187"},{"key":"e_1_3_3_3_18_1","unstructured":"Zhiming Hu Guanhua Zhang Zheming Yin Daniel Haeufle Syn Schmitt and Andreas Bulling. 2024c. HaHeAE: Learning Generalisable Joint Representations of Human Hand and Head Movements in Extended Reality. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.16430 (2024)."},{"key":"e_1_3_3_3_19_1","doi-asserted-by":"crossref","unstructured":"Laurent Itti Christof Koch and Ernst Niebur. 1998. A model of saliency-based visual attention for rapid scene analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence 20 11 (1998) 1254\u20131259.","DOI":"10.1109\/34.730558"},{"key":"e_1_3_3_3_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3586183.3606780"},{"key":"e_1_3_3_3_21_1","unstructured":"Chuhan Jiao Yao Wang Guanhua Zhang Mihai B\u00e2ce Zhiming Hu and Andreas Bulling. 2024a. DiffGaze: A Diffusion Model for Continuous Gaze Sequence Generation on 360-degree Images. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.17477 (2024)."},{"key":"e_1_3_3_3_22_1","unstructured":"Chuhan Jiao Guanhua Zhang Zhiming Hu and Andreas Bulling. 2024b. DiffEyeSyn: Diffusion-based User-specific Eye Movement Synthesis. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.01240 (2024)."},{"key":"e_1_3_3_3_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379156.3391338"},{"key":"e_1_3_3_3_24_1","doi-asserted-by":"crossref","unstructured":"Rakshit Kothari Zhizhuo Yang Christopher Kanan Reynold Bailey Jeff\u00a0B Pelz and Gabriel\u00a0J Diaz. 2020. Gaze-in-wild: a dataset for studying eye and head coordination in everyday activities. Scientific Reports 10 1 (2020) 1\u201318.","DOI":"10.1038\/s41598-020-59251-5"},{"key":"e_1_3_3_3_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/VR.2016.7504694"},{"key":"e_1_3_3_3_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02054"},{"key":"e_1_3_3_3_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00633"},{"key":"e_1_3_3_3_28_1","doi-asserted-by":"crossref","unstructured":"Ryoichi Nakashima Yu Fang Yasuhiro Hatori Akinori Hiratani Kazumichi Matsumiya Ichiro Kuriki and Satoshi Shioiri. 2015. Saliency-based gaze prediction based on head direction. Vision Research 117 (2015) 59\u201366.","DOI":"10.1016\/j.visres.2015.10.001"},{"key":"e_1_3_3_3_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01842"},{"key":"e_1_3_3_3_30_1","doi-asserted-by":"crossref","unstructured":"Anjul Patney Marco Salvi Joohwan Kim Anton Kaplanyan Chris Wyman Nir Benty David Luebke and Aaron Lefohn. 2016. Towards foveated rendering for gaze-tracked virtual reality. ACM Transactions on Graphics 35 6 Article 179 (2016) 12\u00a0pages.","DOI":"10.1145\/2980179.2980246"},{"key":"e_1_3_3_3_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV61041.2025.00854"},{"key":"e_1_3_3_3_32_1","doi-asserted-by":"crossref","unstructured":"Ludwig Sidenmark and Hans Gellersen. 2019a. Eye head and torso coordination during gaze shifts in virtual reality. ACM Transactions on Computer-Human Interaction 27 1 (2019) 1\u201340.","DOI":"10.1145\/3361218"},{"key":"e_1_3_3_3_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3332165.3347921"},{"key":"e_1_3_3_3_34_1","doi-asserted-by":"crossref","unstructured":"Vincent Sitzmann Ana Serrano Amy Pavel Maneesh Agrawala Diego Gutierrez Belen Masia and Gordon Wetzstein. 2018. Saliency in VR: how do people explore virtual environments? IEEE Transactions on Visualization and Computer Graphics 24 4 (2018) 1633\u20131642.","DOI":"10.1109\/TVCG.2018.2793599"},{"key":"e_1_3_3_3_35_1","doi-asserted-by":"crossref","unstructured":"John\u00a0S Stahl. 1999. Amplitude of human head movements associated with horizontal saccades. Experimental Brain Research 126 1 (1999) 41\u201354.","DOI":"10.1007\/s002210050715"},{"key":"e_1_3_3_3_36_1","doi-asserted-by":"crossref","unstructured":"Qi Sun Anjul Patney Li-Yi Wei Omer Shapira Jingwan Lu Paul Asente Suwen Zhu Morgan McGuire David Luebke and Arie Kaufman. 2018. Towards virtual reality infinite walking: dynamic saccadic redirection. ACM Transactions on Graphics 37 4 (2018) 1\u201313.","DOI":"10.1145\/3197517.3201294"},{"key":"e_1_3_3_3_37_1","first-page":"169","volume-title":"European Conference on Computer Vision","author":"Tang Bowen","year":"2024","unstructured":"Bowen Tang, Kaihao Zhang, Wenhan Luo, Wei Liu, and Hongdong Li. 2024. Prompting Future Driven Diffusion Model for Hand Motion Prediction. In European Conference on Computer Vision. Springer, 169\u2013186."},{"key":"e_1_3_3_3_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3334480.3382889"},{"key":"e_1_3_3_3_39_1","doi-asserted-by":"publisher","unstructured":"Yao Wang Mihai B\u00e2ce and Andreas Bulling. 2023. Scanpath Prediction on Information Visualisations. IEEE Transactions on Visualization and Computer Graphics (TVCG) 30 7 (2023) 3902\u20133914. 10.1109\/TVCG.2023.3242293","DOI":"10.1109\/TVCG.2023.3242293"},{"key":"e_1_3_3_3_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642942"},{"key":"e_1_3_3_3_41_1","volume-title":"Proceedings of the 2024 Pacific Conference on Computer Graphics and Applications","author":"Yan Haodong","year":"2024","unstructured":"Haodong Yan, Zhiming Hu, Syn Schmitt, and Andreas Bulling. 2024. GazeMoDiff: Gaze-guided Diffusion Model for Stochastic Human Motion Prediction. In Proceedings of the 2024 Pacific Conference on Computer Graphics and Applications."},{"key":"e_1_3_3_3_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00050"},{"key":"e_1_3_3_3_43_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19818-2_8"},{"key":"e_1_3_3_3_44_1","unstructured":"Mingyuan Zhang Zhongang Cai Liang Pan Fangzhou Hong Xinying Guo Lei Yang and Ziwei Liu. 2022a. Motiondiffuse: Text-driven human motion generation with diffusion model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2208.15001 (2022)."},{"key":"e_1_3_3_3_45_1","doi-asserted-by":"crossref","unstructured":"Xucong Zhang Yusuke Sugano Mario Fritz and Andreas Bulling. 2017. MPIIGaze: real-world dataset and deep appearance-based gaze estimation. IEEE Transactions on Pattern Analysis and Machine Intelligence 41 1 (2017) 162\u2013175.","DOI":"10.1109\/TPAMI.2017.2778103"}],"event":{"name":"SIGGRAPH Conference Papers '25: Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers","location":"Vancouver BC Canada","acronym":"SIGGRAPH Conference Papers '25","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"]},"container-title":["Proceedings of the Special Interest Group on Computer Graphics and Interactive Techniques Conference Conference Papers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3721238.3730692","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T14:57:44Z","timestamp":1774018664000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3721238.3730692"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,27]]},"references-count":44,"alternative-id":["10.1145\/3721238.3730692","10.1145\/3721238"],"URL":"https:\/\/doi.org\/10.1145\/3721238.3730692","relation":{},"subject":[],"published":{"date-parts":[[2025,7,27]]},"assertion":[{"value":"2025-07-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}