{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T04:32:20Z","timestamp":1773376340289,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":16,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006769","name":"Russian Science Foundation","doi-asserted-by":"publisher","award":["24-41-02039"],"award-info":[{"award-number":["24-41-02039"]}],"id":[{"id":"10.13039\/501100006769","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,3,16]]},"DOI":"10.1145\/3776734.3794571","type":"proceedings-article","created":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:05:48Z","timestamp":1773345948000},"page":"1130-1134","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["HoverAI: An Embodied Aerial Agent for Natural Human-Drone Interaction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-3353-3712","authenticated-orcid":false,"given":"Yuhua","family":"Jin","sequence":"first","affiliation":[{"name":"Chinese University of Hong Kong, Shenzhen, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3556-5437","authenticated-orcid":false,"given":"Nikita","family":"Kuzmin","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7492-7116","authenticated-orcid":false,"given":"Georgii","family":"Demianchuk","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-5006-8937","authenticated-orcid":false,"given":"Mariya","family":"Lezina","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0850-6519","authenticated-orcid":false,"given":"Fawad","family":"Mehboob","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0156-1890","authenticated-orcid":false,"given":"Issatay","family":"Tokmurziyev","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5974-9257","authenticated-orcid":false,"given":"Miguel","family":"Altamirano Cabrera","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-7054-7722","authenticated-orcid":false,"given":"Muhammad Ahsan","family":"Mustafa","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8055-5345","authenticated-orcid":false,"given":"Dzmitry","family":"Tsetserukou","sequence":"additional","affiliation":[{"name":"Skolkovo Institute of Science and Technology, Moscow, Russian Federation"}]}],"member":"320","published-online":{"date-parts":[[2026,3,16]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3714477"},{"key":"e_1_3_2_2_2_1","volume-title":"Maskbot: Real-time robotic projection mapping with head motion tracking. In SIGGRAPH Asia 2020 Emerging Technologies. 1\u20132.","author":"Cabrera Miguel Altamirano","year":"2020","unstructured":"Miguel Altamirano Cabrera, Igor Usachev, Juan Heredia, Jonathan Tirado, Aleksey Fedoseev, and Dzmitry Tsetserukou. 2020. Maskbot: Real-time robotic projection mapping with head motion tracking. In SIGGRAPH Asia 2020 Emerging Technologies. 1\u20132."},{"key":"e_1_3_2_2_3_1","unstructured":"Edresson Casanova Kelly Davis Eren Golge Gorkem Goknar Iulian Gulea Logan Hart Aya Aljafari Joshua Meyer et al. 2024. Xtts: a massively multilingual zero-shot text-to-speech model. arxiv:2406.04904. Retrieved from https:\/\/arxiv.org\/abs\/2406.04904"},{"key":"e_1_3_2_2_4_1","unstructured":"Google DeepMind. 2024. gemma:7b-instruct: Open model for Ollama. https:\/\/ollama.com\/library\/gemma:7b-instruct Accessed: 2025-12-08"},{"key":"e_1_3_2_2_5_1","unstructured":"Dronisos. 2025. Drone Light Shows indoor vs outdoor. https:\/\/www.dronisos.com\/post\/drone-light-shows-indoor-vs-outdoor Accessed: 2025-12-08"},{"key":"e_1_3_2_2_6_1","unstructured":"Pascale Fung Yoram Bachrach Asli Celikyilmaz Kamalika Chaudhuri Delong Chen Willy Chung Emmanuel Dupoux Hongyu Gong Herv\u00e9 J\u00e9gou et al. 2025. Embodied AI Agents: Modeling the World. arxiv:2506.22355. Retrieved from https:\/\/arxiv.org\/abs\/2506.22355"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3548250"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/2858036.2858519"},{"key":"e_1_3_2_2_9_1","unstructured":"InsightFace. 2017. InsightFace: State-of-the-art 2D & 3D Face Analysis Project. https:\/\/github.com\/deepinsight\/insightface Accessed: 2025-12-08"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","unstructured":"Shiva Lingam Rutger Verstegen Sebastiaan Petermeijer and Marieke Martens. 2025. Human Interactions With Delivery Drones in Public Spaces: Design Recommendations From Recipient and Bystander Perspectives. https:\/\/doi.org\/10.13140\/RG.2.2.16544.70405 10.13140\/RG.2.2.16544.70405","DOI":"10.13140\/RG.2.2.16544.70405"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1080\/10447318.2024.2400756"},{"key":"e_1_3_2_2_12_1","volume-title":"Oleg Sautenkov, Artyom Myshlyaev, Grik Tadevosyan, Yasheerah Yaqoot, and Dzmitry Tsetserukou.","author":"Lykov Artem","year":"2025","unstructured":"Artem Lykov, Valerii Serpiva, Muhammad Haris Khan, Oleg Sautenkov, Artyom Myshlyaev, Grik Tadevosyan, Yasheerah Yaqoot, and Dzmitry Tsetserukou. 2025. CognitiveDrone: A VLA Model and Evaluation Benchmark for Real-Time Cognitive Task Solving and Reasoning in UAVs. arxiv:2503.01378.. Retrieved from https:\/\/arxiv.org\/abs\/2503.01378"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/2929464.2932429"},{"key":"e_1_3_2_2_14_1","volume-title":"Int. conf. on machine learning. 28492\u201328518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In Int. conf. on machine learning. 28492\u201328518."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR-Adjunct68609.2025.00255"},{"key":"e_1_3_2_2_16_1","unstructured":"Silero Team. 2024. Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD) Number Detector and Language Classifier. https:\/\/github.com\/snakers4\/silero-vad"}],"event":{"name":"HRI '26: 21st ACM\/IEEE International Conference on Human-Robot Interaction","location":"Edinburgh Scotland UK","acronym":"HRI '26","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGCHI ACM Special Interest Group on Computer-Human Interaction","IEEE RAS"]},"container-title":["Companion Proceedings of the 21st ACM\/IEEE International Conference on Human-Robot Interaction"],"original-title":[],"deposited":{"date-parts":[[2026,3,12]],"date-time":"2026-03-12T20:10:45Z","timestamp":1773346245000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3776734.3794571"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,16]]},"references-count":16,"alternative-id":["10.1145\/3776734.3794571","10.1145\/3776734"],"URL":"https:\/\/doi.org\/10.1145\/3776734.3794571","relation":{},"subject":[],"published":{"date-parts":[[2026,3,16]]},"assertion":[{"value":"2026-03-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}