{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,7]],"date-time":"2025-11-07T19:34:08Z","timestamp":1762544048808,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":42,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,5]],"date-time":"2024-10-05T00:00:00Z","timestamp":1728086400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/501100006374","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2239569"],"award-info":[{"award-number":["2239569"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Cornell University IGNITE Innovation Acceleration Program"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,5]]},"DOI":"10.1145\/3675095.3676611","type":"proceedings-article","created":{"date-parts":[[2024,9,25]],"date-time":"2024-09-25T16:36:36Z","timestamp":1727282196000},"page":"40-47","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":4,"title":["EchoGuide: Active Acoustic Guidance for LLM-Based Eating Event Analysis from Egocentric Videos"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-8791-9340","authenticated-orcid":false,"given":"Vineet","family":"Parikh","sequence":"first","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5283-0765","authenticated-orcid":false,"given":"Saif","family":"Mahmud","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1338-9275","authenticated-orcid":false,"given":"Devansh","family":"Agarwal","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4208-7904","authenticated-orcid":false,"given":"Ke","family":"Li","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5510-6799","authenticated-orcid":false,"given":"Fran\u00e7ois","family":"Guimbreti\u00e8re","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5079-5927","authenticated-orcid":false,"given":"Cheng","family":"Zhang","sequence":"additional","affiliation":[{"name":"Cornell University, Ithaca, NY, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,10,5]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376869"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/3490099.3511154"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3264902"},{"key":"e_1_3_2_1_4_1","unstructured":"Rishi Bommasani Drew A Hudson Ehsan Adeli Russ Altman Simran Arora Sydney von Arx Michael S Bernstein Jeannette Bohg Antoine Bosselut Emma Brunskill et al. 2021. On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258 (2021)."},{"key":"e_1_3_2_1_5_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems Vol. 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3649366"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3643832.3661890"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3570361.3613281"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3390\/s16071067"},{"volume-title":"https:\/\/gopro.com\/en\/us\/shop\/cameras\/hero9-black\/CHDHX-901-master.html. [Online","year":"2023","key":"e_1_3_2_1_10_1","unstructured":"GoPro. 2020. HERO9 Black. https:\/\/gopro.com\/en\/us\/shop\/cameras\/hero9-black\/CHDHX-901-master.html. [Online; accessed 12-September-2023]."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"e_1_3_2_1_12_1","volume-title":"Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa.","author":"Kojima Takeshi","year":"2022","unstructured":"Takeshi Kojima, Shixiang Shane Gu, Machel Reid, Yutaka Matsuo, and Yusuke Iwasawa. 2022. Large language models are zero-shot reasoners. Advances in neural information processing systems, Vol. 35 (2022), 22199--22213."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3613904.3642910"},{"key":"e_1_3_2_1_14_1","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive nlp tasks","volume":"33","author":"Lewis Patrick","year":"2020","unstructured":"Patrick Lewis, Ethan Perez, Aleksandra Piktus, Fabio Petroni, Vladimir Karpukhin, Naman Goyal, Heinrich K\u00fcttler, Mike Lewis, Wen-tau Yih, Tim Rockt\u00e4schel, et al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems, Vol. 33 (2020), 9459--9474.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"crossref","unstructured":"Ke Li Devansh Agarwal Ruidong Zhang Vipin Gunda Tianjun Mo Saif Mahmud Boao Chen Franccois Guimbreti\u00e8re and Cheng Zhang. 2024. SonicID: User Identification on Smart Glasses with Acoustic Sensing. arXiv preprint arXiv:2406.08273 (2024).","DOI":"10.1145\/3699734"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3636534.3649376"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534621"},{"key":"e_1_3_2_1_18_1","volume-title":"Mattia Soldan, Michael Wray, Rui Yan, Eric Zhongcong Xu, Difei Gao, Rongcheng Tu, Wenzhe Zhao, Weijie Kong, et al.","author":"Lin Kevin Qinghong","year":"2022","unstructured":"Kevin Qinghong Lin, Alex Jinpeng Wang, Mattia Soldan, Michael Wray, Rui Yan, Eric Zhongcong Xu, Difei Gao, Rongcheng Tu, Wenzhe Zhao, Weijie Kong, et al. 2022. Egocentric Video-Language Pretraining. arXiv preprint arXiv:2206.01670 (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","unstructured":"Jerry Liu. 2022. LlamaIndex. https:\/\/doi.org\/10.5281\/zenodo.1234","DOI":"10.5281\/zenodo.1234"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610895"},{"key":"e_1_3_2_1_21_1","volume-title":"ActSonic: Everyday Activity Recognition on Smart Glasses using Active Acoustic Sensing. arXiv preprint arXiv:2404.13924","author":"Mahmud Saif","year":"2024","unstructured":"Saif Mahmud, Vineet Parikh, Qikang Liang, Ke Li, Ruidong Zhang, Ashwin Ajit, Vipin Gunda, Devansh Agarwal, Franccois Guimbreti\u00e8re, and Cheng Zhang. 2024. ActSonic: Everyday Activity Recognition on Smart Glasses using Active Acoustic Sensing. arXiv preprint arXiv:2404.13924 (2024)."},{"key":"e_1_3_2_1_22_1","unstructured":"Meta. 2023. https:\/\/about.fb.com\/news\/2023\/09\/new-ray-ban-meta-smart-glasses\/"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3131894"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_1"},{"key":"e_1_3_2_1_25_1","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia Leoni Aleman Diogo Almeida"},{"key":"e_1_3_2_1_26_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3491102.3502041"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3594738.3611358"},{"key":"e_1_3_2_1_29_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Yonghui Wu Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/2678025.2701405"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01432"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3161188"},{"key":"e_1_3_2_1_33_1","volume-title":"Denny Zhou, et al.","author":"Wei Jason","year":"2022","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Fei Xia, Ed Chi, Quoc V Le, Denny Zhou, et al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems, Vol. 35 (2022), 24824--24837."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00054"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46604-0_38"},{"key":"e_1_3_2_1_36_1","unstructured":"Tianhong Catherine Yu Guilin Hu Ruidong Zhang Hyunchul Lim Saif Mahmud Chi-Jung Lee Ke Li Devansh Agarwal Shuyang Nie Jinseok Oh et al. 2024. Ring-a-Pose: A Ring for Continuous Hand Pose Tracking. arXiv preprint arXiv:2404.12980 (2024)."},{"key":"e_1_3_2_1_37_1","volume-title":"Monitoring chewing and eating in free-living using smart eyeglasses","author":"Zhang Rui","year":"2017","unstructured":"Rui Zhang and Oliver Amft. 2017. Monitoring chewing and eating in free-living using smart eyeglasses. IEEE journal of biomedical and health informatics, Vol. 22, 1 (2017), 23--32."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3594738.3611365"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3580801"},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3546749"},{"key":"e_1_3_2_1_41_1","volume-title":"BERTScore: Evaluating Text Generation with BERT. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SkeHuCVFDr","author":"Tianyi","year":"2020","unstructured":"Tianyi Zhang*, Varsha Kishore*, Felix Wu*, Kilian Q. Weinberger, and Yoav Artzi. 2020. BERTScore: Evaluating Text Generation with BERT. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=SkeHuCVFDr"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00637"}],"event":{"name":"UbiComp '24: The 2024 ACM International Joint Conference on Pervasive and Ubiquitous Computing","sponsor":["SIGCHI ACM Special Interest Group on Computer-Human Interaction","SIGSPATIAL ACM Special Interest Group on Spatial Information"],"location":"Melbourne VIC Australia","acronym":"UbiComp '24"},"container-title":["Proceedings of the 2024 ACM International Symposium on Wearable Computers"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3675095.3676611","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3675095.3676611","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T00:08:17Z","timestamp":1755907697000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3675095.3676611"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,5]]},"references-count":42,"alternative-id":["10.1145\/3675095.3676611","10.1145\/3675095"],"URL":"https:\/\/doi.org\/10.1145\/3675095.3676611","relation":{},"subject":[],"published":{"date-parts":[[2024,10,5]]},"assertion":[{"value":"2024-10-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}