{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T17:42:39Z","timestamp":1776102159386,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,7]],"date-time":"2024-10-07T00:00:00Z","timestamp":1728259200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-sa\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,7]]},"DOI":"10.1145\/3677386.3682095","type":"proceedings-article","created":{"date-parts":[[2024,9,13]],"date-time":"2024-09-13T06:29:25Z","timestamp":1726208965000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["TAGGAR: General-Purpose Task Guidance from Natural Language in Augmented Reality using Vision-Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5358-8423","authenticated-orcid":false,"given":"Daniel","family":"Stover","sequence":"first","affiliation":[{"name":"Center for Human-Computer Interaction, Virginia Tech, United States"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0491-5067","authenticated-orcid":false,"given":"Doug","family":"Bowman","sequence":"additional","affiliation":[{"name":"Center for Human-Computer Interaction, Virginia Tech, United States"}]}],"member":"320","published-online":{"date-parts":[[2024,10,7]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2024. OpenAI Platform. https:\/\/platform.openai.com"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000006"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376688"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCG.2011.4"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cirp.2017.04.006"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1080\/10494820.2013.815221"},{"key":"e_1_3_2_2_7_1","volume-title":"Towards General Purpose Vision Systems: An End-to-End Task-Agnostic Vision-Language Architecture. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2022","author":"Gupta Tanmay","year":"2022","unstructured":"Tanmay Gupta, Amita Kamath, Aniruddha Kembhavi, and Derek Hoiem. 2022. Towards General Purpose Vision Systems: An End-to-End Task-Agnostic Vision-Language Architecture. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (June 2022), 16399\u201316409. https:\/\/openaccess.thecvf.com\/content\/CVPR2022\/html\/Gupta_Towards_General_Purpose_Vision_Systems_An_End-to-End_Task-Agnostic_Vision-Language_Architecture_CVPR_2022_paper"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISMAR.2011.6092386"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3411764.3445283"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","unstructured":"Shilong Liu Zhaoyang Zeng Tianhe Ren Feng Li Hao Zhang Jie Yang Chunyuan Li Jianwei Yang Hang Su Jun Zhu and Lei Zhang. 2023. Grounding DINO: Marrying DINO with Grounded Pre-Training for Open-Set Object Detection. (2023). https:\/\/doi.org\/10.48550\/arXiv.2303.05499 eprint: 2303.05499.","DOI":"10.48550\/arXiv.2303.05499"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3544548.3581442"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2404.13696"},{"key":"e_1_3_2_2_13_1","unstructured":"OpenAI Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman 2024. GPT-4 Technical Report. (2024). https:\/\/doi.org\/10.48550\/arXiv.2303.08774 eprint: 2303.08774."},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2017.06.002"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.rcim.2019.101887"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.2015\/IJIRMF.2455.0620\/202112017"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1080\/00207543.2020.1859636"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/AIxVR59861.2024.00028"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2022.3203104"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.mfglet.2019.08.003"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2008.11239"},{"key":"e_1_3_2_2_22_1","volume-title":"Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s40593-014-0032-x"},{"key":"e_1_3_2_2_24_1","volume-title":"Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates","author":"Zhang Haotian","year":"2022","unstructured":"Haotian Zhang, Pengchuan Zhang, Xiaowei Hu, Yen-Chun Chen, Liunian Li, Xiyang Dai, Lijuan Wang, Lu Yuan, Jenq-Neng Hwang, and Jianfeng Gao. 2022. GLIPv2: Unifying Localization and Vision-Language Understanding. In Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates, Inc., 36067\u201336080. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/ea370419760b421ce12e3082eb2ae1a8-Paper-Conference.pdf"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procir.2022.02.159"}],"event":{"name":"SUI '24: ACM Symposium on Spatial User Interaction","location":"Trier Germany","acronym":"SUI '24","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["ACM Symposium on Spatial User Interaction"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677386.3682095","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3677386.3682095","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T17:25:27Z","timestamp":1755883527000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3677386.3682095"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,7]]},"references-count":25,"alternative-id":["10.1145\/3677386.3682095","10.1145\/3677386"],"URL":"https:\/\/doi.org\/10.1145\/3677386.3682095","relation":{},"subject":[],"published":{"date-parts":[[2024,10,7]]},"assertion":[{"value":"2024-10-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}