{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:00:33Z","timestamp":1750309233923,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,14]],"date-time":"2024-06-14T00:00:00Z","timestamp":1718323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1910356"],"award-info":[{"award-number":["1910356"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,14]]},"DOI":"10.1145\/3665939.3665964","type":"proceedings-article","created":{"date-parts":[[2024,6,18]],"date-time":"2024-06-18T13:22:04Z","timestamp":1718716924000},"page":"1-7","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Guided Querying over Videos using Autocompletion Suggestions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4291-0745","authenticated-orcid":false,"given":"Hojin","family":"Yoo","sequence":"first","affiliation":[{"name":"The Ohio State University, Columbus, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4138-603X","authenticated-orcid":false,"given":"Arnab","family":"Nandi","sequence":"additional","affiliation":[{"name":"The Ohio State University, Columbus, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Jean-Baptiste Alayrac Jeff Donahue Pauline Luc Antoine Miech Iain Barr Yana Hasson Karel Lenc Arthur Mensch Katherine Millican Malcolm Reynolds et al. 2022. Flamingo: a visual language model for few-shot learning. Advances in neural information processing systems 35 (2022) 23716--23736."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.14778\/3598581.3598599"},{"key":"e_1_3_2_1_3_1","volume-title":"OTIF: Efficient Tracker Pre-processing over Large Video Datasets.(2022).","author":"Bastani Favyen","year":"2022","unstructured":"Favyen Bastani and Samuel Madden. 2022. OTIF: Efficient Tracker Pre-processing over Large Video Datasets.(2022). (2022)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"e_1_3_2_1_5_1","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877--1901."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517857"},{"key":"e_1_3_2_1_7_1","first-page":"55","article-title":"The QueRIE system for Personalized Query Recommendations","volume":"34","author":"Chatzopoulou Gloria","year":"2011","unstructured":"Gloria Chatzopoulou, Magdalini Eirinaki, Suju Koshy, Sarika Mittal, Neoklis Polyzotis, and Jothi Swarubini Vindhiya Varman. 2011. The QueRIE system for Personalized Query Recommendations. IEEE Data Eng. Bull. 34, 2 (2011), 55--60.","journal-title":"IEEE Data Eng. Bull."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/1559845.1559919"},{"key":"e_1_3_2_1_9_1","volume-title":"Xing","author":"Chiang Wei-Lin","year":"2023","unstructured":"Wei-Lin Chiang, Zhuohan Li, Zi Lin, Ying Sheng, Zhanghao Wu, Hao Zhang, Lianmin Zheng, Siyuan Zhuang, Yonghao Zhuang, Joseph E. Gonzalez, Ion Stoica, and Eric P. Xing. 2023. Vicuna: An Open-Source Chatbot Impressing GPT-4 with 90%* ChatGPT Quality. https:\/\/lmsys.org\/blog\/2023-03-30-vicuna\/"},{"key":"e_1_3_2_1_10_1","volume-title":"Junqi Zhao, Weisheng Wang, Boyang Li, Pascale N Fung, and Steven Hoi.","author":"Dai Wenliang","year":"2024","unstructured":"Wenliang Dai, Junnan Li, Dongxu Li, Anthony Meng Huat Tiong, Junqi Zhao, Weisheng Wang, Boyang Li, Pascale N Fung, and Steven Hoi. 2024. Instructblip: Towards general-purpose vision-language models with instruction tuning. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00156"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00957"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/1963405.1963425"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00214"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01457"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00393"},{"volume-title":"Flask web development: developing web applications with python. \" O'Reilly Media","author":"Grinberg Miguel","key":"e_1_3_2_1_17_1","unstructured":"Miguel Grinberg. 2018. Flask web development: developing web applications with python. \" O'Reilly Media, Inc.\"."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3459242"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"e_1_3_2_1_20_1","volume-title":"Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al.","author":"Jiang Albert Q","year":"2023","unstructured":"Albert Q Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, et al. 2023. Mistral 7B. arXiv preprint arXiv:2310.06825 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Blazeit: Optimizing declarative aggregation and limit queries for neural network-based video analytics. arXiv preprint arXiv:1805.01046","author":"Kang Daniel","year":"2018","unstructured":"Daniel Kang, Peter Bailis, and Matei Zaharia. 2018. Blazeit: Optimizing declarative aggregation and limit queries for neural network-based video analytics. arXiv preprint arXiv:1805.01046 (2018)."},{"key":"e_1_3_2_1_22_1","volume-title":"Noscope: optimizing neural network queries over video at scale. arXiv preprint arXiv:1703.02529","author":"Kang Daniel","year":"2017","unstructured":"Daniel Kang, John Emmons, Firas Abuzaid, Peter Bailis, and Matei Zaharia. 2017. Noscope: optimizing neural network queries over video at scale. arXiv preprint arXiv:1703.02529 (2017)."},{"key":"e_1_3_2_1_23_1","volume-title":"VIVA: An End-to-End System for Interactive Video Analytics.. In CIDR.","author":"Kang Daniel","year":"2022","unstructured":"Daniel Kang, Francisco Romero, Peter D Bailis, Christos Kozyrakis, and Matei Zaharia. 2022. VIVA: An End-to-End System for Interactive Video Analytics.. In CIDR."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2484028.2484041"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/1880172.1880175"},{"key":"e_1_3_2_1_26_1","volume-title":"Spatialyze: A Geospatial Video Analytics System with Spatial-Aware Optimizations. arXiv preprint arXiv:2308.03276","author":"Kittivorawong Chanwut","year":"2023","unstructured":"Chanwut Kittivorawong, Yongming Ge, Yousef Helal, and Alvin Cheung. 2023. Spatialyze: A Geospatial Video Analytics System with Spatial-Aware Optimizations. arXiv preprint arXiv:2308.03276 (2023)."},{"key":"e_1_3_2_1_27_1","volume-title":"International conference on machine learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International conference on machine learning. PMLR, 19730--19742."},{"key":"e_1_3_2_1_28_1","volume-title":"Videochat: Chat-centric video understanding. arXiv preprint arXiv:2305.06355","author":"Li KunChang","year":"2023","unstructured":"KunChang Li, Yinan He, Yi Wang, Yizhuo Li, Wenhai Wang, Ping Luo, Yali Wang, Limin Wang, and Yu Qiao. 2023. Videochat: Chat-centric video understanding. arXiv preprint arXiv:2305.06355 (2023)."},{"key":"e_1_3_2_1_29_1","volume-title":"LLaMA-VID: An image is worth 2 tokens in large language models. arXiv preprint arXiv:2311.17043","author":"Li Yanwei","year":"2023","unstructured":"Yanwei Li, Chengyao Wang, and Jiaya Jia. 2023. LLaMA-VID: An image is worth 2 tokens in large language models. arXiv preprint arXiv:2311.17043 (2023)."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33018690"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.340"},{"key":"e_1_3_2_1_32_1","volume-title":"Visual instruction tuning. Advances in neural information processing systems 36","author":"Liu Haotian","year":"2024","unstructured":"Haotian Liu, Chunyuan Li, Qingyang Wu, and Yong Jae Lee. 2024. Visual instruction tuning. Advances in neural information processing systems 36 (2024)."},{"key":"e_1_3_2_1_33_1","volume-title":"Video-chatgpt: Towards detailed video understanding via large vision and language models. arXiv preprint arXiv:2306.05424","author":"Maaz Muhammad","year":"2023","unstructured":"Muhammad Maaz, Hanoona Rasheed, Salman Khan, and Fahad Shahbaz Khan. 2023. Video-chatgpt: Towards detailed video understanding via large vision and language models. arXiv preprint arXiv:2306.05424 (2023)."},{"key":"e_1_3_2_1_34_1","volume-title":"Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 1043--1052","author":"Malla Srikanth","year":"2023","unstructured":"Srikanth Malla, Chiho Choi, Isht Dwivedi, Joon Hee Choi, and Jiachen Li. 2023. DRAMA: Joint Risk Localization and Captioning in Driving. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. 1043--1052."},{"key":"e_1_3_2_1_35_1","volume-title":"More Like This: Query Recommendation for SQL. Department of Computer Science & Engineering","author":"Miles Christopher","year":"2011","unstructured":"Christopher Miles. 2011. More Like This: Query Recommendation for SQL. Department of Computer Science & Engineering. University of Washington, Seattle, WA, USA (2011)."},{"key":"e_1_3_2_1_36_1","volume-title":"2022 IEEE 38th International Conference on Data Engineering (ICDE). IEEE, 2956--2968","author":"Moll Oscar","year":"2022","unstructured":"Oscar Moll, Favyen Bastani, Sam Madden, Mike Stonebraker, Vijay Gadepally, and Tim Kraska. 2022. Exsample: Efficient searches on video repositories through adaptive sampling. In 2022 IEEE 38th International Conference on Data Engineering (ICDE). IEEE, 2956--2968."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/1247480.1247640"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.5555\/1325851.1325879"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.14778\/3402755.3402797"},{"key":"e_1_3_2_1_40_1","volume-title":"International conference on machine learning. PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748--8763."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01196"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.690"},{"key":"e_1_3_2_1_43_1","volume-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","author":"Ren Shaoqing","year":"2016","unstructured":"Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2016. Faster R-CNN: Towards real-time object detection with region proposal networks. IEEE transactions on pattern analysis and machine intelligence 39, 6 (2016), 1137--1149."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01240-3_13"},{"key":"e_1_3_2_1_45_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_46_1","volume-title":"2024 IEEE 40th International Conference on Data Engineering (ICDE).","author":"Winecki Dominik","year":"2024","unstructured":"Dominik Winecki and Arnab Nandi. 2024. V2V: Efficiently Synthesizing Video Results for Video Queries. In 2024 IEEE 40th International Conference on Data Engineering (ICDE)."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"e_1_3_2_1_48_1","volume-title":"See Kiong Ng, and Jiashi Feng","author":"Xu Lin","year":"2024","unstructured":"Lin Xu, Yilin Zhao, Daquan Zhou, Zhijie Lin, See Kiong Ng, and Jiashi Feng. 2024. PLLaVA: Parameter-free LLaVA Extension from Images to Videos for Video Dense Captioning. arXiv preprint arXiv:2404.16994 (2024)."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3526142"},{"key":"e_1_3_2_1_50_1","volume-title":"Video-llama: An instruction-tuned audio-visual language model for video understanding. arXiv preprint arXiv:2306.02858","author":"Zhang Hang","year":"2023","unstructured":"Hang Zhang, Xin Li, and Lidong Bing. 2023. Video-llama: An instruction-tuned audio-visual language model for video understanding. arXiv preprint arXiv:2306.02858 (2023)."},{"key":"e_1_3_2_1_51_1","volume-title":"Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592","author":"Zhu Deyao","year":"2023","unstructured":"Deyao Zhu, Jun Chen, Xiaoqian Shen, Xiang Li, and Mohamed Elhoseiny. 2023. Minigpt-4: Enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)."},{"key":"e_1_3_2_1_52_1","first-page":"998","article-title":"Zero shot detection","volume":"30","author":"Zhu Pengkai","year":"2019","unstructured":"Pengkai Zhu, Hanxiao Wang, and Venkatesh Saligrama. 2019. Zero shot detection. IEEE Transactions on Circuits and Systems for Video Technology 30, 4 (2019), 998--1010.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"}],"event":{"name":"HILDA 24: 2024 Workshop on Human-In-the-Loop Data Analytics","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"],"location":"Santiago AA Chile","acronym":"HILDA 24"},"container-title":["Proceedings of the 2024 Workshop on Human-In-the-Loop Data Analytics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665939.3665964","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3665939.3665964","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:44:28Z","timestamp":1750290268000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665939.3665964"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,14]]},"references-count":52,"alternative-id":["10.1145\/3665939.3665964","10.1145\/3665939"],"URL":"https:\/\/doi.org\/10.1145\/3665939.3665964","relation":{},"subject":[],"published":{"date-parts":[[2024,6,14]]},"assertion":[{"value":"2024-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}