{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,11]],"date-time":"2026-01-11T05:12:20Z","timestamp":1768108340583,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":46,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,10]],"date-time":"2022-06-10T00:00:00Z","timestamp":1654819200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"NSF","award":["CNS-1651570"],"award-info":[{"award-number":["CNS-1651570"]}]},{"name":"Google"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,10]]},"DOI":"10.1145\/3514221.3517897","type":"proceedings-article","created":{"date-parts":[[2022,6,12]],"date-time":"2022-06-12T02:33:49Z","timestamp":1655001229000},"page":"1934-1947","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["TASTI: Semantic Indexes for Machine Learning-based Queries over Unstructured Data"],"prefix":"10.1145","author":[{"given":"Daniel","family":"Kang","sequence":"first","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"given":"John","family":"Guibas","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"given":"Peter D.","family":"Bailis","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"given":"Tatsunori","family":"Hashimoto","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]},{"given":"Matei","family":"Zaharia","sequence":"additional","affiliation":[{"name":"Stanford University, Stanford, CA, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,6,11]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Pankaj K Agarwal Sariel Har-Peled Kasturi R Varadarajan et al. 2005. Geometric approximation via coresets. Combinatorial and computational geometry Vol. 52 (2005) 1--30."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-57301-1_5"},{"key":"e_1_3_2_2_3_1","volume-title":"Predicate Optimization for a Visual Analytics Database. ICDE","author":"Anderson Michael R","year":"2019","unstructured":"Michael R Anderson, Michael Cafarella, Thomas F Wenisch, and German Ros. 2019. Predicate Optimization for a Visual Analytics Database. ICDE (2019)."},{"key":"e_1_3_2_2_4_1","volume-title":"Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670","author":"Ardila Rosana","year":"2019","unstructured":"Rosana Ardila, Megan Branson, Kelly Davis, Michael Henretty, Michael Kohler, Josh Meyer, Reuben Morais, Lindsay Saunders, Francis M Tyers, and Gregor Weber. 2019. Common voice: A massively-multilingual speech corpus. arXiv preprint arXiv:1912.06670 (2019)."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10590-1_38"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3318464.3389692"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.14778\/3415478.3415498"},{"key":"e_1_3_2_2_8_1","volume-title":"SIGFIDET Workshop on Data Description, Access and Control .","author":"Bayer Rudolf","year":"1970","unstructured":"Rudolf Bayer and Edward McCreight. 1970. Organization and maintenance of large ordered indexes. In SIGFIDET Workshop on Data Description, Access and Control ."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"e_1_3_2_2_10_1","volume-title":"New frameworks for offline and streaming coreset constructions. arXiv preprint arXiv:1612.00889","author":"Braverman Vladimir","year":"2016","unstructured":"Vladimir Braverman, Dan Feldman, and Harry Lang. 2016. New frameworks for offline and streaming coreset constructions. arXiv preprint arXiv:1612.00889 (2016)."},{"key":"e_1_3_2_2_11_1","volume-title":"Scaling Video Analytics on Constrained Edge Nodes. SysML","author":"Canel Christopher","year":"2019","unstructured":"Christopher Canel, Thomas Kim, Giulio Zhou, Conglong Li, Hyeontaek Lim, David Andersen, Michael Kaminsky, and Subramanya Dulloor. 2019. Scaling Video Analytics on Constrained Edge Nodes. SysML (2019)."},{"key":"e_1_3_2_2_12_1","volume-title":"Proceedings of the 23rd VLDB conference","author":"Ciaccia Paolo","year":"1997","unstructured":"Paolo Ciaccia, Marco Patella, and Pavel Zezula. 1997. M-tree: An E cient Access Method for Similarity Search in Metric Spaces. In Proceedings of the 23rd VLDB conference, Athens, Greece. Citeseer, 426--435."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/356770.356776"},{"key":"e_1_3_2_2_14_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/191843.191925"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"crossref","unstructured":"Myron Flickner Harpreet Sawhney Wayne Niblack Jonathan Ashley Qian Huang Byron Dom Monika Gorkani Jim Hafner Denis Lee Dragutin Petkovic et al. 1995. Query by image and video content: The QBIC system. computer Vol. 28 9 (1995) 23--32.","DOI":"10.1109\/2.410146"},{"key":"e_1_3_2_2_17_1","volume-title":"Rekall: Specifying video events using compositions of spatiotemporal labels. arXiv preprint arXiv:1910.02993","author":"Fu Daniel Y","year":"2019","unstructured":"Daniel Y Fu, Will Crichton, James Hong, Xinwei Yao, Haotian Zhang, Anh Truong, Avanika Narayan, Maneesh Agrawala, Christopher R\u00e9, and Kayvon Fatahalian. 2019. Rekall: Specifying video events using compositions of spatiotemporal labels. arXiv preprint arXiv:1910.02993 (2019)."},{"key":"e_1_3_2_2_18_1","unstructured":"Hector Garcia-Molina. 2008. Database systems: the complete book .Pearson Education India."},{"key":"e_1_3_2_2_19_1","volume-title":"Clustering to minimize the maximum intercluster distance. Theoretical computer science","author":"Gonzalez Teofilo F","year":"1985","unstructured":"Teofilo F Gonzalez. 1985. Clustering to minimize the maximum intercluster distance. Theoretical computer science, Vol. 38 (1985), 293--306."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/602259.602266"},{"key":"e_1_3_2_2_21_1","volume-title":"Monte Carlo Methods","author":"Hammersley John Michael","unstructured":"John Michael Hammersley and David Christopher Handscomb. 1964. General principles of the Monte Carlo method. In Monte Carlo Methods. Springer, 50--75."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/1007352.1007400"},{"key":"e_1_3_2_2_23_1","volume-title":"Mask r-cnn","author":"He Kaiming","unstructured":"Kaiming He, Georgia Gkioxari, Piotr Doll\u00e1r, and Ross Girshick. 2017. Mask r-cnn. In ICCV. IEEE, 2980--2988."},{"key":"e_1_3_2_2_24_1","unstructured":"Kaiming He Xiangyu Zhang Shaoqing Ren and Jian Sun. 2016. Deep residual learning for image recognition. In CVPR. 770--778."},{"key":"e_1_3_2_2_25_1","unstructured":"Joseph M Hellerstein Jeffrey F Naughton and Avi Pfeffer. 1995. Generalized search trees for database systems .September."},{"key":"e_1_3_2_2_26_1","volume-title":"Focus: Querying Large Video Datasets with Low Latency and Low Cost. OSDI","author":"Hsieh Kevin","year":"2018","unstructured":"Kevin Hsieh, Ganesh Ananthanarayanan, Peter Bodik, Paramvir Bahl, Matthai Philipose, Phillip B Gibbons, and Onur Mutlu. 2018. Focus: Querying Large Video Datasets with Low Latency and Low Cost. OSDI (2018)."},{"key":"e_1_3_2_2_27_1","first-page":"68","article-title":"Database Cracking","volume":"7","author":"Idreos Stratos","year":"2007","unstructured":"Stratos Idreos, Martin L Kersten, Stefan Manegold, et al. 2007. Database Cracking.. In CIDR, Vol. 7. 68--78.","journal-title":"CIDR"},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/1071610.1071612"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3230543.3230574"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622396"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.14778\/3372716.3372725"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.14778\/3137628.3137664"},{"key":"e_1_3_2_2_33_1","volume-title":"Approximate Selection with Guarantees using Proxies. PVLDB","author":"Kang Daniel","year":"2020","unstructured":"Daniel Kang, Edward Gan, Peter Bailis, Tatsunori Hashimoto, and Matei Zaharia. 2020. Approximate Selection with Guarantees using Proxies. PVLDB (2020)."},{"key":"e_1_3_2_2_34_1","volume-title":"2021 a. Accelerating Approximate Aggregation Queries with Expensive Predicates. PVLDB","author":"Kang Daniel","year":"2021","unstructured":"Daniel Kang, John Guibas, Peter Bailis, Tatsunori Hashimoto, Yi Sun, and Matei Zaharia. 2021 a. Accelerating Approximate Aggregation Queries with Expensive Predicates. PVLDB (2021)."},{"key":"e_1_3_2_2_35_1","volume-title":"2021 b. Jointly Optimizing Preprocessing and Inference for DNN-based Visual Analytics. PVLDB","author":"Kang Daniel","year":"2021","unstructured":"Daniel Kang, Ankit Mathur, Teja Veeramacheneni, Peter Bailis, and Matei Zaharia. 2021 b. Jointly Optimizing Preprocessing and Inference for DNN-based Visual Analytics. PVLDB (2021)."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2015.7301269"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"crossref","unstructured":"Yao Lu Aakanksha Chowdhery Srikanth Kandula and Surajit Chaudhuri. 2018. Accelerating Machine Learning Inference with Probabilistic Predicates. In SIGMOD. ACM 1493--1508.","DOI":"10.1145\/3183713.3183751"},{"key":"e_1_3_2_2_39_1","volume-title":"Active learning for convolutional neural networks: A core-set approach. arXiv preprint arXiv:1708.00489","author":"Sener Ozan","year":"2017","unstructured":"Ozan Sener and Silvio Savarese. 2017. Active learning for convolutional neural networks: A core-set approach. arXiv preprint arXiv:1708.00489 (2017)."},{"key":"e_1_3_2_2_40_1","volume-title":"Proceedings of the fourth ACM international conference on Multimedia. 87--98","author":"Smith John R","year":"1997","unstructured":"John R Smith and Shih-Fu Chang. 1997. VisualSEEk: a fully automated content-based image query system. In Proceedings of the fourth ACM international conference on Multimedia. 87--98."},{"key":"e_1_3_2_2_41_1","unstructured":"Jeffrey D Ullman. 1984. Principles of database systems. Galgotia publications."},{"key":"e_1_3_2_2_42_1","article-title":"Distance metric learning for large margin nearest neighbor classification","volume":"10","author":"Weinberger Kilian Q","year":"2009","unstructured":"Kilian Q Weinberger and Lawrence K Saul. 2009. Distance metric learning for large margin nearest neighbor classification. Journal of Machine Learning Research, Vol. 10, 2 (2009).","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3302424.3303971"},{"key":"e_1_3_2_2_44_1","first-page":"421","article-title":"Indexing the distance: An efficient method to knn processing","volume":"1","author":"Yu Cui","year":"2001","unstructured":"Cui Yu, Beng Chin Ooi, Kian-Lee Tan, and HV Jagadish. 2001. Indexing the distance: An efficient method to knn processing. In VLDB, Vol. 1. 421--430.","journal-title":"VLDB"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.14778\/3372716.3372721"},{"key":"e_1_3_2_2_46_1","volume-title":"Seq2SQL: Generating Structured Queries from Natural Language using Reinforcement Learning. CoRR","author":"Zhong Victor","year":"2017","unstructured":"Victor Zhong, Caiming Xiong, and Richard Socher. 2017. Seq2SQL: Generating Structured Queries from Natural Language using Reinforcement Learning. CoRR, Vol. abs\/1709.00103 (2017)."}],"event":{"name":"SIGMOD\/PODS '22: International Conference on Management of Data","location":"Philadelphia PA USA","acronym":"SIGMOD\/PODS '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2022 International Conference on Management of Data"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3517897","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3517897","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3514221.3517897","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:30:36Z","timestamp":1750188636000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3514221.3517897"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,10]]},"references-count":46,"alternative-id":["10.1145\/3514221.3517897","10.1145\/3514221"],"URL":"https:\/\/doi.org\/10.1145\/3514221.3517897","relation":{},"subject":[],"published":{"date-parts":[[2022,6,10]]},"assertion":[{"value":"2022-06-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}