{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T18:33:40Z","timestamp":1770748420645,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,14]],"date-time":"2024-06-14T00:00:00Z","timestamp":1718323200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,14]]},"DOI":"10.1145\/3665939.3665959","type":"proceedings-article","created":{"date-parts":[[2024,6,18]],"date-time":"2024-06-18T13:22:04Z","timestamp":1718716924000},"page":"1-4","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["It Took Longer than I was Expecting: Why is Dataset Search Still so Hard?"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0949-7290","authenticated-orcid":false,"given":"Madelon","family":"Hulsebos","sequence":"first","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-5229-3913","authenticated-orcid":false,"given":"Wenjing","family":"Lin","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0919-9672","authenticated-orcid":false,"given":"Shreya","family":"Shankar","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4538-4752","authenticated-orcid":false,"given":"Aditya","family":"Parameswaran","sequence":"additional","affiliation":[{"name":"UC Berkeley, Berkeley, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,6,18]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"[n. d.]. Adding Intelligence to Databricks Search. https:\/\/www.databricks.com\/blog\/adding-intelligence-to-databricks-search. Accessed: 2024-03-29."},{"key":"e_1_3_2_1_2_1","unstructured":"[n. d.]. A peek inside how Snowflake's new Universal Search feature was built. https:\/\/medium.com\/snowflake\/a-peek-inside-how-snowflakes-new-universal-search-feature-was-built-dfd1188176d0. Accessed: 2024-03-29."},{"key":"e_1_3_2_1_3_1","volume-title":"d.]","year":"2020","unstructured":"[n. d.]. Volume of data\/information created, captured, copied, and consumed worldwide from 2010 to 2020, with forecasts from 2021 to 2025. https:\/\/www.statista.com\/statistics\/871513\/worldwide-data-created\/. Accessed: 2024-03-29."},{"key":"e_1_3_2_1_4_1","unstructured":"2023. Datahub: A Modern Data Catalog. https:\/\/datahubproject.io\/docs\/next."},{"key":"e_1_3_2_1_5_1","volume-title":"Croissant: A Metadata Format for ML-Ready Datasets. arXiv preprint arXiv:2403.19546","author":"Akhtar Mubashara","year":"2024","unstructured":"Mubashara Akhtar, Omar Benjelloun, Costanza Conforti, et al. 2024. Croissant: A Metadata Format for ML-Ready Datasets. arXiv preprint arXiv:2403.19546 (2024)."},{"key":"e_1_3_2_1_6_1","first-page":"28","article-title":"Lakehouse: a new generation of open platforms that unify data warehousing and advanced analytics","volume":"8","author":"Armbrust Michael","year":"2021","unstructured":"Michael Armbrust, Ali Ghodsi, Reynold Xin, and Matei Zaharia. 2021. Lakehouse: a new generation of open platforms that unify data warehousing and advanced analytics. In Proceedings of CIDR, Vol. 8. 28.","journal-title":"Proceedings of CIDR"},{"key":"e_1_3_2_1_7_1","volume-title":"Voyager: Data discovery and integration for data science. JDIQ","author":"Bogatu Alex","year":"2022","unstructured":"Alex Bogatu, Norman W Paton, Mark Douthwaite, and Andre Freitas. 2022. Voyager: Data discovery and integration for data science. JDIQ (2022)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Dan Brickley Matthew Burgess and Natasha Noy. 2019. Google Dataset Search: Building a search engine for datasets in an open Web ecosystem. In WWW. 1365--1375.","DOI":"10.1145\/3308558.3313685"},{"key":"e_1_3_2_1_9_1","volume-title":"Retrieve","author":"Cappuzzo Riccardo","year":"2024","unstructured":"Riccardo Cappuzzo, Gael Varoquaux, Aimee Coelho, and Paolo Papotti. 2024. Retrieve, Merge, Predict: Augmenting Tables with Data Lakes. arXiv preprint arXiv:2402.06282 (2024)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476311.3476346"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00778-019-00564-x"},{"key":"e_1_3_2_1_12_1","volume-title":"Semantics-aware Dataset Discovery from Data Lakes with Contextualized Column-based Representation Learning. PVLDB","author":"Fan Grace","year":"2023","unstructured":"Grace Fan, Jin Wang, Yuliang Li, Dan Zhang, and Ren\u00e9e J Miller. 2023. Semantics-aware Dataset Discovery from Data Lakes with Contextualized Column-based Representation Learning. PVLDB (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2018.00094"},{"key":"e_1_3_2_1_14_1","volume-title":"Sergi Nadal Francesch, and \u00d3scar Romero Moral","author":"de Jes\u00fas Flores Herrera Javier","year":"2021","unstructured":"Javier de Jes\u00fas Flores Herrera, Sergi Nadal Francesch, and \u00d3scar Romero Moral. 2021. Towards scalable data discovery. In EDBT'21. 433--438."},{"key":"e_1_3_2_1_15_1","volume-title":"Neural approaches to conversational information retrieval","author":"Gao Jianfeng","unstructured":"Jianfeng Gao, Chenyan Xiong, Paul Bennett, and Nick Craswell. 2023. Neural approaches to conversational information retrieval. Vol. 44. Springer Nature."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2018396.2018423"},{"key":"e_1_3_2_1_17_1","unstructured":"Mark Grover. 2019. Amundsen --- Lyft's data discovery & metadata engine. https:\/\/eng.lyft.com\/amundsen-lyfts-data-discovery-metadata-engine-62d27254fbb9. Accessed: 2024-03-29."},{"key":"e_1_3_2_1_18_1","volume-title":"Open domain question answering over tables via dense retrieval. arXiv preprint arXiv:2103.12011","author":"Herzig Jonathan","year":"2021","unstructured":"Jonathan Herzig, Thomas M\u00fcller, Syrine Krichene, and Julian Martin Eisenschlos. 2021. Open domain question answering over tables via dense retrieval. arXiv preprint arXiv:2103.12011 (2021)."},{"key":"e_1_3_2_1_19_1","volume-title":"The Fast and the Private: Task-based Dataset Search. arXiv preprint arXiv:2308.05637","author":"Huang Zezhou","year":"2023","unstructured":"Zezhou Huang, Jiaxiang Liu, Haonan Wang, and Eugene Wu. 2023. The Fast and the Private: Task-based Dataset Search. arXiv preprint arXiv:2308.05637 (2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330993"},{"key":"e_1_3_2_1_21_1","volume-title":"CHORUS: foundation models for unified data discovery and exploration. arXiv preprint arXiv:2306.09610","author":"Kayali Moe","year":"2023","unstructured":"Moe Kayali, Anton Lykov, Ilias Fountalis, Nikolaos Vasiloglou, Dan Olteanu, and Dan Suciu. 2023. CHORUS: foundation models for unified data discovery and exploration. arXiv preprint arXiv:2306.09610 (2023)."},{"key":"e_1_3_2_1_22_1","volume-title":"Olio: A Semantic Search Interface for Data Repositories. In UIST'23","author":"Setlur Vidya","year":"2023","unstructured":"Vidya Setlur, Andriy Kanyuka, and Arjun Srinivasan. 2023. Olio: A Semantic Search Interface for Data Repositories. In UIST'23. 1--16."},{"key":"e_1_3_2_1_23_1","volume-title":"Solo: Data Discovery Using Natural Language Questions Via A Self-Supervised Approach. In SIGMOD.","author":"Wang Qiming","year":"2023","unstructured":"Qiming Wang and Raul Castro Fernandez. 2023. Solo: Data Discovery Using Natural Language Questions Via A Self-Supervised Approach. In SIGMOD."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3299869.3300065"}],"event":{"name":"HILDA 24: 2024 Workshop on Human-In-the-Loop Data Analytics","location":"Santiago AA Chile","acronym":"HILDA 24","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 2024 Workshop on Human-In-the-Loop Data Analytics"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665939.3665959","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3665939.3665959","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:44:28Z","timestamp":1750290268000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3665939.3665959"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,14]]},"references-count":24,"alternative-id":["10.1145\/3665939.3665959","10.1145\/3665939"],"URL":"https:\/\/doi.org\/10.1145\/3665939.3665959","relation":{},"subject":[],"published":{"date-parts":[[2024,6,14]]},"assertion":[{"value":"2024-06-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}