{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T20:55:56Z","timestamp":1776113756960,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2348121"],"award-info":[{"award-number":["2348121"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3679157","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"5365-5369","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["A Generative Benchmark Creation Framework for Detecting Common Data Table Versions"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-7622-7196","authenticated-orcid":false,"given":"Daniel C.","family":"Fox","sequence":"first","affiliation":[{"name":"Worcester Polytechnic Institute, Worcester, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5720-1207","authenticated-orcid":false,"given":"Aamod","family":"Khatiwada","sequence":"additional","affiliation":[{"name":"Northeastern University, Boston, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8803-8481","authenticated-orcid":false,"given":"Roee","family":"Shraga","sequence":"additional","affiliation":[{"name":"Worcester Polytechnic Institute, Worcester, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2024. https:\/\/nnsight.net\/"},{"key":"e_1_3_2_1_2_1","unstructured":"2024. https:\/\/llama.meta.com\/llama3\/"},{"key":"e_1_3_2_1_3_1","unstructured":"2024. https:\/\/openai.com\/index\/hello-gpt-4o\/"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.14778\/3626292.3626294"},{"key":"e_1_3_2_1_5_1","volume-title":"Parameswaran","author":"Bhardwaj Anant P.","year":"2015","unstructured":"Anant P. Bhardwaj, Souvik Bhattacherjee, Amit Chavan, Amol Deshpande, Aaron J. Elmore, Samuel Madden, and Aditya G. Parameswaran. 2015. DataHub: Collaborative Data Science & Dataset Version Management at Scale. In CIDR."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.14778\/2824032.2824035"},{"key":"e_1_3_2_1_7_1","volume-title":"Reng Chiz Der, and Christian Bizer","author":"Brinkmann Alexander","year":"2023","unstructured":"Alexander Brinkmann, Roee Shraga, Reng Chiz Der, and Christian Bizer. 2023. Product Information Extraction using ChatGPT. arXiv preprint arXiv:2306.14921 (2023)."},{"key":"e_1_3_2_1_8_1","unstructured":"Boris Glavic Giansalvatore Mecca Ren\u00e9e J. Miller Paolo Papotti Donatello Santoro and Enzo Veltri. 2024. Similarity Measures For Incomplete Database Instances. In EDBT. 461--473."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2882903.2899389"},{"key":"e_1_3_2_1_10_1","volume-title":"Government's open data","author":"The","year":"2020","unstructured":"The home of the U.S. Government's open data. 2020. https:\/\/data.gov\/"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"e_1_3_2_1_12_1","unstructured":"Albert Q. Jiang Alexandre Sablayrolles Arthur Mensch Chris Bamford Devendra Singh Chaplot Diego de las Casas Florian Bressand Gianna Lengyel Guillaume Lample Lucile Saulnier L\u00e9lio Renard Lavaud Marie-Anne Lachaux Pierre Stock Teven Le Scao Thibaut Lavril Thomas Wang Timoth\u00e9e Lacroix and William El Sayed. 2023. Mistral 7B. Technical Report."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3588689"},{"key":"e_1_3_2_1_14_1","volume-title":"Joint Proceedings of Workshops at the 49th International Conference on Very Large Data Bases (VLDB 2023), Vancouver, Canada, August 28 - September 1, 2023 (CEUR Workshop Proceedings","volume":"1","author":"Korini Keti","year":"2023","unstructured":"Keti Korini and Christian Bizer. 2023. Column Type Annotation using ChatGPT. In Joint Proceedings of Workshops at the 49th International Conference on Very Large Data Bases (VLDB 2023), Vancouver, Canada, August 28 - September 1, 2023 (CEUR Workshop Proceedings, Vol. 3462). CEUR-WS.org. https:\/\/ceur-ws.org\/Vol- 3462\/TADA1.pdf"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"e_1_3_2_1_16_1","volume-title":"DataLore: Can a large language model find all lost scrolls in a data repository?","author":"Lou Yuze","year":"2024","unstructured":"Yuze Lou, Chuan Lei, Xiao Qin, Zichen Wang, Christos Faloutsos, Rishita Anubhai, and Huzefa Rangwala. 2024. DataLore: Can a large language model find all lost scrolls in a data repository? (2024)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.14778\/3352063.3352116"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.14778\/3192965.3192973"},{"key":"e_1_3_2_1_19_1","volume-title":"Proceedings of the VLDB Endowment (Aug.","author":"Pal Koyena","year":"2023","unstructured":"Koyena Pal, Aamod Khatiwada, Roee Shraga, and Renee J. Miller. 2023. Generative Benchmark Creation for Table Union Search. Proceedings of the VLDB Endowment (Aug. 2023)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.14778\/3231751.3231757"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978--3-030--27615--7_23"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3448016.3452767"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476303"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"12706","author":"Zhao Zihao","year":"2021","unstructured":"Zihao Zhao, Eric Wallace, Shi Feng, Dan Klein, and Sameer Singh. 2021. Calibrate Before Use: Improving Few-shot Performance of Language Models. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 12697--12706. https:\/\/proceedings.mlr.press\/v139\/zhao21c.html"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","location":"Boise ID USA","acronym":"CIKM '24","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"]},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679157","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3679157","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:03:28Z","timestamp":1750291408000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3679157"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":25,"alternative-id":["10.1145\/3627673.3679157","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3679157","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}