{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T20:02:55Z","timestamp":1779825775241,"version":"3.53.1"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","funder":[{"name":"NSF &#x28;National Science Foundation&#x29;","award":["2345794"],"award-info":[{"award-number":["2345794"]}]},{"name":"Florida Department of Health","award":["25C06"],"award-info":[{"award-number":["25C06"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,5,31]]},"DOI":"10.1145\/3788853.3801601","type":"proceedings-article","created":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T19:14:47Z","timestamp":1779822887000},"page":"54-57","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Hemolix.Extract.V: LLM-based Information Extraction for Documents with AI-based Plan Selection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9679-8947","authenticated-orcid":false,"given":"Todor","family":"Ivanov","sequence":"first","affiliation":[{"name":"Florida State University, Tallahassee, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6911-1815","authenticated-orcid":false,"given":"Gyanendra","family":"Shrestha","sequence":"additional","affiliation":[{"name":"Florida State University, Tallahassee, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0189-2733","authenticated-orcid":false,"given":"Karthik","family":"Vemireddy","sequence":"additional","affiliation":[{"name":"Florida State University, Tallahassee, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8432-6179","authenticated-orcid":false,"given":"Anna","family":"Pyayt","sequence":"additional","affiliation":[{"name":"University of South Florida, Tampa, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1354-1215","authenticated-orcid":false,"given":"Michael","family":"Gubanov","sequence":"additional","affiliation":[{"name":"Florida State University, Tallahassee, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"CIDR","author":"Anderson E.","year":"2025","unstructured":"E. Anderson et al. The design of an llm-powered unstructured analytics system. In CIDR, Amsterdam, Jan. 19-22, 2025, 2025."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.findings-emnlp.503"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.14778\/3626292.3626294"},{"key":"e_1_3_2_1_4_1","volume-title":"Proc. VLDB Endow.","author":"Chen Z.","year":"2023","unstructured":"Z. Chen et al. Lingua manga: A generic large language model centric system for data curation. Proc. VLDB Endow., 2023."},{"key":"e_1_3_2_1_5_1","volume-title":"EMNLP","author":"Deng Z.","year":"2024","unstructured":"Z. Deng et al. Text-tuple-table: Towards information integration in text-to-table generation via global tuple extraction. In EMNLP, 2024."},{"key":"e_1_3_2_1_6_1","volume-title":"NAACL-HLT","author":"Devlin J.","year":"2019","unstructured":"J. Devlin et al. BERT: pre-training of deep bidirectional transformers for language understanding. In NAACL-HLT 2019."},{"key":"e_1_3_2_1_7_1","volume-title":"ACM TOIS","author":"Huang L.","year":"2023","unstructured":"L. Huang et al. A survey on hallucination in large language models: Principles, taxonomy, challenges, and open questions. ACM TOIS, 2023."},{"key":"e_1_3_2_1_8_1","volume-title":"Autorag: Automated framework for optimization of retrieval augmented generation pipeline. arXiv:2410.20878","author":"Kim D.","year":"2024","unstructured":"D. Kim et al. Autorag: Automated framework for optimization of retrieval augmented generation pipeline. arXiv:2410.20878, 2024."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/btz682"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_11_1","volume-title":"ACL","author":"Li T.","year":"2023","unstructured":"T. Li et al. A sequence-to-sequence&set model for text-to-table generation. In ACL, 2023."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE65448.2025.00183"},{"key":"e_1_3_2_1_13_1","volume-title":"SIGMOD Demo","author":"Liu C.","year":"2025","unstructured":"C. Liu et al. Palimpchat: Declarative and interactive ai analytics. In SIGMOD Demo, 2025."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.14778\/3749646.3749685"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.eacl-long.151"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.14778\/3746405.3746426"},{"key":"e_1_3_2_1_17_1","volume-title":"EDBT","author":"Shrestha G.","year":"2025","unstructured":"G. Shrestha et al. Tabular embeddings for tables with bi-dimensional hierarchical metadata and nesting. In EDBT, 2025."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2017.8258558"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3184558.3191600"},{"key":"e_1_3_2_1_20_1","unstructured":"spacy. https:\/\/spacy.io\/."},{"key":"e_1_3_2_1_21_1","volume-title":"Gpt-ner: Named entity recognition via large language models. arXiv:2304.10428","author":"Wang S.","year":"2023","unstructured":"S. Wang et al. Gpt-ner: Named entity recognition via large language models. arXiv:2304.10428, 2023."},{"key":"e_1_3_2_1_22_1","volume-title":"ACL","author":"Wu X.","year":"2022","unstructured":"X. Wu et al. Text-to-table: A new way of information extraction. In ACL 2022."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3722212.3725103"}],"event":{"name":"SIGMOD\/PODS '26: International Conference on Management of Data","location":"Bengaluru India","sponsor":["SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Companion of the International Conference on Management of Data"],"original-title":[],"deposited":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T19:17:54Z","timestamp":1779823074000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3788853.3801601"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,5,30]]},"references-count":23,"alternative-id":["10.1145\/3788853.3801601","10.1145\/3788853"],"URL":"https:\/\/doi.org\/10.1145\/3788853.3801601","relation":{},"subject":[],"published":{"date-parts":[[2026,5,30]]},"assertion":[{"value":"2026-05-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}