{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T06:58:04Z","timestamp":1780383484231,"version":"3.54.1"},"reference-count":32,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T00:00:00Z","timestamp":1702598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,15]],"date-time":"2023-12-15T00:00:00Z","timestamp":1702598400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,15]]},"DOI":"10.1109\/bigdata59044.2023.10386931","type":"proceedings-article","created":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T18:28:47Z","timestamp":1705948127000},"page":"1824-1834","source":"Crossref","is-referenced-by-count":16,"title":["Automatic Data Transformation Using Large Language Model - An Experimental Study on Building Energy Data"],"prefix":"10.1109","author":[{"given":"Ankita","family":"Sharma","sequence":"first","affiliation":[{"name":"Lawrence Berkeley National Lab"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xuanmao","family":"Li","sequence":"additional","affiliation":[{"name":"Arizona State University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hong","family":"Guan","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Lab"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guoxin","family":"Sun","sequence":"additional","affiliation":[{"name":"Arizona State University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Liang","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Arizona"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lanjun","family":"Wang","sequence":"additional","affiliation":[{"name":"Tianjin University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kesheng","family":"Wu","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Lab"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lei","family":"Cao","sequence":"additional","affiliation":[{"name":"University of Arizona"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Erkang","family":"Zhu","sequence":"additional","affiliation":[{"name":"Microsoft"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Alexander","family":"Sim","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Lab"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Teresa","family":"Wu","sequence":"additional","affiliation":[{"name":"Arizona State University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jia","family":"Zou","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Lab"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"U.S. energy consumption by source and sector, 2022"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.adapen.2022.100084"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.14778\/3407790.3407831"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.14778\/3476249.3476303"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.14778\/3611479.3611534"},{"key":"ref6","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv preprint arXiv:1810.04805"},{"key":"ref7","article-title":"Survive the schema changes: integration of unmanaged data using deep learning","author":"Wang","year":"2020","journal-title":"arXiv preprint arXiv:2010.07586"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.14778\/3551793.3551841"},{"key":"ref9","article-title":"Interleaving pre-trained language models and large language models for zero-shot nl2sql generation","author":"Gu","year":"2023","journal-title":"arXiv preprint arXiv:2306.08891"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3589292"},{"key":"ref11","first-page":"1593","article-title":"Addressing limitations of encoder-decoder based approach to text-to-sql","volume-title":"Proceedings of the 29th International Conference on Computational Linguistics","author":"Popescu"},{"key":"ref12","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021","journal-title":"arXiv preprint arXiv:2107.03374"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.14778\/3587136.3587146"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00046"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.14778\/2777598.2777601"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1425"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.167"},{"key":"ref18","article-title":"Dataset and enhanced model for eligibility criteria-to-sql semantic parsing","volume-title":"12th International Conference on Language Resources and Evaluation (LREC)","author":"Yu"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.176"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE.2016.7498319"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/1925844.1926423"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.14778\/3231751.3231766"},{"key":"ref23","article-title":"Predictive interaction for data transformation","volume-title":"CIDR","author":"Heer"},{"key":"ref24","article-title":"Clx: Towards verifiable pbe data transformation","author":"Jin","year":"2018","journal-title":"arXiv preprint arXiv:1803.00701"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.14778\/2977797.2977807"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3035918.3064034"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.14778\/3115404.3115409"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517908"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1080\/00987913.2019.1644891"},{"key":"ref30","article-title":"Trifacta wrangler","year":"2020"},{"key":"ref31","article-title":"Covid-19 data repository by the center for systems science and engineering (csse) at johns hopkins university"},{"key":"ref32","article-title":"Codex models and azure openai service"}],"event":{"name":"2023 IEEE International Conference on Big Data (BigData)","location":"Sorrento, Italy","start":{"date-parts":[[2023,12,15]]},"end":{"date-parts":[[2023,12,18]]}},"container-title":["2023 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10385234\/10386078\/10386931.pdf?arnumber=10386931","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,1]],"date-time":"2024-02-01T01:48:32Z","timestamp":1706752112000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10386931\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,15]]},"references-count":32,"URL":"https:\/\/doi.org\/10.1109\/bigdata59044.2023.10386931","relation":{},"subject":[],"published":{"date-parts":[[2023,12,15]]}}}