{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T12:10:08Z","timestamp":1750421408888,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":44,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,23]]},"DOI":"10.1145\/3733723.3733731","type":"proceedings-article","created":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T08:49:08Z","timestamp":1750409348000},"page":"1-10","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["ICEAGE: Intelligent Contextual Exploration and Answer Generation Engine for Scientific Data Discovery"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2142-1731","authenticated-orcid":false,"given":"Chenxu","family":"Niu","sequence":"first","affiliation":[{"name":"Texas Tech University, Lubbock, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6921-4926","authenticated-orcid":false,"given":"Wei","family":"Zhang","sequence":"additional","affiliation":[{"name":"Lawrence Berkeley National Laboratory, Columbus, OH, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0815-4763","authenticated-orcid":false,"given":"Mert","family":"Side","sequence":"additional","affiliation":[{"name":"Computer Science, Texas Tech University, Lubbock, TX, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9961-9051","authenticated-orcid":false,"given":"Yong","family":"Chen","sequence":"additional","affiliation":[{"name":"Texas Tech University, Lubbock, TX, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,6,22]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"[n.d.]. National Snow and Ice Data Center a part of CIRES at the University of Colorado Boulder. https:\/\/nsidc.org\/."},{"key":"e_1_3_3_1_3_2","unstructured":"Ebtesam Almazrouei Hamza Alobeidli Abdulaziz Alshamsi Alessandro Cappelli Ruxandra Cojocaru M\u00e9rouane Debbah \u00c9tienne Goffinet Daniel Hesslow Julien Launay Quentin Malartic et\u00a0al. 2023. The falcon series of open language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.16867 (2023)."},{"key":"e_1_3_3_1_4_2","unstructured":"Francesc Alted Martin Durant Stephan Hoyer John Kirkham Alistair Miles Mamy Ratsimbazafy Matthew Rocklin Vincent Schut Anthony Scopatz and Prakhar Goel. 2018. Zarr. https:\/\/zarr.readthedocs.io."},{"key":"e_1_3_3_1_5_2","unstructured":"Tom Brown Benjamin Mann Nick Ryder Melanie Subbiah Jared\u00a0D Kaplan Prafulla Dhariwal Arvind Neelakantan Pranav Shyam Girish Sastry Amanda Askell et\u00a0al. 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020) 1877\u20131901."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","unstructured":"R. Brun and F. Rademakers. 1997. ROOT: An Object Oriented Data Analysis Framework. Nucl. Instrum. Meth. A389 (1997) 81\u201386. 10.1016\/S0168-9002(97)00048-X","DOI":"10.1016\/S0168-9002(97)00048-X"},{"key":"e_1_3_3_1_7_2","volume-title":"LangChain","author":"Chase Harrison","year":"2022","unstructured":"Harrison Chase. 2022. LangChain. https:\/\/github.com\/langchain-ai\/langchain"},{"key":"e_1_3_3_1_8_2","unstructured":"Aakanksha Chowdhery Sharan Narang Jacob Devlin Maarten Bosma Gaurav Mishra Adam Roberts Paul Barham Hyung\u00a0Won Chung Charles Sutton Sebastian Gehrmann et\u00a0al. 2023. Palm: Scaling language modeling with pathways. Journal of Machine Learning Research 24 240 (2023) 1\u2013113."},{"key":"e_1_3_3_1_9_2","unstructured":"Tull Craig\u00a0E. Essiari Abdelilah Gunter Dan et\u00a0al. 2013. SPOT Suite. http:\/\/spot.nersc.gov\/."},{"key":"e_1_3_3_1_10_2","first-page":"4171","volume-title":"Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers)","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. Bert: Pre-training of deep bidirectional transformers for language understanding. In Proceedings of the 2019 conference of the North American chapter of the association for computational linguistics: human language technologies, volume 1 (long and short papers). 4171\u20134186."},{"key":"e_1_3_3_1_11_2","first-page":"5","volume-title":"Proceedings of supercomputing","volume":"99","author":"Folk Mike","year":"1999","unstructured":"Mike Folk, Albert Cheng, and Kim Yates. 1999. HDF5: A File Format and I\/O Library for High Performance Computing Applications. In Proceedings of supercomputing , Vol.\u00a099. 5\u201333."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"P Greenfield M Droettboom and E Bray. 2015. ASDF: A New Data Format for Astronomy. Astronomy and Computing 12 (2015) 240\u2013251.","DOI":"10.1016\/j.ascom.2015.06.004"},{"key":"e_1_3_3_1_13_2","unstructured":"The\u00a0HDF Group. 2018. The HDF Group. https:\/\/www.hdfgroup.org."},{"key":"e_1_3_3_1_14_2","unstructured":"Joint\u00a0Genome Institute. 2013. The JGI Archive and Metadata Organizer(JAMO). http:\/\/cs.lbl.gov\/news-media\/news\/2013\/new-metadata-organizer-streamlines-jgi-data-management."},{"key":"e_1_3_3_1_15_2","unstructured":"Albert\u00a0Q Jiang Alexandre Sablayrolles Antoine Roux Arthur Mensch Blanche Savary Chris Bamford Devendra\u00a0Singh Chaplot Diego de\u00a0las Casas Emma\u00a0Bou Hanna Florian Bressand et\u00a0al. 2024. Mixtral of experts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2401.04088 (2024)."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Jeff Johnson Matthijs Douze and Herv\u00e9 J\u00e9gou. 2019. Billion-scale similarity search with GPUs. IEEE Transactions on Big Data 7 3 (2019) 535\u2013547.","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Daniel Korenblum Daniel\u00a0L. Rubin Sandy Napel Cesar Rodriguez and Christopher\u00a0F. Beaulieu. 2011. Managing Biomedical Image Metadata for Search and Retrieval of Similar Images. J. Digital Imaging 24 4 (2011) 739\u2013748.","DOI":"10.1007\/s10278-010-9328-z"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/PDSW-DISCS.2018.00004"},{"key":"e_1_3_3_1_19_2","unstructured":"M Lewis. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation translation and comprehension. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1910.13461 (2019)."},{"key":"e_1_3_3_1_20_2","unstructured":"Patrick Lewis Ethan Perez Aleksandra Piktus Fabio Petroni Vladimir Karpukhin Naman Goyal Heinrich K\u00fcttler Mike Lewis Wen-tau Yih Tim Rockt\u00e4schel et\u00a0al. 2020. Retrieval-augmented generation for knowledge-intensive nlp tasks. Advances in Neural Information Processing Systems 33 (2020) 9459\u20139474."},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.1234"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"publisher","DOI":"10.1145\/1383529.1383533"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Yu\u00a0A Malkov and Dmitry\u00a0A Yashunin. 2018. Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE transactions on pattern analysis and machine intelligence 42 4 (2018) 824\u2013836.","DOI":"10.1109\/TPAMI.2018.2889473"},{"key":"e_1_3_3_1_24_2","unstructured":"Tomas Mikolov Kai Chen Greg Corrado and Jeffrey Dean. 2013. Efficient Estimation of Word Representations in Vector Space. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1301.3781 (2013)."},{"key":"e_1_3_3_1_25_2","unstructured":"MongoDB. 2018. MongoDB. https:\/\/www.mongodb.com."},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC55821.2022.9926389"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386205"},{"key":"e_1_3_3_1_28_2","unstructured":"OpenAI. 2023. ."},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"e_1_3_3_1_30_2","unstructured":"PostgreSQL. 2018. PostgreSQL. https:\/\/www.postgresql.org."},{"key":"e_1_3_3_1_31_2","unstructured":"Colin Raffel Noam Shazeer Adam Roberts Katherine Lee Sharan Narang Michael Matena Yanqi Zhou Wei Li and Peter\u00a0J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research 21 140 (2020) 1\u201367."},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"crossref","unstructured":"Russ Rew and Glenn Davis. 1990. NetCDF: An Interface For Scientific Data Access. IEEE computer graphics and applications 10 4 (1990) 76\u201382.","DOI":"10.1109\/38.56302"},{"key":"e_1_3_3_1_33_2","unstructured":"Rob Latham Rob Ross Rajeev Thakur Kui Gao Alok Choudhary Wei-keng Liao Jianwei Li and Bill Gropp. 2010. Parallel NetCDF. http:\/\/trac.mcs.anl.gov\/projects\/parallel-netcdf."},{"key":"e_1_3_3_1_34_2","unstructured":"Baptiste Roziere Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing\u00a0Ellen Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez et\u00a0al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12950 (2023)."},{"key":"e_1_3_3_1_35_2","unstructured":"S Saalfeld. 2017. N5: Not HDF5. https:\/\/github.com\/saalfeldlab\/n5."},{"key":"e_1_3_3_1_36_2","unstructured":"sqlite.org. 2017. SQLite. https:\/\/sqlite.org."},{"key":"e_1_3_3_1_37_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER.2017.53"},{"key":"e_1_3_3_1_38_2","unstructured":"Hugo Touvron Thibaut Lavril Gautier Izacard Xavier Martinet Marie-Anne Lachaux Timoth\u00e9e Lacroix Baptiste Rozi\u00e8re Naman Goyal Eric Hambro Faisal Azhar et\u00a0al. 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2302.13971 (2023)."},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","unstructured":"Immanuel Trummer. 2022. CodexDB: Synthesizing code for query processing from natural language instructions using GPT-3 Codex. Proceedings of the VLDB Endowment 15 11 (2022) 2921\u20132928.","DOI":"10.14778\/3551793.3551841"},{"key":"e_1_3_3_1_40_2","unstructured":"A Vaswani. 2017. Attention is all you need. Advances in Neural Information Processing Systems (2017)."},{"key":"e_1_3_3_1_41_2","unstructured":"Xiaojun Xu Chang Liu and Dawn Song. 2017. Sqlnet: Generating structured queries from natural language without reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1711.04436 (2017)."},{"key":"e_1_3_3_1_42_2","unstructured":"Susan Zhang Stephen Roller Naman Goyal Mikel Artetxe Moya Chen Shuohui Chen Christopher Dewan Mona Diab Xian Li Xi\u00a0Victoria Lin et\u00a0al. 2022. Opt: Open pre-trained transformer language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2205.01068 (2022)."},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1145\/3295500.3356146"},{"key":"e_1_3_3_1_44_2","unstructured":"Wei Zhang Houjun Tang and Suren Byna. [n.d.]. IDIOMS: Index-powered Distributed Object-centric Metadata Search for Scientific Data Management. ([n. d.])."},{"key":"e_1_3_3_1_45_2","unstructured":"Victor Zhong Caiming Xiong and Richard Socher. 2017. Seq2sql: Generating structured queries from natural language using reinforcement learning. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1709.00103 (2017)."}],"event":{"name":"SSDBM 2025: 37th International Conference on Scalable Scientific Data Management","location":"Columbus USA","acronym":"SSDBM 2025"},"container-title":["Proceedings of the 37th International Conference on Scalable Scientific Data Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3733723.3733731","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,20]],"date-time":"2025-06-20T11:37:53Z","timestamp":1750419473000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3733723.3733731"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,22]]},"references-count":44,"alternative-id":["10.1145\/3733723.3733731","10.1145\/3733723"],"URL":"https:\/\/doi.org\/10.1145\/3733723.3733731","relation":{},"subject":[],"published":{"date-parts":[[2025,6,22]]},"assertion":[{"value":"2025-06-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}