{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T12:07:18Z","timestamp":1781093238062,"version":"3.54.1"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031938573","type":"print"},{"value":"9783031938580","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T00:00:00Z","timestamp":1753833600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,30]],"date-time":"2025-07-30T00:00:00Z","timestamp":1753833600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-031-93858-0_9","type":"book-chapter","created":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T05:06:14Z","timestamp":1753765574000},"page":"134-147","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Evaluation Considerations of Synthetic Natural Language Datasets for Question Answering Applications"],"prefix":"10.1007","author":[{"given":"Chris","family":"Van Buren","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaotong","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jieyu","family":"Lin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sachin Gopal","family":"Wani","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ajay","family":"Dholakia","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"David","family":"Ellison","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,7,30]]},"reference":[{"key":"9_CR1","unstructured":"Schler, J., Koppel, M., Argamon, S., Pennebaker, J.: Effects of age and gender on blogging. In: Proceedings of 2006 AAAI Spring Symposium on Computational Approaches for Analyzing Weblogs (2006). https:\/\/u.cs.biu.ac.il\/~schlerj\/schler_springsymp06.pdf"},{"key":"9_CR2","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Aligning books and movies: towards story-like visual explanations by watching movies and reading books. arXiv:1506.06724 (2015). https:\/\/arxiv.org\/abs\/1506.06724","DOI":"10.1109\/ICCV.2015.11"},{"key":"9_CR3","unstructured":"Zhang, T., et al.: RAFT: adapting language model to domain specific RAG. arXiv:2403.10131 (2024). https:\/\/arxiv.org\/abs\/2403.10131"},{"key":"9_CR4","unstructured":"Lin, X., et al.: Data-efficient fine-tuning for LLM-based recommendation. arXiv:2401.17197 (2024). https:\/\/arxiv.org\/abs\/2401.17197"},{"key":"9_CR5","unstructured":"Vaswani, A., et al.: Attention is all you need. arXiv:1706.03762 (2017). https:\/\/arxiv.org\/abs\/1706.03762"},{"key":"9_CR6","unstructured":"Minaee, S., et al.: Large language models: a survey. arXiv:2402.06196 (2024). https:\/\/arxiv.org\/abs\/2402.06196"},{"key":"9_CR7","doi-asserted-by":"crossref","unstructured":"Samsi, S., et al.: From words to watts: benchmarking the energy costs of large language model inference. arXiv:2310.03003 (2023). https:\/\/arxiv.org\/abs\/2310.03003","DOI":"10.1109\/HPEC58863.2023.10363447"},{"key":"9_CR8","unstructured":"Izacard, G., et al.: Atlas: few-shot learning with retrieval augmented language models. J. Mach. Learn. Res. 24(251), 1\u201343 (2023). https:\/\/jmlr.org\/papers\/v24\/23-0037.html"},{"key":"9_CR9","unstructured":"Lewis, P., et al.: Retrieval-augmented generation for knowledge-intensive NLP tasks. arXiv:2005.11401 (2020). https:\/\/arxiv.org\/abs\/2005.11401"},{"key":"9_CR10","unstructured":"Gao, Y., et al.: Retrieval-augmented generation for large language models: a survey. arXiv:2312.10997 (2023). https:\/\/arxiv.org\/abs\/2312.10997"},{"key":"9_CR11","doi-asserted-by":"crossref","unstructured":"Soudani, H., Kanoulas, E., Hasibi, F.: Fine tuning vs. retrieval augmented generation for less popular knowledge. arXiv:2403.01432 (2024). https:\/\/arxiv.org\/abs\/2403.01432","DOI":"10.1145\/3673791.3698415"},{"key":"9_CR12","unstructured":"Lin, X.V., et al.: RA-DIT: retrieval-augmented dual instruction tuning. arXiv:2310.01352 (2023). https:\/\/arxiv.org\/abs\/2310.01352"},{"key":"9_CR13","unstructured":"Xu, P., et al.: Retrieval meets long context large language models. arXiv:2310.03025 (2024). https:\/\/arxiv.org\/abs\/2310.03025"},{"key":"9_CR14","unstructured":"Wang, B., et al.: InstructRetro: instruction tuning post retrieval-augmented pretraining. arXiv:2310.07713 (2023). https:\/\/arxiv.org\/abs\/2310.07713"},{"key":"9_CR15","unstructured":"Zhang, Y., et al.: DatasetGAN: efficient labeled data factory with minimal human effort. arXiv:2401.10225 (2024). https:\/\/arxiv.org\/abs\/2401.10225"},{"key":"9_CR16","doi-asserted-by":"crossref","unstructured":"Li, Z., Zhu, H., Lu, Z., Yin, M.: Synthetic data generation with large language models for text classification: potential and limitations. arXiv:2310.07849 (2023). https:\/\/arxiv.org\/abs\/2310.07849","DOI":"10.18653\/v1\/2023.emnlp-main.647"},{"key":"9_CR17","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. arXiv:1812.04948 (2018). https:\/\/arxiv.org\/abs\/1812.04948"},{"key":"9_CR18","unstructured":"Zhang, Y., et al.: GLIDE: towards photorealistic image generation and editing with text-guided diffusion models. arXiv:2112.10741 (2021). https:\/\/arxiv.org\/abs\/2112.10741"},{"key":"9_CR19","unstructured":"He, R., et al.: Is synthetic data from generative models ready for image recognition?. arXiv:2210.07574 (2022). https:\/\/arxiv.org\/abs\/2210.07574"},{"key":"9_CR20","doi-asserted-by":"publisher","unstructured":"Besnier, V., Jain, H., Bursuc, A., Cord, M., P\u00e9rez, P.: This dataset does not exist: training models from generated images. In: ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 1\u20135. IEEE, Barcelona, Spain (2020). https:\/\/doi.org\/10.1109\/ICASSP40776.2020.9053146","DOI":"10.1109\/ICASSP40776.2020.9053146"},{"key":"9_CR21","unstructured":"Wang, Z., Yu, A., Firat, O., Cao, Y.: Towards Zero-Label Language Learning. arXiv:2109.09193 (2021). https:\/\/arxiv.org\/abs\/2109.09193"},{"key":"9_CR22","doi-asserted-by":"crossref","unstructured":"Ye, J., et al.: ZeroGen: efficient zero-shot learning via dataset generation. arXiv:2202.07922 (2022). https:\/\/arxiv.org\/abs\/2202.07922","DOI":"10.18653\/v1\/2022.emnlp-main.801"},{"key":"9_CR23","doi-asserted-by":"crossref","unstructured":"Kumar, V., Choudhary, A., Cho, E.: Data augmentation using pre-trained transformer models. arXiv:2003.02245 (2020). https:\/\/arxiv.org\/abs\/2003.02245","DOI":"10.18653\/v1\/2020.lifelongnlp-1.3"},{"key":"9_CR24","doi-asserted-by":"crossref","unstructured":"Chung, J.J.Y., Kamar, E., Rosenbaum, A., Kim, S., Ray, D., Amershi, S.: increasing diversity while maintaining accuracy: text data generation with large language models and human interventions. arXiv:2306.04140 (2023). https:\/\/arxiv.org\/abs\/2306.04140","DOI":"10.18653\/v1\/2023.acl-long.34"},{"key":"9_CR25","unstructured":"Gupta, H., et al.: TarGEN: targeted data generation with large language models. arXiv:2310.17876 (2023). https:\/\/arxiv.org\/abs\/2310.17876"},{"key":"9_CR26","unstructured":"Xu, J., Wu, H., Wang, J., Long, M.: Anomaly transformer: time series anomaly detection with association discrepancy (2022). https:\/\/openreview.net\/forum?id=aDC4benbIwL"},{"key":"9_CR27","unstructured":"Chen, M., et al.: Weakly supervised data augmentation through prompting for dialogue understanding. arXiv:2210.14169 (2022). https:\/\/arxiv.org\/abs\/2210.14169"},{"key":"9_CR28","unstructured":"Veselovsky, V., Ribeiro, M.H., Arora, A., Josifoski, M., Anderson, A., West, R.: Generating faithful synthetic data with large language models: a case study in computational social science. arXiv:2305.15041 (2023). https:\/\/arxiv.org\/abs\/2305.15041"},{"key":"9_CR29","unstructured":"Guo, X., Chen, Y.: Generative AI for synthetic data generation: methods, challenges and the future. arXiv:2403.04190 (2024). https:\/\/arxiv.org\/abs\/2403.04190"},{"key":"9_CR30","unstructured":"Layeghy, S., Gallagher, M., Portmann, M.: Benchmarking the benchmark \u2013 analysis of synthetic NIDS datasets. arXiv:2104.09029 (2021). https:\/\/arxiv.org\/abs\/2104.09029"},{"key":"9_CR31","doi-asserted-by":"crossref","unstructured":"Singh, K., Navaratnam, T., Holmer, J., Schaub-Meyer, S., Roth, S.: Is synthetic data all we need? benchmarking the robustness of models trained with synthetic images. arXiv:2405.20469 (2024). https:\/\/arxiv.org\/abs\/2405.20469","DOI":"10.1109\/CVPRW63382.2024.00257"},{"key":"9_CR32","unstructured":"Jin, Q., et al.: A multifaceted benchmarking of synthetic electronic health record generation models. arXiv:2208.01230 (2022). https:\/\/arxiv.org\/abs\/2208.01230"},{"key":"9_CR33","doi-asserted-by":"crossref","unstructured":"Yang, Z., et al.: HotpotQA: a dataset for diverse, explainable multi-hop question answering. arXiv:1809.09600 (2018). https:\/\/arxiv.org\/abs\/1809.09600","DOI":"10.18653\/v1\/D18-1259"},{"key":"9_CR34","unstructured":"Jin, Q., Dhingra, B., Liu, Z., Cohen, W.W., Lu, X.: PubMedQA: a dataset for biomedical research question answering. arXiv:1909.06146 (2019). https:\/\/arxiv.org\/abs\/1909.06146"},{"key":"9_CR35","unstructured":"Hu, E., et al.: LoRA: low-rank adaptation of large language models. arXiv:2106.09685 (2021). https:\/\/arxiv.org\/abs\/2106.09685"},{"key":"9_CR36","doi-asserted-by":"crossref","unstructured":"Hodak, M., Ellison, D., Van Buren, C., Jiang, X., Dholakia, A.: Benchmarking large language models: opportunities and challenges. In: TPC Technology Conference 2023, Vancouver, BC, Canada (2023)","DOI":"10.1007\/978-3-031-68031-1_6"},{"key":"9_CR37","unstructured":"Hendrycks, D., et al.: Measuring massive multitask language understanding. arXiv:2009.03300 (2021). https:\/\/arxiv.org\/abs\/2009.03300"},{"key":"9_CR38","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. arXiv:2201.11903 (2023). https:\/\/arxiv.org\/abs\/2201.11903"},{"key":"9_CR39","unstructured":"Patil, L.: RAFT [Source Code] (2024). https:\/\/github.com\/ShishirPatil\/gorilla\/tree\/main\/raft"}],"container-title":["Lecture Notes in Computer Science","Performance Evaluation and Benchmarking"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-93858-0_9","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T11:43:18Z","timestamp":1781091798000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-93858-0_9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,30]]},"ISBN":["9783031938573","9783031938580"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-93858-0_9","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7,30]]},"assertion":[{"value":"30 July 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TPCTC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Technology Conference on Performance Evaluation and Benchmarking","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Guangzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tpctc2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.tpc.org\/tpctc\/tpctc2024\/default5.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}