{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,25]],"date-time":"2026-01-25T17:10:43Z","timestamp":1769361043092,"version":"3.49.0"},"reference-count":117,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,8,20]],"date-time":"2025-08-20T00:00:00Z","timestamp":1755648000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,8,20]],"date-time":"2025-08-20T00:00:00Z","timestamp":1755648000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"name":"The Science and technology innovation project of\u00a0Chinese Academy of Agricultural Sciences","award":["No. CAAS-ASTIP-2025-AII"],"award-info":[{"award-number":["No. CAAS-ASTIP-2025-AII"]}]},{"name":"The Science and technology innovation project of\u00a0Chinese Academy of Agricultural Sciences","award":["No. CAAS-ASTIP-2025-AII"],"award-info":[{"award-number":["No. CAAS-ASTIP-2025-AII"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Big Data"],"DOI":"10.1186\/s40537-025-01236-0","type":"journal-article","created":{"date-parts":[[2025,8,20]],"date-time":"2025-08-20T11:48:03Z","timestamp":1755690483000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Chat-rgie: precision extraction of rice germplasm data using large language models and prompt engineering"],"prefix":"10.1186","volume":"12","author":[{"given":"Yijin","family":"Wei","sequence":"first","affiliation":[]},{"given":"Jingchao","family":"Fan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,20]]},"reference":[{"issue":"1","key":"1236_CR1","doi-asserted-by":"publisher","first-page":"1293","DOI":"10.1038\/ncomms2296","volume":"3","author":"DK Ray","year":"2012","unstructured":"Ray DK, Ramankutty N, Mueller ND, West PC, Foley JA. Recent patterns of crop yield growth and stagnation. Nat Commun. 2012;3(1):1293.","journal-title":"Nat Commun"},{"issue":"6","key":"1236_CR2","doi-asserted-by":"publisher","first-page":"66428","DOI":"10.1371\/journal.pone.0066428","volume":"8","author":"DK Ray","year":"2013","unstructured":"Ray DK, Mueller ND, West PC, Foley JA. Yield trends are insufficient to double global crop production by 2050. PLoS ONE. 2013;8(6):66428.","journal-title":"PLoS ONE"},{"issue":"2","key":"1236_CR3","doi-asserted-by":"publisher","first-page":"241","DOI":"10.2478\/fcds-2019-0012","volume":"44","author":"M Szachniuk","year":"2019","unstructured":"Szachniuk M. Rnapolis: computational platform for rna structure analysis. Found Comput Decis Sci. 2019;44(2):241\u201357.","journal-title":"Found Comput Decis Sci"},{"issue":"1","key":"1236_CR4","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1104\/pp.105.063008","volume":"140","author":"N Kurata","year":"2006","unstructured":"Kurata N, Yamazaki YO. An integrated biological and genome information database for rice. Plant Physiol. 2006;140(1):12\u20137.","journal-title":"Plant Physiol"},{"issue":"10","key":"1236_CR5","doi-asserted-by":"publisher","first-page":"1894","DOI":"10.1021\/acs.jcim.6b00207","volume":"56","author":"MC Swain","year":"2016","unstructured":"Swain MC, Cole JM. Chemdataextractor: a toolkit for automated extraction of chemical information from the scientific literature. J Chem Inf Model. 2016;56(10):1894\u2013904.","journal-title":"J Chem Inf Model"},{"issue":"9","key":"1236_CR6","doi-asserted-by":"publisher","first-page":"4280","DOI":"10.1021\/acs.jcim.1c00446","volume":"61","author":"J Mavracic","year":"2021","unstructured":"Mavracic J, Court CJ, Isazawa T, Elliott SR, Cole JM. Chemdataextractor 2.0: autopopulated ontologies for materials science. J Chem Inf Model. 2021;61(9):4280\u20139.","journal-title":"J Chem Inf Model"},{"issue":"1","key":"1236_CR7","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1038\/s41524-020-0287-8","volume":"6","author":"CJ Court","year":"2020","unstructured":"Court CJ, Cole JM. Magnetic and superconducting phase diagrams and transition temperatures predicted using text mining and machine learning. NPJ Comput Mater. 2020;6(1):18.","journal-title":"NPJ Comput Mater"},{"issue":"1","key":"1236_CR8","doi-asserted-by":"publisher","first-page":"292","DOI":"10.1038\/s41597-022-01301-w","volume":"9","author":"P Kumar","year":"2022","unstructured":"Kumar P, Kabra S, Cole JM. Auto-generating databases of yield strength and grain size using chemdataextractor. Sci Data. 2022;9(1):292.","journal-title":"Sci Data"},{"issue":"1","key":"1236_CR9","doi-asserted-by":"publisher","first-page":"648","DOI":"10.1038\/s41597-022-01752-1","volume":"9","author":"O Sierepeklis","year":"2022","unstructured":"Sierepeklis O, Cole JM. A thermoelectric materials database auto-generated from the scientific literature using chemdataextractor. Sci Data. 2022;9(1):648.","journal-title":"Sci Data"},{"issue":"11","key":"1236_CR10","doi-asserted-by":"publisher","first-page":"2670","DOI":"10.1021\/acs.jcim.2c00253","volume":"62","author":"J Zhao","year":"2022","unstructured":"Zhao J, Cole JM. Reconstructing chromatic-dispersion relations and predicting refractive indices using text mining and machine learning. J Chem Inf Model. 2022;62(11):2670\u201384.","journal-title":"J Chem Inf Model"},{"issue":"1","key":"1236_CR11","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1038\/s41597-022-01295-5","volume":"9","author":"J Zhao","year":"2022","unstructured":"Zhao J, Cole JM. A database of refractive indices and dielectric constants auto-generated using chemdataextractor. Sci Data. 2022;9(1):192.","journal-title":"Sci Data"},{"issue":"1","key":"1236_CR12","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1038\/s41597-022-01355-w","volume":"9","author":"EJ Beard","year":"2022","unstructured":"Beard EJ, Cole JM. Perovskite-and dye-sensitized solar-cell device databases auto-generated using chemdataextractor. Sci Data. 2022;9(1):329.","journal-title":"Sci Data"},{"issue":"1","key":"1236_CR13","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1038\/s41597-022-01294-6","volume":"9","author":"Q Dong","year":"2022","unstructured":"Dong Q, Cole JM. Auto-generated database of semiconductor band gaps using chemdataextractor. Sci Data. 2022;9(1):193.","journal-title":"Sci Data"},{"issue":"1","key":"1236_CR14","doi-asserted-by":"publisher","first-page":"307","DOI":"10.1038\/s41597-019-0306-0","volume":"6","author":"EJ Beard","year":"2019","unstructured":"Beard EJ, Sivaraman G, V\u00e1zquez-Mayagoitia \u00c1, Vishwanath V, Cole JM. Comparative dataset of experimental and computational attributes of uv\/vis absorption spectra. Sci Data. 2019;6(1):307.","journal-title":"Sci Data"},{"issue":"1","key":"1236_CR15","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1038\/s41597-022-01317-2","volume":"9","author":"Z Wang","year":"2022","unstructured":"Wang Z, Kononova O, Cruse K, He T, Huo H, Fei Y, Zeng Y, Sun Y, Cai Z, Sun W, et al. Dataset of solution-based inorganic materials synthesis procedures extracted from the scientific literature. Sci Data. 2022;9(1):231.","journal-title":"Sci Data"},{"issue":"16","key":"1236_CR16","doi-asserted-by":"publisher","first-page":"7323","DOI":"10.1021\/acs.chemmater.2c01293","volume":"34","author":"H Huo","year":"2022","unstructured":"Huo H, Bartel CJ, He T, Trewartha A, Dunn A, Ouyang B, Jain A, Ceder G. Machine-learning rationalization and prediction of solid-state synthesis conditions. Chem Mater. 2022;34(16):7323\u201336.","journal-title":"Chem Mater"},{"key":"1236_CR17","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1146\/annurev-matsci-090319-010954","volume":"50","author":"JE Saal","year":"2020","unstructured":"Saal JE, Oliynyk AO, Meredig B. Machine learning in materials discovery: confirmed predictions and their underlying approaches. Annu Rev Mater Res. 2020;50:49\u201369.","journal-title":"Annu Rev Mater Res"},{"issue":"1","key":"1236_CR18","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1146\/annurev-matsci-070218-010015","volume":"50","author":"D Morgan","year":"2020","unstructured":"Morgan D, Jacobs R. Opportunities and challenges for machine learning in materials science. Annu Rev Mater Res. 2020;50(1):71\u2013103.","journal-title":"Annu Rev Mater Res"},{"key":"1236_CR19","unstructured":"Karpovich C, Jensen Z, Venugopal V, Olivetti E. Inorganic synthesis reaction condition prediction with generative machine learning. arXiv preprint. 2021. arXiv:2112.09612"},{"issue":"14","key":"1236_CR20","doi-asserted-by":"publisher","first-page":"5591","DOI":"10.1021\/acs.chemmater.1c00905","volume":"33","author":"AB Georgescu","year":"2021","unstructured":"Georgescu AB, Ren P, Toland AR, Zhang S, Miller KD, Apley DW, Olivetti EA, Wagner N, Rondinelli JM. Database, features, and machine learning model to identify thermally driven metal-insulator transition compounds. Chem Mater. 2021;33(14):5591\u2013605.","journal-title":"Chem Mater"},{"issue":"3","key":"1236_CR21","doi-asserted-by":"publisher","DOI":"10.1016\/j.isci.2021.102155","volume":"24","author":"O Kononova","year":"2021","unstructured":"Kononova O, He T, Huo H, Trewartha A, Olivetti EA, Ceder G. Opportunities and challenges of text mining in materials research. Iscience. 2021;24(3): 102155.","journal-title":"Iscience"},{"issue":"3","key":"1236_CR22","doi-asserted-by":"publisher","first-page":"1194","DOI":"10.1021\/acs.jcim.9b00995","volume":"60","author":"E Kim","year":"2020","unstructured":"Kim E, Jensen Z, Grootel A, Huang K, Staib M, Mysore S, Chang H-S, Strubell E, McCallum A, Jegelka S, et al. Inorganic materials synthesis planning with literature-trained neural networks. J Chem Inf Model. 2020;60(3):1194\u2013201.","journal-title":"J Chem Inf Model"},{"issue":"21","key":"1236_CR23","doi-asserted-by":"publisher","first-page":"9436","DOI":"10.1021\/acs.chemmater.7b03500","volume":"29","author":"E Kim","year":"2017","unstructured":"Kim E, Huang K, Saunders A, McCallum A, Ceder G, Olivetti E. Materials synthesis insights from scientific literature via text extraction and machine learning. Chem Mater. 2017;29(21):9436\u201344.","journal-title":"Chem Mater"},{"issue":"5","key":"1236_CR24","doi-asserted-by":"publisher","first-page":"892","DOI":"10.1021\/acscentsci.9b00193","volume":"5","author":"Z Jensen","year":"2019","unstructured":"Jensen Z, Kim E, Kwon S, Gani TZ, Rom\u00e1n-Leshkov Y, Moliner M, Corma A, Olivetti E. A machine learning approach to zeolite synthesis enabled by automatic literature data extraction. ACS Cent Sci. 2019;5(5):892\u20139.","journal-title":"ACS Cent Sci"},{"issue":"1","key":"1236_CR25","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1038\/s41524-023-01171-9","volume":"9","author":"LP Gilligan","year":"2023","unstructured":"Gilligan LP, Cobelli M, Taufour V, Sanvito S. A rule-free workflow for the automated generation of databases from scientific literature. NPJ Comput Mater. 2023;9(1):222.","journal-title":"NPJ Comput Mater"},{"issue":"1","key":"1236_CR26","doi-asserted-by":"publisher","first-page":"1569","DOI":"10.1038\/s41467-024-45914-8","volume":"15","author":"MP Polak","year":"2024","unstructured":"Polak MP, Morgan D. Extracting accurate materials data from research papers with conversational language models and prompt engineering. Nat Commun. 2024;15(1):1569.","journal-title":"Nat Commun"},{"key":"1236_CR27","first-page":"1877","volume":"33","author":"T Brown","year":"2020","unstructured":"Brown T, Mann B, Ryder N, Subbiah M, Kaplan JD, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A, et al. Language models are few-shot learners. Adv Neural Inf Process Syst. 2020;33:1877\u2013901.","journal-title":"Adv Neural Inf Process Syst"},{"key":"1236_CR28","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, Almeida D, Wainwright C, Mishkin P, Zhang C, Agarwal S, Slama K, Ray A, et al. Training language models to follow instructions with human feedback. Adv Neural Inf Process Syst. 2022;35:27730\u201344.","journal-title":"Adv Neural Inf Process Syst"},{"key":"1236_CR29","unstructured":"Workshop B, Scao TL, Fan A, Akiki C, Pavlick E, Ili\u0107 S, Hesslow D, Castagn\u00e9 R, Luccioni AS, Yvon F, et al. Bloom: A 176b-parameter open-access multilingual language model. Preprint at. 2022. arXiv:2211.05100"},{"key":"1236_CR30","unstructured":"Zhang S, Roller S, Goyal N, Artetxe M, Chen M, Chen S, Dewan C, Diab M, Li X, Lin XV, et al. Opt: Open pre-trained transformer language models. Preprint at. 2022. arXiv:2205.01068"},{"key":"1236_CR31","unstructured":"Touvron H, Lavril T, Izacard G, Martinet X, Lachaux M-A, Lacroix T, Rozi\u00e8re B, Goyal N, Hambro E, Azhar F, et al. Llama: Open and efficient foundation language models. Preprint at. 2023. arXiv:2302.13971"},{"key":"1236_CR32","unstructured":"Dunn A, Dagdelen J, Walker N, Lee S, Rosen AS, Ceder G, Persson K, Jain A. Structured information extraction from complex scientific text with fine-tuned large language models. Preprint at. 2022. arXiv:2212.05238"},{"issue":"6","key":"1236_CR33","doi-asserted-by":"publisher","first-page":"1221","DOI":"10.1039\/D4DD00016A","volume":"3","author":"MP Polak","year":"2024","unstructured":"Polak MP, Modi S, Latosinska A, Zhang J, Wang C-W, Wang S, Hazra AD, Morgan D. Flexible, model-agnostic method for materials data extraction from text using general purpose language models. Digit Discov. 2024;3(6):1221\u201335.","journal-title":"Digit Discov"},{"key":"1236_CR34","doi-asserted-by":"crossref","unstructured":"Qin C, Zhang A, Zhang Z, Chen J, Yasunaga M, Yang D. Is chatgpt a general-purpose natural language processing task solver? arXiv preprint. 2023. arXiv:2302.06476","DOI":"10.18653\/v1\/2023.emnlp-main.85"},{"key":"1236_CR35","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S, et al. Gpt-4 technical report. Preprint at. 2023. arXiv:2303.08774"},{"key":"1236_CR36","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/j.aiopen.2021.08.002","volume":"2","author":"X Han","year":"2021","unstructured":"Han X, Zhang Z, Ding N, Gu Y, Liu X, Huo Y, Qiu J, Yao Y, Zhang A, Zhang L, et al. Pre-trained models: past, present and future. AI Open. 2021;2:225\u201350.","journal-title":"AI Open"},{"key":"1236_CR37","unstructured":"Wei J, Tay Y, Bommasani R, Raffel C, Zoph B, Borgeaud S, Yogatama D, Bosma M, Zhou D, Metzler D, et al. Emergent abilities of large language models. arXiv preprint. 2022. arXiv:2206.07682"},{"issue":"9","key":"1236_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3649449","volume":"56","author":"J Li","year":"2024","unstructured":"Li J, Tang T, Zhao WX, Nie J-Y, Wen J-R. Pre-trained language models for text generation: a survey. ACM Comput Surv. 2024;56(9):1\u201339.","journal-title":"ACM Comput Surv"},{"key":"1236_CR39","unstructured":"Hoffmann, J., Borgeaud, S., Mensch, A., Buchatskaya, E., Cai, T., Rutherford, E., Casas, D.d.L., Hendricks, L.A., Welbl, J., Clark, A., et al.: Training compute-optimal large language models. arXiv preprint. 2022. arXiv:2203.15556"},{"key":"1236_CR40","unstructured":"Bian J, Zheng J, Zhang Y, Zhu S. Inspire the large language model by external knowledge on biomedical named entity recognition. arXiv preprint. 2023. arXiv:2309.12278"},{"issue":"1","key":"1236_CR41","doi-asserted-by":"publisher","first-page":"1569","DOI":"10.1038\/s41467-024-45914-8","volume":"15","author":"MP Polak","year":"2024","unstructured":"Polak MP, Morgan D. Extracting accurate materials data from research papers with conversational language models and prompt engineering. Nat Commun. 2024;15(1):1569.","journal-title":"Nat Commun"},{"issue":"30","key":"1236_CR42","doi-asserted-by":"publisher","first-page":"2305016120","DOI":"10.1073\/pnas.2305016120","volume":"120","author":"F Gilardi","year":"2023","unstructured":"Gilardi F, Alizadeh M, Kubli M. Chatgpt outperforms crowd workers for text-annotation tasks. Proc Natl Acad Sci. 2023;120(30):2305016120.","journal-title":"Proc Natl Acad Sci"},{"key":"1236_CR43","doi-asserted-by":"crossref","unstructured":"Huang J, Gu SS, Hou L, Wu Y, Wang X, Yu H, Han J. Large language models can self-improve. arXiv preprint. 2022. arXiv:2210.11610","DOI":"10.18653\/v1\/2023.emnlp-main.67"},{"key":"1236_CR44","unstructured":"Bubeck S, Chandrasekaran V, Eldan R, Gehrke J, Horvitz E, Kamar E, Lee P, Lee YT, Li Y, Lundberg S, et al. Sparks of artificial general intelligence: Early experiments with gpt-4. arXiv preprint. 2023. arXiv:2303.12712"},{"key":"1236_CR45","unstructured":"Fu Y, Peng H, Khot T. How does gpt obtain its ability? tracing emergent abilities of language models to their sources. Yao Fu\u2019s Notion. 2022."},{"key":"1236_CR46","unstructured":"Ye J, Chen X, Xu N, Zu C, Shao Z, Liu S, Cui Y, Zhou Z, Gong C, Shen Y, et al. A comprehensive capability analysis of gpt-3 and gpt-3.5 series models. arXiv preprint. 2023. arXiv:2303.10420"},{"key":"1236_CR47","first-page":"109","volume-title":"Psychology of learning and motivation","author":"M McCloskey","year":"1989","unstructured":"McCloskey M, Cohen NJ. Catastrophic interference in connectionist networks: the sequential learning problem. In: Psychology of learning and motivation. Amsterdam: Elsevier; 1989. p. 109\u201365."},{"key":"1236_CR48","doi-asserted-by":"crossref","unstructured":"Kemker R, McClure M, Abitino A, Hayes T, Kanan C. Measuring catastrophic forgetting in neural networks. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2018; vol. 32","DOI":"10.1609\/aaai.v32i1.11651"},{"key":"1236_CR49","first-page":"27730","volume":"35","author":"L Ouyang","year":"2022","unstructured":"Ouyang L, Wu J, Jiang X, Almeida D, Wainwright C, Mishkin P, Zhang C, Agarwal S, Slama K, Ray A, et al. Training language models to follow instructions with human feedback. Adv Neural Inf Process Syst. 2022;35:27730\u201344.","journal-title":"Adv Neural Inf Process Syst"},{"key":"1236_CR50","doi-asserted-by":"crossref","unstructured":"Bang Y, Cahyawijaya S, Lee N, Dai W, Su D, Wilie B, Lovenia H, Ji Z, Yu T, Chung W, et al. A multitask, multilingual, multimodal evaluation of chatgpt on reasoning, hallucination, and interactivity. arXiv preprint. 2023. arXiv:2302.04023","DOI":"10.18653\/v1\/2023.ijcnlp-main.45"},{"key":"1236_CR51","doi-asserted-by":"crossref","unstructured":"Lu P, Qiu L, Yu W, Welleck S, Chang K-W. A survey of deep learning for mathematical reasoning. arXiv preprint. 2022. arXiv:2212.10535","DOI":"10.18653\/v1\/2023.acl-long.817"},{"key":"1236_CR52","unstructured":"Qian J, Wang H, Li Z, Li S, Yan X. Limitations of language models in arithmetic and symbolic induction. arXiv preprint. 2022. arXiv:2208.05051"},{"key":"1236_CR53","first-page":"24824","volume":"35","author":"J Wei","year":"2022","unstructured":"Wei J, Wang X, Schuurmans D, Bosma M, Xia F, Chi E, Le QV, Zhou D, et al. Chain-of-thought prompting elicits reasoning in large language models. Adv Neural Inf Process Syst. 2022;35:24824\u201337.","journal-title":"Adv Neural Inf Process Syst"},{"key":"1236_CR54","doi-asserted-by":"crossref","unstructured":"Lyu Q, Havaldar S, Stein A, Zhang L, Rao D, Wong E, Apidianaki M, Callison-Burch C. Faithful chain-of-thought reasoning. arXiv preprint. 2023. arXiv:2301.13379","DOI":"10.18653\/v1\/2023.ijcnlp-main.20"},{"key":"1236_CR55","doi-asserted-by":"crossref","unstructured":"Patel A, Bhattamishra S, Goyal N. Are nlp models really able to solve simple math word problems? arXiv preprint. 2021. arXiv:2103.07191","DOI":"10.18653\/v1\/2021.naacl-main.168"},{"key":"1236_CR56","unstructured":"Singhal K, Azizi S, Tu T, Mahdavi SS, Wei J, Chung HW, Scales N, Tanwani A, Cole-Lewis H, Pfohl S, et al. Large language models encode clinical knowledge. arXiv preprint. 2022. arXiv:2212.13138"},{"issue":"4","key":"1236_CR57","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1038\/s41562-024-01847-2","volume":"8","author":"Z Lin","year":"2024","unstructured":"Lin Z. How to write effective prompts for large language models. Nat Hum Behav. 2024;8(4):611\u20135.","journal-title":"Nat Hum Behav"},{"issue":"9","key":"1236_CR58","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3560815","volume":"55","author":"P Liu","year":"2023","unstructured":"Liu P, Yuan W, Fu J, Jiang Z, Hayashi H, Neubig G. Pre-train, prompt, and predict: a systematic survey of prompting methods in natural language processing. ACM Comput Surv. 2023;55(9):1\u201335.","journal-title":"ACM Comput Surv"},{"key":"1236_CR59","first-page":"11809","volume":"36","author":"S Yao","year":"2024","unstructured":"Yao S, Yu D, Zhao J, Shafran I, Griffiths T, Cao Y, Narasimhan K. Tree of thoughts: deliberate problem solving with large language models. Adv Neural Inf Process Syst. 2024;36:11809\u201322.","journal-title":"Adv Neural Inf Process Syst"},{"key":"1236_CR60","doi-asserted-by":"crossref","unstructured":"Besta M, Blach N, Kubicek A, Gerstenberger R, Podstawski M, Gianinazzi L, Gajda J, Lehmann T, Niewiadomski H, Nyczyk P, et\u00a0al. Graph of thoughts: Solving elaborate problems with large language models. In: Proceedings of the AAAI Conference on Artificial Intelligence, 2024; vol. 38, pp. 17682\u201317690","DOI":"10.1609\/aaai.v38i16.29720"},{"key":"1236_CR61","unstructured":"Yao S, Zhao J, Yu D, Du N, Shafran I, Narasimhan K, Cao Y. React: Synergizing reasoning and acting in language models. arXiv preprint. 2022. arXiv:2210.03629"},{"key":"1236_CR62","unstructured":"Wang X, Wei J, Schuurmans D, Le Q, Chi E, Narang S, Chowdhery A, Zhou D. Self-consistency improves chain of thought reasoning in language models. arXiv preprint. 2022. arXiv:2203.11171"},{"key":"1236_CR63","unstructured":"Brown TB. Language models are few-shot learners. arXiv preprint. 2020. arXiv:2005.14165"},{"key":"1236_CR64","unstructured":"Humphreys K, Gaizauskas R, Azzam S, Huyck C, Mitchell B, Cunningham H, Wilks Y. University of sheffield: Description of the lasie-ii system as used for muc-7. In: Seventh Message Understanding Conference (MUC-7): Proceedings of a Conference Held in Fairfax, Virginia, 1998."},{"key":"1236_CR65","unstructured":"Krupka G, IsoQuest K. Description of the nerowl extractor system as used for muc-7. In: Proc. 7th Message Understanding Conf, 2005; pp. 21\u201328"},{"issue":"1","key":"1236_CR66","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/MASSP.1986.1165342","volume":"3","author":"L Rabiner","year":"1986","unstructured":"Rabiner L, Juang B. An introduction to hidden markov models. IEEE ASSP Mag. 1986;3(1):4\u201316.","journal-title":"IEEE ASSP Mag"},{"key":"1236_CR67","volume-title":"Maximum-entropy models in science and engineering","author":"JN Kapur","year":"1989","unstructured":"Kapur JN. Maximum-entropy models in science and engineering. Hoboken: John Wiley & Sons; 1989."},{"issue":"2","key":"1236_CR68","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1023\/A:1009715923555","volume":"2","author":"CJ Burges","year":"1998","unstructured":"Burges CJ. A tutorial on support vector machines for pattern recognition. Data Min Knowl Disc. 1998;2(2):121\u201367.","journal-title":"Data Min Knowl Disc"},{"key":"1236_CR69","unstructured":"Lafferty J, McCallum A, Pereira F, et\u00a0al. Conditional random fields: probabilistic models for segmenting and labeling sequence data. In: Icml. Williamstown, MA, 2001; vol. 1, p. 3."},{"issue":"12","key":"1236_CR70","doi-asserted-by":"publisher","first-page":"3016","DOI":"10.3390\/foods10123016","volume":"10","author":"PS Sampaio","year":"2021","unstructured":"Sampaio PS, Almeida AS, Brites CM. Use of artificial neural network model for rice quality prediction based on grain physical parameters. Foods. 2021;10(12):3016.","journal-title":"Foods"},{"issue":"1","key":"1236_CR71","doi-asserted-by":"publisher","first-page":"1000","DOI":"10.1038\/s42003-023-05379-9","volume":"6","author":"RJN Tiozon","year":"2023","unstructured":"Tiozon RJN, Sreenivasulu N, Alseekh S, Sartagoda KJD, Usadel B, Fernie AR. Metabolomics and machine learning technique revealed that germination enhances the multi-nutritional properties of pigmented rice. Commun Biol. 2023;6(1):1000.","journal-title":"Commun Biol"},{"key":"1236_CR72","doi-asserted-by":"crossref","unstructured":"Baevski A, Edunov S, Liu Y, Zettlemoyer L, Auli M. Cloze-driven pretraining of self-attention networks. arXiv preprint. 2019. arXiv:1903.07785","DOI":"10.18653\/v1\/D19-1539"},{"key":"1236_CR73","first-page":"2493","volume":"12","author":"R Collobert","year":"2011","unstructured":"Collobert R, Weston J, Bottou L, Karlen M, Kavukcuoglu K, Kuksa P. Natural language processing (almost) from scratch. J Mach Learn Res. 2011;12:2493\u2013537.","journal-title":"J Mach Learn Res"},{"key":"1236_CR74","unstructured":"Devlin J. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint. 2018. arXiv:1810.04805"},{"key":"1236_CR75","doi-asserted-by":"crossref","unstructured":"Bharadwaj A, Mortensen DR, Dyer C, Carbonell JG. Phonologically aware neural model for named entity recognition in low resource transfer settings. In: Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing, 2016; pp. 1462\u20131472","DOI":"10.18653\/v1\/D16-1153"},{"key":"1236_CR76","unstructured":"Rei M, Crichton GK, Pyysalo S. Attending to characters in neural sequence labeling models. arXiv preprint. 2016. arXiv:1611.04361"},{"issue":"7","key":"1236_CR77","doi-asserted-by":"publisher","DOI":"10.1016\/j.isci.2022.104546","volume":"25","author":"R Kumar","year":"2022","unstructured":"Kumar R, Khatri A, Acharya V. Deep learning uncovers distinct behavior of rice network to pathogens response. Iscience. 2022;25(7): 104521.","journal-title":"Iscience"},{"issue":"1","key":"1236_CR78","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1186\/s13007-024-01250-y","volume":"20","author":"I-T Vourlaki","year":"2024","unstructured":"Vourlaki I-T, Ramos-Onsins SE, P\u00e9rez-Enciso M, Castanera R. Evaluation of deep learning for predicting rice traits using structural and single-nucleotide genomic variants. Plant Methods. 2024;20(1):121.","journal-title":"Plant Methods"},{"key":"1236_CR79","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S, et\u00a0al. Gpt-4 technical report. Preprint at. 2023. arXiv:2303.08774"},{"issue":"8","key":"1236_CR80","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I, et al. Language models are unsupervised multitask learners. OpenAI Blog. 2019;1(8):9.","journal-title":"OpenAI Blog"},{"key":"1236_CR81","unstructured":"Touvron H, Martin L, Stone K, Albert P, Almahairi A, Babaei Y, Bashlykov N, Batra S, Bhargava P, Bhosale S, et al. Llama 2: Open foundation and fine-tuned chat models. Preprint at. 2023. arXiv:2307.09288"},{"issue":"2","key":"1236_CR82","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3605943","volume":"56","author":"B Min","year":"2023","unstructured":"Min B, Ross H, Sulem E, Veyseh APB, Nguyen TH, Sainz O, Agirre E, Heintz I, Roth D. Recent advances in natural language processing via large pre-trained language models: a survey. ACM Comput Surv. 2023;56(2):1\u201340.","journal-title":"ACM Comput Surv"},{"key":"1236_CR83","doi-asserted-by":"crossref","unstructured":"Yoo KM, Park D, Kang J, Lee S-W, Park W. Gpt3mix: Leveraging large-scale language models for text augmentation. Preprint at. 2021. arXiv:2104.08826","DOI":"10.18653\/v1\/2021.findings-emnlp.192"},{"key":"1236_CR84","unstructured":"Albrecht J, Kitanidis E, Fetterman AJ. Despite\u201d super-human\u201d performance, current LLMs are unsuited for decisions about ethics and safety. Preprint at. 2022. arXiv:2212.06295"},{"key":"1236_CR85","unstructured":"Liang PP, Wu C, Morency L-P, Salakhutdinov R. Towards understanding and mitigating social biases in language models. In: International Conference on Machine Learning, PMLR. 2021; pp. 6565\u20136576"},{"key":"1236_CR86","doi-asserted-by":"crossref","unstructured":"Alvi M, Zisserman A, Nell\u00e5ker C. Turning a blind eye: Explicit removal of biases and variation from deep neural network embeddings. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops, 2018; pp. 0\u20130.","DOI":"10.1007\/978-3-030-11009-3_34"},{"key":"1236_CR87","doi-asserted-by":"crossref","unstructured":"Zhang J, Verma V. Discover discriminatory bias in high accuracy models embedded in machine learning algorithms. In: The International Conference on Natural Computation, Fuzzy Systems and Knowledge Discovery, Springer, 2020; pp. 1537\u20131545.","DOI":"10.1007\/978-3-030-70665-4_166"},{"key":"1236_CR88","doi-asserted-by":"crossref","unstructured":"Hajikhani A, Cole C. A critical review of large language models: sensitivity, bias, and the path toward specialized ai. Quantitative Science Studies, 2024; 1\u201322","DOI":"10.1162\/qss_a_00310"},{"key":"1236_CR89","doi-asserted-by":"crossref","unstructured":"Gartlehner G, Kahwati L, Hilscher R, Thomas I, Kugley S, Crotty K, Viswanathan M, Nussbaumer-Streit B, Booth G, Erskine N, et al. Data extraction for evidence synthesis using a large language model: A proof-of-concept study. Research Synthesis Methods. 2024.","DOI":"10.1101\/2023.10.02.23296415"},{"key":"1236_CR90","doi-asserted-by":"crossref","unstructured":"Beltagy I, Lo K, Cohan A. SciBERT: A pretrained language model for scientific text. Preprint at. 2019. arXiv:1903.10676","DOI":"10.18653\/v1\/D19-1371"},{"key":"1236_CR91","doi-asserted-by":"crossref","unstructured":"Lewis P, Ott M, Du J, Stoyanov V. Pretrained language models for biomedical and clinical tasks: understanding and extending the state-of-the-art. In: Proceedings of the 3rd Clinical Natural Language Processing Workshop, 2020; pp. 146\u2013157","DOI":"10.18653\/v1\/2020.clinicalnlp-1.17"},{"key":"1236_CR92","unstructured":"Wei, J., Bosma, M., Zhao, V.Y., Guu, K., Yu, A.W., Lester, B., Du, N., Dai, A.M., Le, Q.V.: Finetuned language models are zero-shot learners. arXiv preprint. 2021. arXiv:2109.01652"},{"key":"1236_CR93","unstructured":"Sanh V, Webson A, Raffel C, Bach S, Sutawika L, Alyafeai Z, Chaffin A, Stiegler A, Raja A, Dey M, et\u00a0al. Multitask prompted training enables zero-shot task generalization. In: International Conference on Learning Representations. 2022"},{"issue":"70","key":"1236_CR94","first-page":"1","volume":"25","author":"HW Chung","year":"2024","unstructured":"Chung HW, Hou L, Longpre S, Zoph B, Tay Y, Fedus W, Li Y, Wang X, Dehghani M, Brahma S, et al. Scaling instruction-finetuned language models. J Mach Learn Res. 2024;25(70):1\u201353.","journal-title":"J Mach Learn Res"},{"key":"1236_CR95","unstructured":"Kenton Z, Everitt T, Weidinger L, Gabriel I, Mikulik V, Irving G. Alignment of language agents. arXiv preprint. 2021. arXiv:2103.14659"},{"key":"1236_CR96","unstructured":"Askell A, Bai Y, Chen A, Drain D, Ganguli D, Henighan T, Jones A, Joseph N, Mann B, DasSarma N, et\u00a0al. A general language assistant as a laboratory for alignment. arXiv preprint. 2021. arXiv:2112.00861"},{"key":"1236_CR97","unstructured":"Bai Y, Jones A, Ndousse K, Askell A, Chen A, DasSarma N, Drain D, Fort S, Ganguli D, Henighan T, et al. Training a helpful and harmless assistant with reinforcement learning from human feedback. arXiv preprint. 2022. arXiv:2204.05862"},{"key":"1236_CR98","unstructured":"Hu EJ, Shen Y, Wallis P, Allen-Zhu Z, Li Y, Wang S, Wang L, Chen W. Lora: Low-rank adaptation of large language models. arXiv preprint. 2021. arXiv:2106.09685"},{"key":"1236_CR99","doi-asserted-by":"crossref","unstructured":"Hu Z, Wang L, Lan Y, Xu W, Lim E-P, Bing L, Xu X, Poria S, Lee RK-W. Llm-adapters: An adapter family for parameter-efficient fine-tuning of large language models. arXiv preprint. 2023. arXiv:2304.01933","DOI":"10.18653\/v1\/2023.emnlp-main.319"},{"key":"1236_CR100","unstructured":"Houlsby N, Giurgiu A, Jastrzebski S, Morrone B, De\u00a0Laroussilhe Q, Gesmundo A, Attariyan M, Gelly S. Parameter-efficient transfer learning for nlp. In: International Conference on Machine Learning, PMLR, 2019; pp. 2790\u20132799"},{"key":"1236_CR101","unstructured":"He J, Zhou C, Ma X, Berg-Kirkpatrick T, Neubig G. Towards a unified view of parameter-efficient transfer learning. arXiv preprint. 2021. arXiv:2110.04366"},{"key":"1236_CR102","doi-asserted-by":"crossref","unstructured":"Pfeiffer J, Vuli\u0107 I, Gurevych I, Ruder S.: Mad-x: An adapter-based framework for multi-task cross-lingual transfer. arXiv preprint. 2020. arXiv:2005.00052","DOI":"10.18653\/v1\/2020.emnlp-main.617"},{"issue":"4","key":"1236_CR103","doi-asserted-by":"publisher","first-page":"611","DOI":"10.1038\/s41562-024-01847-2","volume":"8","author":"Z Lin","year":"2024","unstructured":"Lin Z. How to write effective prompts for large language models. Nat Hum Behav. 2024;8(4):611\u20135.","journal-title":"Nat Hum Behav"},{"key":"1236_CR104","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.126708","volume":"557","author":"B Zhao","year":"2023","unstructured":"Zhao B, Jin W, Del Ser J, Yang G. Chatagri: Exploring potentials of ChatGPT on cross-linguistic agricultural text classification. Neurocomputing. 2023;557: 126708.","journal-title":"Neurocomputing"},{"key":"1236_CR105","unstructured":"Peng R, Liu K, Yang P, Yuan Z, Li S. Embedding-based retrieval with llm for effective agriculture information extracting from unstructured data. arXiv preprint. 2023. arXiv:2308.03107"},{"key":"1236_CR106","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2023.108168","volume":"213","author":"J Qing","year":"2023","unstructured":"Qing J, Deng X, Lan Y, Li Z. Gpt-aided diagnosis on agricultural image based on a new light yolopc. Comput Electron Agric. 2023;213: 108168.","journal-title":"Comput Electron Agric"},{"key":"1236_CR107","unstructured":"Hendrycks D, Burns C, Basart S, Zou A, Mazeika M, Song D, Steinhardt J. Measuring massive multitask language understanding. Preprint at. 2020. arXiv:2009.03300"},{"key":"1236_CR108","doi-asserted-by":"crossref","unstructured":"Zellers R, Holtzman A, Bisk Y, Farhadi A, Choi Y. Hellaswag: can a machine really finish your sentence? Preprint at. 2019. arXiv:1905.07830","DOI":"10.18653\/v1\/P19-1472"},{"key":"1236_CR109","doi-asserted-by":"crossref","unstructured":"Suzgun M, Scales N, Sch\u00e4rli N, Gehrmann S, Tay Y, Chung HW, Chowdhery A, Le QV, Chi EH, Zhou D, et al. Challenging big-bench tasks and whether chain-of-thought can solve them. Preprint at. 2022. arXiv:2210.09261","DOI":"10.18653\/v1\/2023.findings-acl.824"},{"key":"1236_CR110","unstructured":"Cobbe K, Kosaraju V, Bavarian M, Chen M, Jun H, Kaiser L, Plappert M, Tworek J, Hilton J, Nakano R, et al. Training verifiers to solve math word problems. Preprint at. 2021. arXiv:2110.14168"},{"key":"1236_CR111","unstructured":"Hendrycks D, Burns C, Kadavath S, Arora A, Basart S, Tang E, Song D, Steinhardt J. Measuring mathematical problem solving with the math dataset. Preprint at. 2021. arXiv:2103.03874"},{"key":"1236_CR112","unstructured":"Bai J, Bai S, Chu Y, Cui Z, Dang K, Deng X, Fan Y, Ge W, Han Y, Huang F, et al. Qwen technical report. arXiv preprint. 2023. arXiv:2309.16609"},{"key":"1236_CR113","unstructured":"Young A, Chen B, Li C, Huang C, Zhang G, Zhang G, Li H, Zhu J, Chen J, Chang J, et al. Yi: Open foundation models by 01. ai. arXiv preprint. 2024. arXiv:2403.04652"},{"key":"1236_CR114","unstructured":"Dubey A, Jauhri A, Pandey A, Kadian A, Al-Dahle A, Letman A, Mathur A, Schelten A, Yang A, Fan A, et al. The llama 3 herd of models. arXiv preprint. 2024. arXiv:2407.21783"},{"key":"1236_CR115","unstructured":"Liu A, Feng B, Wang B, Wang B, Liu B, Zhao C, Dengr C, Ruan C, Dai D, Guo D, et al. Deepseek-v2: A strong, economical, and efficient mixture-of-experts language model. arXiv preprint. 2024. arXiv:2405.04434"},{"key":"1236_CR116","unstructured":"Sanmartin D. KG-RAG: Bridging the Gap Between Knowledge and Creativity. Preprint at. 2024. arXiv:2405.12035"},{"key":"1236_CR117","unstructured":"Powers DM. Evaluation: from precision, recall and f-measure to roc, informedness, markedness and correlation. arXiv preprint. 2020. arXiv:2010.16061"}],"container-title":["Journal of Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01236-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1186\/s40537-025-01236-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1186\/s40537-025-01236-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,9]],"date-time":"2025-09-09T10:19:46Z","timestamp":1757413186000},"score":1,"resource":{"primary":{"URL":"https:\/\/journalofbigdata.springeropen.com\/articles\/10.1186\/s40537-025-01236-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,20]]},"references-count":117,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,12]]}},"alternative-id":["1236"],"URL":"https:\/\/doi.org\/10.1186\/s40537-025-01236-0","relation":{},"ISSN":["2196-1115"],"issn-type":[{"value":"2196-1115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,20]]},"assertion":[{"value":"17 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 June 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"202"}}