{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,1]],"date-time":"2026-05-01T19:15:35Z","timestamp":1777662935516,"version":"3.51.4"},"reference-count":84,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T00:00:00Z","timestamp":1765238400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2404035"],"award-info":[{"award-number":["2404035"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Future Wanxiang Foundation"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Comput Sci"],"DOI":"10.1038\/s43588-025-00906-6","type":"journal-article","created":{"date-parts":[[2025,12,9]],"date-time":"2025-12-09T10:58:05Z","timestamp":1765277885000},"page":"301-315","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["SciSciGPT: advancing human\u2013AI collaboration in the science of science"],"prefix":"10.1038","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2440-271X","authenticated-orcid":false,"given":"Erzhuo","family":"Shao","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6267-9440","authenticated-orcid":false,"given":"Yifang","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3914-1981","authenticated-orcid":false,"given":"Yifan","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Zhenyu","family":"Pan","sequence":"additional","affiliation":[]},{"given":"Han","family":"Liu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7054-2206","authenticated-orcid":false,"given":"Dashun","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,12,9]]},"reference":[{"key":"906_CR1","doi-asserted-by":"publisher","unstructured":"Wang, D. & Barab\u00e1si, A.-L. The Science of Science (Cambridge Univ. Press, 2021); https:\/\/doi.org\/10.1017\/9781108610834","DOI":"10.1017\/9781108610834"},{"key":"906_CR2","doi-asserted-by":"publisher","unstructured":"Stephan, P. How Economics Shapes Science (Harvard Univ. Press, 2012); https:\/\/doi.org\/10.4159\/harvard.9780674062757","DOI":"10.4159\/harvard.9780674062757"},{"key":"906_CR3","unstructured":"Bush, V. Science, the Endless Frontier, a Report to the President (US Government Printing Office, 1945); https:\/\/www.torrossa.com\/en\/resources\/an\/5563905"},{"key":"906_CR4","doi-asserted-by":"publisher","first-page":"583","DOI":"10.1126\/science.aam9527","volume":"357","author":"M Ahmadpoor","year":"2017","unstructured":"Ahmadpoor, M. & Jones, B. F. The dual frontier: patented inventions and prior scientific advance. Science 357, 583\u2013587 (2017).","journal-title":"Science"},{"key":"906_CR5","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1126\/science.abe3084","volume":"371","author":"Y Yin","year":"2021","unstructured":"Yin, Y., Gao, J., Jones, B. F. & Wang, D. Coevolution of policy and science during the pandemic. Science 371, 128\u2013130 (2021).","journal-title":"Science"},{"key":"906_CR6","doi-asserted-by":"publisher","first-page":"1344","DOI":"10.1038\/s41562-022-01397-5","volume":"6","author":"Y Yin","year":"2022","unstructured":"Yin, Y., Dong, Y., Wang, K., Wang, D. & Jones, B. F. Public use and public funding of science. Nat. Hum. Behav. 6, 1344\u20131350 (2022).","journal-title":"Nat. Hum. Behav."},{"key":"906_CR7","doi-asserted-by":"publisher","DOI":"10.1126\/science.aao0185","volume":"359","author":"S Fortunato","year":"2018","unstructured":"Fortunato, S. et al. Science of science. Science 359, eaao0185 (2018).","journal-title":"Science"},{"key":"906_CR8","doi-asserted-by":"publisher","first-page":"1046","DOI":"10.1038\/s41562-023-01562-4","volume":"7","author":"L Liu","year":"2023","unstructured":"Liu, L., Jones, B. F., Uzzi, B. & Wang, D. Data, measurement and empirical methods in the science of science. Nat. Hum. Behav. 7, 1046\u20131058 (2023).","journal-title":"Nat. Hum. Behav."},{"key":"906_CR9","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-023-02198-9","volume":"10","author":"Z Lin","year":"2023","unstructured":"Lin, Z., Yin, Y., Liu, L. & Wang, D. SciSciNet: a large-scale open data lake for the science of science research. Sci. Data 10, 315 (2023).","journal-title":"Sci. Data"},{"key":"906_CR10","unstructured":"Priem, J., Piwowar, H. & Orr, R. OpenAlex: a fully-open index of scholarly works, authors, venues, institutions, and concepts. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2205.01833 (2022)."},{"key":"906_CR11","doi-asserted-by":"publisher","first-page":"387","DOI":"10.1162\/qss_a_00020","volume":"1","author":"C Herzog","year":"2020","unstructured":"Herzog, C., Hook, D. & Konkiel, S. Dimensions: bringing down barriers between scientometricians and data. Quant. Sci. Stud. 1, 387\u2013395 (2020).","journal-title":"Quant. Sci. Stud."},{"key":"906_CR12","doi-asserted-by":"publisher","first-page":"414","DOI":"10.1162\/qss_a_00022","volume":"1","author":"G Hendricks","year":"2020","unstructured":"Hendricks, G., Tkaczyk, D., Lin, J. & Feeney, P. Crossref: the sustainable source of community-owned scholarly metadata. Quant.Sci. Stud. 1, 414\u2013427 (2020).","journal-title":"Quant.Sci. Stud."},{"key":"906_CR13","doi-asserted-by":"publisher","first-page":"396","DOI":"10.1162\/qss_a_00021","volume":"1","author":"K Wang","year":"2020","unstructured":"Wang, K. et al. Microsoft Academic Graph: when experts are not enough. Quant. Sci. Stud. 1, 396\u2013413 (2020).","journal-title":"Quant. Sci. Stud."},{"key":"906_CR14","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1162\/qss_a_00019","volume":"1","author":"J Baas","year":"2020","unstructured":"Baas, J., Schotten, M., Plume, A., C\u00f4t\u00e9, G. & Karimi, R. Scopus as a curated, high-quality bibliometric data source for academic research in quantitative science studies. Quant. Sci. Stud. 1, 377\u2013386 (2020).","journal-title":"Quant. Sci. Stud."},{"key":"906_CR15","doi-asserted-by":"publisher","first-page":"363","DOI":"10.1162\/qss_a_00018","volume":"1","author":"C Birkle","year":"2020","unstructured":"Birkle, C., Pendlebury, D. A., Schnell, J. & Adams, J. Web of Science as a data source for research on scientific and scholarly activity. Quant. Sci. Stud. 1, 363\u2013376 (2020).","journal-title":"Quant. Sci. Stud."},{"key":"906_CR16","doi-asserted-by":"publisher","first-page":"624","DOI":"10.1162\/qss_a_00204","volume":"3","author":"M Szomszor","year":"2022","unstructured":"Szomszor, M. & Adie, E. Overton: a bibliometric database of policy document citations. Quant. Sci. Stud. 3, 624\u2013650 (2022).","journal-title":"Quant. Sci. Stud."},{"key":"906_CR17","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1111\/jems.12455","volume":"31","author":"M Marx","year":"2022","unstructured":"Marx, M. & Fuegi, A. Reliance on science by inventors: Hybrid extraction of in-text patent-to-article citations. J. Econ. Manag. Strat. 31, 369\u2013392 (2022).","journal-title":"J. Econ. Manag. Strat."},{"key":"906_CR18","unstructured":"Salganik, M. J. Bit by Bit: Social Research in the Digital Age (Princeton Univ. Press, 2019)."},{"key":"906_CR19","doi-asserted-by":"publisher","first-page":"283","DOI":"10.1111\/j.1467-937X.2008.00531.x","volume":"76","author":"BF Jones","year":"2009","unstructured":"Jones, B. F. The burden of knowledge and the \u2018death of the renaissance man\u2019: is innovation getting harder? Rev. Econ. Stud. 76, 283\u2013317 (2009).","journal-title":"Rev. Econ. Stud."},{"key":"906_CR20","doi-asserted-by":"publisher","first-page":"1036","DOI":"10.1126\/science.1136099","volume":"316","author":"S Wuchty","year":"2007","unstructured":"Wuchty, S., Jones, B. F. & Uzzi, B. The increasing dominance of teams in production of knowledge. Science 316, 1036\u20131039 (2007).","journal-title":"Science"},{"key":"906_CR21","doi-asserted-by":"publisher","first-page":"999","DOI":"10.1038\/s41586-025-09048-1","volume":"642","author":"R Hill","year":"2025","unstructured":"Hill, R. et al. The pivot penalty in research. Nature 642, 999\u20131006 (2025).","journal-title":"Nature"},{"key":"906_CR22","doi-asserted-by":"publisher","first-page":"518","DOI":"10.1109\/TVCG.2023.3327387","volume":"30","author":"Y Wang","year":"2024","unstructured":"Wang, Y., Qian, Y., Qi, X., Cao, N. & Wang, D. InnovationInsights: a visual analytics approach for understanding the dual frontiers of science and technology. IEEE Trans. Visual Comput. Graphics 30, 518\u2013528 (2024).","journal-title":"IEEE Trans. Visual Comput. Graphics"},{"key":"906_CR23","doi-asserted-by":"publisher","first-page":"2293","DOI":"10.1038\/s41562-024-02024-1","volume":"8","author":"M Vaccaro","year":"2024","unstructured":"Vaccaro, M., Almaatouq, A. & Malone, T. When combinations of humans and AI are useful: a systematic review and meta-analysis. Nat. Hum. Behav. 8, 2293\u20132303 (2024).","journal-title":"Nat. Hum. Behav."},{"key":"906_CR24","doi-asserted-by":"publisher","first-page":"46","DOI":"10.1038\/s42256-022-00593-2","volume":"5","author":"A Sharma","year":"2023","unstructured":"Sharma, A., Lin, I. W., Miner, A. S., Atkins, D. C. & Althoff, T. Human\u2013AI collaboration enables more empathic conversations in text-based peer-to-peer mental health support. Nat. Mach. Intell. 5, 46\u201357 (2023).","journal-title":"Nat. Mach. Intell."},{"key":"906_CR25","doi-asserted-by":"crossref","unstructured":"Bail, C. A. Can generative AI improve social science? Proc. Natl Acad. Sci USA 121, e2314021121 (2024).","DOI":"10.1073\/pnas.2314021121"},{"key":"906_CR26","unstructured":"Brown, T. et al. Language models are few-shot learners. Adv. Neural Inf. Process. Syst. 33, 1877\u20131901 (2020)."},{"key":"906_CR27","doi-asserted-by":"crossref","unstructured":"Wei, J. et al. Chain-of-thought prompting elicits reasoning in large language models. Adv. Neural Inf. Process. Syst. 35, 24824\u201324837 (2022).","DOI":"10.52202\/068431-1800"},{"key":"906_CR28","doi-asserted-by":"crossref","unstructured":"Yao, S. et al. Tree of thoughts: Deliberate problem solving with large language models. Adv. Neural Inf. Process. Syst. 36, 11809\u201311822 (2023).","DOI":"10.52202\/075280-0517"},{"key":"906_CR29","doi-asserted-by":"crossref","unstructured":"Schick, T. et al. Toolformer: Language models can teach themselves to use tools. Adv. Neural Inf. Process. Syst. 36, 68539\u201368551 (2023).","DOI":"10.52202\/075280-2997"},{"key":"906_CR30","unstructured":"Yao, S. et al. React: Synergizing reasoning and acting in language models. in The Eleventh International Conference on Learning Representations (2023)."},{"key":"906_CR31","doi-asserted-by":"crossref","unstructured":"Wang, Y., Wang, W., Joty, S. & Hoi, S. C. CodeT5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing 8696\u20138708 (2021).","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"906_CR32","unstructured":"Fried, D. et al. InCoder: A generative model for code infilling and synthesis. in The Eleventh International Conference on Learning Representations (2023)."},{"key":"906_CR33","doi-asserted-by":"publisher","first-page":"1092","DOI":"10.1126\/science.abq1158","volume":"378","author":"Y Li","year":"2022","unstructured":"Li, Y. et al. Competition-level code generation with AlphaCode. Science 378, 1092\u20131097 (2022).","journal-title":"Science"},{"key":"906_CR34","unstructured":"Chen, M. et al. Evaluating large language models trained on code. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2107.03374 (2021)."},{"key":"906_CR35","unstructured":"Skarlinski, M. D. et al. Language agents achieve superhuman synthesis of scientific knowledge. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2409.13740 (2024)."},{"key":"906_CR36","unstructured":"L\u00e1la, J. et al. PaperQA: retrieval-augmented generative agent for scientific research. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2312.07559 (2023)."},{"key":"906_CR37","unstructured":"Hu, X. et al. InfiAgent-DABench: evaluating agents on data analysis tasks. in Proc. 41st International Conference on Machine Learning 19544\u201319572 (2024)."},{"key":"906_CR38","unstructured":"Guo, S. et al. DS-Agent: automated data science by empowering large language models with case-based reasoning. in Proc. 41st International Conference on Machine Learning 16813\u201316848 (2024)."},{"key":"906_CR39","doi-asserted-by":"crossref","unstructured":"Hong, S. et al. Data interpreter: an LLM agent for data science. in Findings of the Association for Computational Linguistics: ACL 2025 (eds Che, W. et al.) 19796\u201319821 (Association for Computational Linguistics, 2025).","DOI":"10.18653\/v1\/2025.findings-acl.1016"},{"key":"906_CR40","doi-asserted-by":"publisher","unstructured":"Sun, M. et al. Lambda: A large model based data agent. J. Am. Stat. Assoc. https:\/\/doi.org\/10.1080\/01621459.2025.2510000 (2025).","DOI":"10.1080\/01621459.2025.2510000"},{"key":"906_CR41","doi-asserted-by":"publisher","unstructured":"Enhancing the Effectiveness of Team Science (National Academies Press, 2015); https:\/\/doi.org\/10.17226\/19007","DOI":"10.17226\/19007"},{"key":"906_CR42","doi-asserted-by":"publisher","first-page":"20120375","DOI":"10.1098\/rsta.2012.0375","volume":"371","author":"A-L Barab\u00e1si","year":"2013","unstructured":"Barab\u00e1si, A.-L. Network science. Philos. Trans. R. Soc. A 371, 20120375 (2013).","journal-title":"Philos. Trans. R. Soc. A"},{"key":"906_CR43","unstructured":"Zhang, Y., Yuan, Y. & Yao, A. C.-C. Meta prompting for AI systems. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2311.11482 (2024)."},{"key":"906_CR44","doi-asserted-by":"publisher","DOI":"10.1126\/science.aac4716","volume":"349","author":"Open Science Collaboration.","year":"2015","unstructured":"Open Science Collaboration. Estimating the reproducibility of psychological science. Science 349, aac4716 (2015).","journal-title":"Science"},{"key":"906_CR45","doi-asserted-by":"publisher","first-page":"1422","DOI":"10.1126\/science.aab2374","volume":"348","author":"BA Nosek","year":"2015","unstructured":"Nosek, B. A. et al. Promoting an open research culture. Science 348, 1422\u20131425 (2015).","journal-title":"Science"},{"key":"906_CR46","doi-asserted-by":"publisher","first-page":"378","DOI":"10.1038\/s41586-019-0941-9","volume":"566","author":"L Wu","year":"2019","unstructured":"Wu, L., Wang, D. & Evans, J. A. Large teams develop and small teams disrupt science and technology. Nature 566, 378\u2013382 (2019).","journal-title":"Nature"},{"key":"906_CR47","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1109\/52.2014","volume":"5","author":"WS Humphrey","year":"1988","unstructured":"Humphrey, W. S. Characterizing the software process: a maturity framework. IEEE Softw. 5, 73\u201379 (1988).","journal-title":"IEEE Softw."},{"key":"906_CR48","unstructured":"Carnegie Mellon University, C., Paulk, M. C., Weber, C. V., Curtis, B. & Chrissis, M. B. The Capability Maturity Model: Guidelines for Improving the Software Process (Addison-Wesley Longman, 1995)."},{"key":"906_CR49","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/52.219617","volume":"10","author":"MC Paulk","year":"1993","unstructured":"Paulk, M. C., Curtis, B., Chrissis, M. B. & Weber, C. V. Capability maturity model, version 1.1. IEEE Softw. 10, 18\u201327 (1993).","journal-title":"IEEE Softw."},{"key":"906_CR50","doi-asserted-by":"publisher","unstructured":"Center for Science of Science and Innovation. SciSciNet-v2. Hugging Face https:\/\/doi.org\/10.57967\/HF\/5692 (2025).","DOI":"10.57967\/HF\/5692"},{"key":"906_CR51","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2203150119","volume":"119","author":"N Breznau","year":"2022","unstructured":"Breznau, N. et al. Observing many researchers using the same data and hypothesis reveals a hidden universe of uncertainty. Proc. Natl Acad. Sci. USA 119, e2203150119 (2022).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"906_CR52","doi-asserted-by":"crossref","unstructured":"Otis, N. G., Delecourt, S., Cranney, K. & Koning, R. Global Evidence on Gender Gaps and Generative AI (Harvard Business School, 2024).","DOI":"10.31219\/osf.io\/h6a7c"},{"key":"906_CR53","doi-asserted-by":"publisher","first-page":"2281","DOI":"10.1038\/s41562-024-02020-5","volume":"8","author":"J Gao","year":"2024","unstructured":"Gao, J. & Wang, D. Quantifying the use and potential benefits of artificial intelligence in scientific research. Nat. Hum. Behav. 8, 2281\u20132292 (2024).","journal-title":"Nat. Hum. Behav."},{"key":"906_CR54","doi-asserted-by":"publisher","unstructured":"Yin, Y. SciSciNet: a large-scale open data lake for the science of science research. Figshare https:\/\/doi.org\/10.6084\/M9.FIGSHARE.C.6076908.V1 (2023).","DOI":"10.6084\/M9.FIGSHARE.C.6076908.V1"},{"key":"906_CR55","doi-asserted-by":"publisher","first-page":"58","DOI":"10.1162\/dint_a_00006","volume":"1","author":"H Wan","year":"2019","unstructured":"Wan, H., Zhang, Y., Zhang, J. & Tang, J. AMiner: search and mining of academic social networks. Data Intell. 1, 58\u201376 (2019).","journal-title":"Data Intell."},{"key":"906_CR56","unstructured":"Lopez, P. et al. GROBID. Github https:\/\/github.com\/kermitt2\/grobid (2025)."},{"key":"906_CR57","unstructured":"Ganesan, H. S. LLM-Research-Scripts. GitHub https:\/\/github.com\/harishsg993010\/LLM-Research-Scripts (2025)."},{"key":"906_CR58","unstructured":"OpenAI et al. OpenAI o1 system card. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2412.16720 (2024)."},{"key":"906_CR59","doi-asserted-by":"crossref","unstructured":"Guo, D. et al. DeepSeek-R1 incentivizes reasoning in LLMs through reinforcement learning. Nature 645, 633\u2013638 (2025).","DOI":"10.1038\/s41586-025-09422-z"},{"key":"906_CR60","doi-asserted-by":"crossref","unstructured":"Liu, N. F. et al. Lost in the middle: how language models use long contexts. Trans. Assoc. Comput. Linguist. 12, 157\u2013173 (2024).","DOI":"10.1162\/tacl_a_00638"},{"key":"906_CR61","doi-asserted-by":"crossref","unstructured":"Zhao, J. et al. LONGAGENT: achieving question answering for 128k-token-long documents through multi-agent collaboration. In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (eds Al-Onaizan, Y. et al.) 16310\u201316324 (Association for Computational Linguistics, 2024).","DOI":"10.18653\/v1\/2024.emnlp-main.912"},{"key":"906_CR62","first-page":"17682","volume":"38","author":"M Besta","year":"2024","unstructured":"Besta, M. et al. Graph of thoughts: solving elaborate problems with large language models. Proc. AAAI Conf. Artif. Intell. 38, 17682\u201317690 (2024).","journal-title":"Proc. AAAI Conf. Artif. Intell."},{"key":"906_CR63","doi-asserted-by":"crossref","unstructured":"Fan, W. et al. A Survey on RAG meeting LLMs: towards retrieval-augmented large language models. In Proc. 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining 6491\u20136501 (2024).","DOI":"10.1145\/3637528.3671470"},{"key":"906_CR64","unstructured":"Gao, Y. et al. Retrieval-augmented generation for large language models: a survey. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2312.10997 (2024)."},{"key":"906_CR65","first-page":"e35179","volume":"15","author":"H Alkaissi","year":"2023","unstructured":"Alkaissi, H. & McFarlane, S. I. Artificial hallucinations in ChatGPT: implications in scientific writing. Cureus 15, e35179 (2023).","journal-title":"Cureus"},{"key":"906_CR66","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1093\/jla\/laae003","volume":"16","author":"M Dahl","year":"2024","unstructured":"Dahl, M., Magesh, V., Suzgun, M. & Ho, D. E. Large legal fictions: profiling legal hallucinations in large language models. J. Leg. Anal. 16, 64\u201393 (2024).","journal-title":"J. Leg. Anal."},{"key":"906_CR67","unstructured":"Evans, O. et al. Truthful AI: developing and governing AI that does not lie. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2110.06674 (2021)."},{"key":"906_CR68","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3571730","volume":"55","author":"Z Ji","year":"2023","unstructured":"Ji, Z. et al. Survey of hallucination in natural language generation. ACM Comput. Surv. 55, 1\u201338 (2023).","journal-title":"ACM Comput. Surv."},{"key":"906_CR69","doi-asserted-by":"crossref","unstructured":"Ji, Z. et al. Towards mitigating LLM hallucination via self reflection. In Findings of the Association for Computational Linguistics: EMNLP 2023 (eds Bouamor, H. et al.) 1827\u20131843 (Association for Computational Linguistics, 2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.123"},{"key":"906_CR70","unstructured":"Cheng, J. et al. Dated data: tracing knowledge cutoffs in large language models. in The First Conference on Language Modeling (2024)."},{"key":"906_CR71","unstructured":"Lewis, P. et al. Retrieval-augmented generation for knowledge-intensive NLP tasks. In Proc. 34th International Conference on Neural Information Processing Systems (Curran Associates Inc., 2020)."},{"key":"906_CR72","unstructured":"Karpas, E. et al. MRKL systems: a modular, neuro-symbolic architecture that combines large language models, external knowledge sources and discrete reasoning. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2205.00445 (2022)."},{"key":"906_CR73","doi-asserted-by":"crossref","unstructured":"Press, O. et al. Measuring and narrowing the compositionality gap in language models. in Findings of the Association for Computational Linguistics: EMNLP 2023 (eds Bouamor, H. et al.) 5687\u20135711 (Association for Computational Linguistics, 2023).","DOI":"10.18653\/v1\/2023.findings-emnlp.378"},{"key":"906_CR74","unstructured":"Asai, A., Wu, Z., Wang, Y., Sil, A. & Hajishirzi, H. Self-RAG: learning to retrieve, generate, and critique through self-reflection. in The Twelfth International Conference on Learning Representations (2024)."},{"key":"906_CR75","doi-asserted-by":"crossref","unstructured":"Gao, L., Ma, X., Lin, J. & Callan, J. Precise zero-shot dense retrieval without relevance labels. in Proc. 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) (eds Rogers, A. et al.) 1762\u20131777 (Association for Computational Linguistics, 2023).","DOI":"10.18653\/v1\/2023.acl-long.99"},{"key":"906_CR76","doi-asserted-by":"crossref","unstructured":"Yu, W. et al. Chain-of-note: enhancing robustness in retrieval-augmented language models. in Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (eds Al-Onaizan, Y. et al.) 14672\u201314685 (Association for Computational Linguistics, 2024).","DOI":"10.18653\/v1\/2024.emnlp-main.813"},{"key":"906_CR77","doi-asserted-by":"crossref","unstructured":"Sun, M. et al. A survey on large language model-based agents for statistics and data science. The American Statistician 1\u201314 (2025).","DOI":"10.1080\/00031305.2025.2561140"},{"key":"906_CR78","unstructured":"Qiao, B. et al. TaskWeaver: a code-first agent framework. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2311.17541 (2024)."},{"key":"906_CR79","unstructured":"Zhang, W., Shen, Y., Lu, W. & Zhuang, Y. Data-copilot: bridging billions of data and humans with autonomous workflow. in ICLR 2024 Workshop on Large Language Model (LLM) Agents (2024)."},{"key":"906_CR80","unstructured":"Lu, C. et al. The AI Scientist: Towards Fully Automated Open-Ended Scientific Discovery. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2408.06292 (2024)."},{"key":"906_CR81","doi-asserted-by":"crossref","unstructured":"Schmidgall, S. et al. Agent Laboratory: Using LLM Agents as Research Assistants. Preprint at https:\/\/arxiv.org\/10.48550\/arXiv.2501.04227 (2025).","DOI":"10.18653\/v1\/2025.findings-emnlp.320"},{"key":"906_CR82","doi-asserted-by":"publisher","unstructured":"Kellogg Center for Science of Science and Innovation. SciSciGPT-SciSciNet. Hugging Face https:\/\/doi.org\/10.57967\/HF\/6649 (2025).","DOI":"10.57967\/HF\/6649"},{"key":"906_CR83","doi-asserted-by":"publisher","unstructured":"Kellogg Center for Science of Science and Innovation. SciSciGPT-SciSciCorpus. Hugging Face https:\/\/doi.org\/10.57967\/HF\/6650 (2025).","DOI":"10.57967\/HF\/6650"},{"key":"906_CR84","doi-asserted-by":"publisher","unstructured":"Shao, E. Northwestern-CSSI\/SciSciGPT. Zenodo https:\/\/doi.org\/10.5281\/ZENODO.17271393 (2025).","DOI":"10.5281\/ZENODO.17271393"}],"container-title":["Nature Computational Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s43588-025-00906-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s43588-025-00906-6","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s43588-025-00906-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T23:02:41Z","timestamp":1774566161000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s43588-025-00906-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,9]]},"references-count":84,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2026,3]]}},"alternative-id":["906"],"URL":"https:\/\/doi.org\/10.1038\/s43588-025-00906-6","relation":{},"ISSN":["2662-8457"],"issn-type":[{"value":"2662-8457","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,9]]},"assertion":[{"value":"6 April 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The study protocol was reviewed and approved by the Institutional Review Board of Northwestern University (no. STU00223588).","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}