{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T07:33:05Z","timestamp":1775115185906,"version":"3.50.1"},"reference-count":92,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T00:00:00Z","timestamp":1732579200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-024-00925-4","type":"journal-article","created":{"date-parts":[[2024,11,26]],"date-time":"2024-11-26T10:03:51Z","timestamp":1732615431000},"page":"1467-1477","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":33,"title":["Contextual feature extraction hierarchies converge in large language models and the brain"],"prefix":"10.1038","volume":"6","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4776-3518","authenticated-orcid":false,"given":"Gavin","family":"Mischler","sequence":"first","affiliation":[]},{"given":"Yinghao Aaron","family":"Li","sequence":"additional","affiliation":[]},{"given":"Stephan","family":"Bickel","sequence":"additional","affiliation":[]},{"given":"Ashesh D.","family":"Mehta","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2987-759X","authenticated-orcid":false,"given":"Nima","family":"Mesgarani","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,26]]},"reference":[{"key":"925_CR1","unstructured":"Toneva, M. & Wehbe, L. Interpreting and improving natural-language processing (in machines) with natural language-processing (in the brain). In Advances in Neural Information Processing Systems 32 (eds Wallach, H. et al.) (NeurIPS, 2019)."},{"key":"925_CR2","doi-asserted-by":"crossref","unstructured":"Abnar, S., Beinborn, L., Choenni, R. & Zuidema, W. Blackbox meets blackbox: representational similarity and stability analysis of neural language models and brains. Preprint at https:\/\/arxiv.org\/abs\/1906.01539 (2019).","DOI":"10.18653\/v1\/W19-4820"},{"key":"925_CR3","doi-asserted-by":"publisher","first-page":"e2105646118","DOI":"10.1073\/pnas.2105646118","volume":"118","author":"M Schrimpf","year":"2021","unstructured":"Schrimpf, M. et al. The neural architecture of language: integrative modeling converges on predictive processing. Proc. Natl Acad. Sci. USA 118, e2105646118 (2021).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"925_CR4","doi-asserted-by":"publisher","first-page":"43","DOI":"10.1162\/nol_a_00137","volume":"5","author":"EA Hosseini","year":"2024","unstructured":"Hosseini, E. A. et al. Artificial neural network language models predict human brain responses to language even after a developmentally realistic amount of training. Neurobiol. Lang. 5, 43\u201363 (2024).","journal-title":"Neurobiol. Lang."},{"key":"925_CR5","doi-asserted-by":"publisher","first-page":"4100","DOI":"10.1523\/JNEUROSCI.1152-20.2021","volume":"41","author":"AJ Anderson","year":"2021","unstructured":"Anderson, A. J. et al. Deep artificial neural networks reveal a distributed cortical network encoding propositional sentence-level meaning. J. Neurosci. 41, 4100\u20134119 (2021).","journal-title":"J. Neurosci."},{"key":"925_CR6","unstructured":"Caucheteux, C., Gramfort, A. & King, J.-R. Disentangling syntax and semantics in the brain with deep networks. In Proc. 38th International Conference on Machine Learning (eds Meila, M. & Zhang, T.) 1336\u20131348 (Proceedings of Machine Learning Research, 2021)."},{"key":"925_CR7","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1038\/s42003-022-03036-1","volume":"5","author":"C Caucheteux","year":"2022","unstructured":"Caucheteux, C. & King, J.-R. \u0301 Brains and algorithms partially converge in natural language processing. Commun. Biol. 5, 134 (2022).","journal-title":"Commun. Biol."},{"key":"925_CR8","doi-asserted-by":"publisher","first-page":"589","DOI":"10.1109\/TNNLS.2020.3027595","volume":"32","author":"J Sun","year":"2020","unstructured":"Sun, J., Wang, S., Zhang, J. & Zong, C. Neural encoding and decoding with distributed sentence representations. IEEE Trans. Neural Networks Learn. Syst. 32, 589\u2013603 (2020).","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"925_CR9","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1038\/s41593-022-01026-4","volume":"25","author":"A Goldstein","year":"2022","unstructured":"Goldstein, A. et al. Shared computational principles for language processing in humans and deep language models. Nat. Neurosci. 25, 369\u2013380 (2022).","journal-title":"Nat. Neurosci."},{"key":"925_CR10","doi-asserted-by":"publisher","first-page":"430","DOI":"10.1038\/s41562-022-01516-2","volume":"7","author":"C Caucheteux","year":"2023","unstructured":"Caucheteux, C., Gramfort, A. & King, J.-R. \u0301 Evidence of a predictive coding hierarchy in the human brain listening to speech. Nat. Hum. Behav. 7, 430\u2013441 (2023).","journal-title":"Nat. Hum. Behav."},{"key":"925_CR11","first-page":"21895","volume":"36","author":"R Antonello","year":"2023","unstructured":"Antonello, R., Vaidya, A. & Huth, A. G. Scaling laws for language encoding models in fmri. Adv. Neural Inf. Process. Syst. 36, 21895\u201321907 (2023).","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"925_CR12","first-page":"64","volume":"5","author":"R Antonello","year":"2024","unstructured":"Antonello, R. & Huth, A. Predictive coding or just feature discovery? An alternative account of why language models fit brain data. Neurobiol. Lang. 5, 64\u201379 (2024).","journal-title":"Neurobiol. Lang."},{"key":"925_CR13","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1038\/nrn2113","volume":"8","author":"G Hickok","year":"2007","unstructured":"Hickok, G. & Poeppel, D. The cortical organization of speech processing. Nat. Rev. Neurosci. 8, 393\u2013402 (2007).","journal-title":"Nat. Rev. Neurosci."},{"key":"925_CR14","doi-asserted-by":"publisher","first-page":"2539","DOI":"10.1523\/JNEUROSCI.5487-07.2008","volume":"28","author":"U Hasson","year":"2008","unstructured":"Hasson, U., Yang, E., Vallines, I., Heeger, D. J. & Rubin, N. A hierarchy of temporal receptive windows in human cortex. J. Neurosci. 28, 2539\u20132550 (2008).","journal-title":"J. Neurosci."},{"key":"925_CR15","doi-asserted-by":"publisher","first-page":"2906","DOI":"10.1523\/JNEUROSCI.3684-10.2011","volume":"31","author":"Y Lerner","year":"2011","unstructured":"Lerner, Y., Honey, C. J., Silbert, L. J. & Hasson, U. Topographic mapping of a hierarchy of temporal receptive windows using a narrated story. J. Neurosci. 31, 2906\u20132915 (2011).","journal-title":"J. Neurosci."},{"key":"925_CR16","doi-asserted-by":"publisher","first-page":"481","DOI":"10.3389\/fnhum.2017.00481","volume":"11","author":"N Ding","year":"2017","unstructured":"Ding, N. et al. Characterizing neural entrainment to hierarchical linguistic units using electroencephalography (EEG). Front. Hum. Neurosci. 11, 481 (2017).","journal-title":"Front. Hum. Neurosci."},{"key":"925_CR17","doi-asserted-by":"crossref","unstructured":"Ethayarajh, K. How contextual are contextualized word representations? Comparing the geometry of BERT, ELMo, and GPT-2 embeddings. Preprint at https:\/\/arxiv.org\/abs\/1909.00512 (2019).","DOI":"10.18653\/v1\/D19-1006"},{"key":"925_CR18","doi-asserted-by":"crossref","unstructured":"Tenney, I., Das, D. & Pavlick, E. BERT rediscovers the classical NLP pipeline. In Proc. 57th Annual Meeting of the Association for Computational Linguistics (eds Korhonen, A.) 4593\u20134601 (Association for Computational Linguistics, 2019).","DOI":"10.18653\/v1\/P19-1452"},{"key":"925_CR19","unstructured":"Touvron, H. et al. Llama 2: open foundation and fine-tuned chat models. Preprint at https:\/\/arxiv.org\/abs\/2307.09288 (2023)."},{"key":"925_CR20","first-page":"65","volume":"6","author":"S Holm","year":"1979","unstructured":"Holm, S. A simple sequentially rejective multiple test procedure. Scand. J. Stat. 6, 65\u201370 (1979).","journal-title":"Scand. J. Stat."},{"key":"925_CR21","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1016\/j.neuron.2012.08.011","volume":"76","author":"CJ Honey","year":"2012","unstructured":"Honey, C. J. et al. Slow cortical dynamics and the accumulation of information over long timescales. Neuron 76, 423\u2013434 (2012).","journal-title":"Neuron"},{"key":"925_CR22","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2209307119","volume":"119","author":"CHC Chang","year":"2022","unstructured":"Chang, C. H. C., Nastase, S. A. & Hasson, U. Information flow across the cortical timescale hierarchy during narrative construction. Proc. Natl Acad. Sci. USA 119, e2209307119 (2022).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"925_CR23","doi-asserted-by":"publisher","first-page":"761","DOI":"10.1016\/j.conb.2011.05.027","volume":"21","author":"TO Sharpee","year":"2011","unstructured":"Sharpee, T. O., Atencio, C. A. & Schreiner, C. E. Hierarchical representations in the auditory cortex. Curr. Opin. Neurobiol. 21, 761\u2013767 (2011).","journal-title":"Curr. Opin. Neurobiol."},{"key":"925_CR24","doi-asserted-by":"publisher","first-page":"684","DOI":"10.1006\/nimg.2000.0715","volume":"13","author":"P Morosan","year":"2001","unstructured":"Morosan, P. et al. Human primary auditory cortex: cytoarchitectonic subdivisions and mapping into a spatial reference system. NeuroImage 13, 684\u2013701 (2001).","journal-title":"NeuroImage"},{"key":"925_CR25","doi-asserted-by":"publisher","first-page":"11","DOI":"10.3389\/fnsys.2013.00011","volume":"7","author":"S Baumann","year":"2013","unstructured":"Baumann, S., Petkov, C. I. & Griffiths, T. D. A unified framework for the organization of the primate auditory cortex. Front. Syst. Neurosci. 7, 11 (2013).","journal-title":"Front. Syst. Neurosci."},{"key":"925_CR26","doi-asserted-by":"publisher","first-page":"e2005127","DOI":"10.1371\/journal.pbio.2005127","volume":"16","author":"SV Norman-Haignere","year":"2018","unstructured":"Norman-Haignere, S. V. & McDermott, J. H. Neural responses to natural and model-matched stimuli reveal distinct computations in primary and nonprimary auditory cortex. PLoS Biol. 16, e2005127 (2018).","journal-title":"PLoS Biol."},{"key":"925_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2022.119819","volume":"266","author":"G Mischler","year":"2023","unstructured":"Mischler, G., Keshishian, M., Bickel, S., Mehta, A. D. & Mesgarani, N. Deep neural networks effectively model neural adaptation to changing background noise and suggest nonlinear noise filtering methods in auditory cortex. NeuroImage 266, 119819 (2023).","journal-title":"NeuroImage"},{"key":"925_CR28","doi-asserted-by":"publisher","first-page":"5523","DOI":"10.1038\/s41467-024-49173-5","volume":"15","author":"S Kumar","year":"2024","unstructured":"Kumar, S. et al. Shared functional specialization in transformer-based language models and the human brain. Nat. Commun. 15, 5523 (2024).","journal-title":"Nat. Commun."},{"key":"925_CR29","doi-asserted-by":"publisher","first-page":"103013","DOI":"10.1016\/j.isci.2021.103013","volume":"24","author":"S Nonaka","year":"2021","unstructured":"Nonaka, S., Majima, K., Aoki, S. C. & Kamitani, Y. Brain hierarchy score: which deep neural networks are hierarchically brain-like? iScience 24, 103013 (2021).","journal-title":"iScience"},{"key":"925_CR30","unstructured":"Kornblith, S., Norouzi, M., Lee, H. & Hinton, G. Similarity of neural network representations revisited. In Proc. 36th International Conference on Machine Learning (eds Chaudhuri, K. & Salakhutdinov, R.) 3519\u20133529 (Proceedings of Machine Learning Research, 2019)."},{"key":"925_CR31","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-20460-9","volume":"12","author":"C Caucheteux","year":"2022","unstructured":"Caucheteux, C., Gramfort, A. & King, J.-R. \u0301 Deep language algorithms predict semantic comprehension from brain activity. Sci. Rep. 12, 16327 (2022).","journal-title":"Sci. Rep."},{"key":"925_CR32","doi-asserted-by":"publisher","first-page":"799","DOI":"10.1002\/hbm.20221","volume":"27","author":"SG Costafreda","year":"2006","unstructured":"Costafreda, S. G. et al. A systematic review and quantitative appraisal of fMRI studies of verbal fluency: role of the left inferior frontal gyrus. Hum. Brain Mapp. 27, 799\u2013810 (2006).","journal-title":"Hum. Brain Mapp."},{"key":"925_CR33","doi-asserted-by":"publisher","first-page":"2914","DOI":"10.1523\/JNEUROSCI.2271-19.2020","volume":"40","author":"S Arana","year":"2020","unstructured":"Arana, S., Marquand, A., Hulte\u0301n, A., Hagoort, P. & Schoffe-len, J.-M. Sensory modality-independent activation of the brain network for language. J. Neurosci. 40, 2914\u20132924 (2020).","journal-title":"J. Neurosci."},{"key":"925_CR34","doi-asserted-by":"publisher","first-page":"3232","DOI":"10.1093\/cercor\/bhy191","volume":"29","author":"J Sheng","year":"2019","unstructured":"Sheng, J. et al. The cortical maps of hierarchical linguistic structures during speech perception. Cereb. Cortex 29, 3232\u20133240 (2019).","journal-title":"Cereb. Cortex"},{"key":"925_CR35","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1038\/s41562-023-01520-0","volume":"7","author":"M Keshishian","year":"2023","unstructured":"Keshishian, M. et al. Joint, distributed and hierarchically organized encoding of linguistic features in the human auditory cortex. Nat. Hum. Behav. 7, 740\u2013753 (2023).","journal-title":"Nat. Hum. Behav."},{"key":"925_CR36","doi-asserted-by":"publisher","first-page":"664","DOI":"10.1038\/s41593-023-01285-9","volume":"26","author":"BL Giordano","year":"2023","unstructured":"Giordano, B. L., Esposito, M., Valente, G. & Formisano, E. Intermediate acoustic-to-semantic representations link behavioral and neural responses to natural sounds. Nat. Neurosci. 26, 664\u2013672 (2023).","journal-title":"Nat. Neurosci."},{"key":"925_CR37","doi-asserted-by":"publisher","first-page":"e3002366","DOI":"10.1371\/journal.pbio.3002366","volume":"21","author":"G Tuckute","year":"2023","unstructured":"Tuckute, G., Feather, J., Boebinger, D. & McDermott, J. H. Many but not all deep neural network audio models capture brain responses and exhibit correspondence between model stages and brain regions. PLoS Biol. 21, e3002366 (2023).","journal-title":"PLoS Biol."},{"key":"925_CR38","doi-asserted-by":"publisher","first-page":"417","DOI":"10.1146\/annurev-vision-082114-035447","volume":"1","author":"N Kriegeskorte","year":"2015","unstructured":"Kriegeskorte, N. Deep neural networks: a new framework for modeling biological vision and brain information processing. Annu. Rev. Vision Sci. 1, 417\u2013446 (2015).","journal-title":"Annu. Rev. Vision Sci."},{"key":"925_CR39","doi-asserted-by":"publisher","DOI":"10.1038\/srep27755","volume":"6","author":"RM Cichy","year":"2016","unstructured":"Cichy, R. M., Khosla, A., Pantazis, D., Torralba, A. & Oliva, A. Comparison of deep neural networks to spatio-temporal cortical dynamics of human visual object recognition reveals hierarchical correspondence. Sci. Rep. 6, 27755 (2016).","journal-title":"Sci. Rep."},{"key":"925_CR40","doi-asserted-by":"publisher","DOI":"10.1126\/sciadv.abm2219","volume":"8","author":"NJ Sexton","year":"2022","unstructured":"Sexton, N. J. & Love, B. C. Reassessing hierarchical correspondences between brain and deep networks through direct interface. Sci. Adv. 8, eabm2219 (2022).","journal-title":"Sci. Adv."},{"key":"925_CR41","doi-asserted-by":"publisher","first-page":"4","DOI":"10.3389\/fncom.2017.00004","volume":"11","author":"T Horikawa","year":"2017","unstructured":"Horikawa, T. & Kamitani, Y. Hierarchical neural representation of dreamed objects revealed by brain decoding with deep neural network features. Front. Comput. Neurosci. 11, 4 (2017).","journal-title":"Front. Comput. Neurosci."},{"key":"925_CR42","unstructured":"Vaswani, A. et al. Attention is all you need. In Advances in Neural Information Processing Systems 30 (eds Guyon, I. et al.) (NeurIPS, 2017)."},{"key":"925_CR43","unstructured":"O\u2019Connor, J. & Andreas, J. What context features can transformer language models use? Preprint at https:\/\/arxiv.org\/abs\/2106.08367 (2021)."},{"key":"925_CR44","doi-asserted-by":"crossref","unstructured":"Clark, K., Khandelwal, U., Levy, O. & Manning, C. D. What does BERT look at? An analysis of BERT\u2019s attention. Preprint at https:\/\/arxiv.org\/abs\/1906.04341 (2019).","DOI":"10.18653\/v1\/W19-4828"},{"key":"925_CR45","unstructured":"Skrill, D. & Norman-Haignere, S. V. Large language models transition from integrating across position-yoked, exponential windows to structure-yoked, power-law windows. In Proc. 37th International Conference on Neural Information Processing Systems (eds Oh, A. et al.) 638\u2013654 (Curran Associates, Inc., 2023)."},{"key":"925_CR46","doi-asserted-by":"publisher","first-page":"455","DOI":"10.1038\/s41562-021-01261-y","volume":"6","author":"SV Norman-Haignere","year":"2022","unstructured":"Norman-Haignere, S. V. et al. Multiscale temporal integration organizes hierarchical computation in human auditory cortex. Nat. Hum. Behav. 6, 455\u2013469 (2022).","journal-title":"Nat. Hum. Behav."},{"key":"925_CR47","doi-asserted-by":"publisher","first-page":"6539","DOI":"10.1523\/JNEUROSCI.3267-16.2017","volume":"37","author":"WA de Heer","year":"2017","unstructured":"de Heer, W. A., Huth, A. G., Griffiths, T. L., Gallant, J. L. & Theunissen, F. E. The hierarchical cortical organization of human speech processing. J. Neurosci. 37, 6539\u20136557 (2017).","journal-title":"J. Neurosci."},{"key":"925_CR48","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuroimage.2020.117586","volume":"227","author":"GM Di Liberto","year":"2021","unstructured":"Di Liberto, G. M. et al. Neural representation of linguistic feature hierarchy reflects second-language proficiency. NeuroImage 227, 117586 (2021).","journal-title":"NeuroImage"},{"key":"925_CR49","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-39872-w","volume":"14","author":"XL Gong","year":"2023","unstructured":"Gong, X. L. et al. Phonemic segmentation of narrative speech in human cerebral cortex. Nat. Commun. 14, 4309 (2023).","journal-title":"Nat. Commun."},{"key":"925_CR50","doi-asserted-by":"publisher","DOI":"10.3389\/fnhum.2021.672665","volume":"15","author":"MV Ivanova","year":"2021","unstructured":"Ivanova, M. V., Zhong, A., Turken, A., Baldo, J. V. & Dronkers, N. F. Functional contributions of the arcuate fasciculus to language processing. Front. Hum. Neurosci. 15, 672665 (2021).","journal-title":"Front. Hum. Neurosci."},{"key":"925_CR51","doi-asserted-by":"publisher","first-page":"3529","DOI":"10.1093\/brain\/aws222","volume":"135","author":"AS Dick","year":"2012","unstructured":"Dick, A. S. & Tremblay, P. Beyond the arcuate fasciculus: consensus and controversy in the connectional anatomy of language. Brain 135, 3529\u20133550 (2012).","journal-title":"Brain"},{"key":"925_CR52","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1016\/j.neuroimage.2010.04.270","volume":"56","author":"NN Oosterhof","year":"2011","unstructured":"Oosterhof, N. N., Wiestler, T., Downing, P. E. & Diedrichsen, J. A comparison of volume-based and surface-based multi-voxel pattern analysis. NeuroImage 56, 593\u2013600 (2011).","journal-title":"NeuroImage"},{"key":"925_CR53","unstructured":"Naveed, H. et al. A comprehensive overview of large language models. Preprint at https:\/\/arxiv.org\/abs\/2307.06435 (2023)."},{"key":"925_CR54","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C. et al. Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 5485\u20135551 (2020).","journal-title":"J. Mach. Learn. Res."},{"key":"925_CR55","unstructured":"Lee, K. et al. Deduplicating training data makes language models better. Preprint at https:\/\/arxiv.org\/abs\/2107.06499 (2021)."},{"key":"925_CR56","doi-asserted-by":"publisher","first-page":"453","DOI":"10.1038\/nature17637","volume":"532","author":"AG Huth","year":"2016","unstructured":"Huth, A. G., De Heer, W. A., Griffiths, T. L., Theunissen, F. E. & Gallant, J. L. Natural speech reveals the semantic maps that tile human cerebral cortex. Nature 532, 453\u2013458 (2016).","journal-title":"Nature"},{"key":"925_CR57","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-023-42087-8","volume":"14","author":"E Murphy","year":"2023","unstructured":"Murphy, E. et al. The spatiotemporal dynamics of semantic integration in the human brain. Nat. Commun. 14, 6336 (2023).","journal-title":"Nat. Commun."},{"key":"925_CR58","unstructured":"Xiong, W. et al. Effective long-context scaling of foundation models. Preprint at https:\/\/arxiv.org\/abs\/2309.16039 (2023)."},{"key":"925_CR59","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1162\/tacl_a_00638","volume":"12","author":"NF Liu","year":"2024","unstructured":"Liu, N. F. et al. Lost in the middle: how language models use long contexts. Trans. Assoc. Comput. Linguist. 12, 157\u2013173 (2024).","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"925_CR60","doi-asserted-by":"publisher","first-page":"707","DOI":"10.1017\/S0140525X00081061","volume":"13","author":"S Pinker","year":"1990","unstructured":"Pinker, S. & Bloom, P. Natural language and natural selection. Behav. Brain Sci. 13, 707\u2013727 (1990).","journal-title":"Behav. Brain Sci."},{"key":"925_CR61","unstructured":"Deacon, T. W. The Symbolic Species: The Co-Evolution of Language and the Brain (W. W. Norton & Company, 1997)."},{"key":"925_CR62","doi-asserted-by":"publisher","first-page":"573","DOI":"10.1080\/23273798.2018.1499946","volume":"35","author":"LS Hamilton","year":"2020","unstructured":"Hamilton, L. S. & Huth, A. G. The revolution will not be controlled: natural stimuli in speech neuroscience. Lang. Cognit. Neurosci. 35, 573\u2013582 (2020).","journal-title":"Lang. Cognit. Neurosci."},{"key":"925_CR63","doi-asserted-by":"publisher","first-page":"377","DOI":"10.1152\/jn.90954.2008","volume":"102","author":"E Edwards","year":"2009","unstructured":"Edwards, E. et al. Comparison of time\u2013frequency responses and the event-related potential to auditory speech stimuli in human cortex. J. Neurophysiol. 102, 377\u2013386 (2009).","journal-title":"J. Neurophysiol."},{"key":"925_CR64","doi-asserted-by":"publisher","first-page":"e1000610","DOI":"10.1371\/journal.pbio.1000610","volume":"9","author":"S Ray","year":"2011","unstructured":"Ray, S. & Maunsell, J. H. R. Different origins of gamma rhythm and high-gamma activity in macaque visual cortex. PLoS Biol. 9, e1000610 (2011).","journal-title":"PLoS Biol."},{"key":"925_CR65","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1093\/cercor\/bhm094","volume":"18","author":"M Steinschneider","year":"2008","unstructured":"Steinschneider, M., Fishman, Y. I. & Arezzo, J. C. Spectrotemporal analysis of evoked and induced electroencephalographic responses in primary auditory cortex (A1) of the awake monkey. Cereb. Cortex 18, 610\u2013625 (2008).","journal-title":"Cereb. Cortex"},{"key":"925_CR66","doi-asserted-by":"publisher","first-page":"1006","DOI":"10.1126\/science.1245994","volume":"343","author":"N Mesgarani","year":"2014","unstructured":"Mesgarani, N., Cheung, C., Johnson, K. & Chang, E. F. Phonetic feature encoding in human superior temporal gyrus. Science 343, 1006\u20131010 (2014).","journal-title":"Science"},{"key":"925_CR67","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1038\/nature11911","volume":"495","author":"KE Bouchard","year":"2013","unstructured":"Bouchard, K. E., Mesgarani, N., Johnson, K. & Chang, E. F. Functional organization of human sensorimotor cortex for speech articulation. Nature 495, 327\u2013332 (2013).","journal-title":"Nature"},{"key":"925_CR68","unstructured":"Wolf, T. et al. HuggingFace\u2019s transformers: state-of-the-art natural language processing. Preprint at https:\/\/arxiv.org\/abs\/1910.03771 (2019)."},{"key":"925_CR69","doi-asserted-by":"publisher","unstructured":"Gao, L. et al. A framework for few-shot language model evaluation (v0.0.1). Zenodo https:\/\/doi.org\/10.5281\/zenodo.5371629 (2021).","DOI":"10.5281\/zenodo.5371629"},{"key":"925_CR70","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Jia, R. & Liang, P. Know what you don\u2019t know: unanswerable questions for squad. Preprint at https:\/\/arxiv.org\/abs\/1806.03822 (2018).","DOI":"10.18653\/v1\/P18-2124"},{"key":"925_CR71","unstructured":"Clark, C. et al. BoolQ: exploring the surprising difficulty of natural yes\/no questions. Preprint at https:\/\/arxiv.org\/abs\/1905.10044 (2019)."},{"key":"925_CR72","doi-asserted-by":"crossref","unstructured":"Mihaylov, T., Clark, P., Khot, T. & Sabharwal, A. Can a suit of armor conduct electricity? A new dataset for open book question answering. Preprint at https:\/\/arxiv.org\/abs\/1809.02789 (2018).","DOI":"10.18653\/v1\/D18-1260"},{"key":"925_CR73","doi-asserted-by":"crossref","unstructured":"Bisk, Y. et al. PIQA: reasoning about physical commonsense in natural language. Proc. AAAI Conference on Artificial Intelligence 34, 7432\u20137439 (2020).","DOI":"10.1609\/aaai.v34i05.6239"},{"key":"925_CR74","doi-asserted-by":"crossref","unstructured":"Zellers, R., Holtzman, A., Bisk, Y., Farhadi, A. & Choi, Y. HellaSwag: can a machine really finish your sentence? Preprint at https:\/\/arxiv.org\/abs\/1905.07830 (2019).","DOI":"10.18653\/v1\/P19-1472"},{"key":"925_CR75","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1145\/3474381","volume":"64","author":"K Sakaguchi","year":"2021","unstructured":"Sakaguchi, K., Le Bras, R., Bhagavatula, C. & Choi, Y. WinoGrande: an adversarial winograd schema challenge at scale. Commun. ACM 64, 99\u2013106 (2021).","journal-title":"Commun. ACM"},{"key":"925_CR76","first-page":"2825","volume":"12","author":"F Pedregosa","year":"2011","unstructured":"Pedregosa, F. et al. Scikit-learn: machine learning in python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011).","journal-title":"J. Mach. Learn. Res."},{"key":"925_CR77","doi-asserted-by":"publisher","first-page":"40","DOI":"10.1016\/j.jneumeth.2017.01.022","volume":"281","author":"DM Groppe","year":"2017","unstructured":"Groppe, D. M. et al. iELVis: an open source MATLAB toolbox for localizing and visualizing human intracranial electrode data. J. Neurosci. Methods 281, 40\u201348 (2017).","journal-title":"J. Neurosci. Methods"},{"key":"925_CR78","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1093\/cercor\/bhg087","volume":"14","author":"B Fischl","year":"2004","unstructured":"Fischl, B. et al. Automatically parcellating the human cerebral cortex. Cereb. Cortex 14, 11\u201322 (2004).","journal-title":"Cereb. Cortex"},{"key":"925_CR79","doi-asserted-by":"publisher","unstructured":"Margulies, D. S., Falkiewicz, M. & Huntenburg, J. M. A cortical surface-based geodesic distance package for Python. GigaScience 5, https:\/\/doi.org\/10.1186\/s13742-016-0147-0-q (2016).","DOI":"10.1186\/s13742-016-0147-0-q"},{"key":"925_CR80","doi-asserted-by":"publisher","unstructured":"Mischler, G., Aaron Li, Y., Bickel, S., Mehta, A. D. & Mesgarani, N. Contextual feature extraction hierarchies converge in large language models and the brain. Code Ocean https:\/\/doi.org\/10.24433\/CO.0003780.v1 (2024).","DOI":"10.24433\/CO.0003780.v1"},{"key":"925_CR81","doi-asserted-by":"publisher","first-page":"100541","DOI":"10.1016\/j.simpa.2023.100541","volume":"17","author":"G Mischler","year":"2023","unstructured":"Mischler, G., Raghavan, V., Keshishian, M. & Mesgarani, N. Naplib-python: neural acoustic data processing and analysis tools in python. Softw. Impacts 17, 100541 (2023).","journal-title":"Softw. Impacts"},{"key":"925_CR82","unstructured":"Taylor, R. et al. Galactica: a large language model for science. Preprint at https:\/\/arxiv.org\/abs\/2211.09085 (2022)."},{"key":"925_CR83","unstructured":"Dey, N. et al. Cerebras-GPT: open compute-optimal language models trained on the Cerebras wafer-scale cluster. Preprint at https:\/\/arxiv.org\/abs\/2304.03208 (2023)."},{"key":"925_CR84","unstructured":"Biderman, S. et al. Pythia: a suite for analyzing large language models across training and scaling. In Proc. 40th International Conference on Machine Learning (eds Krause, A. et al.) 2397\u20132430 (Proceedings of Machine Learning Research, 2023)."},{"key":"925_CR85","unstructured":"Zhang, S. et al. Opt: open pre-trained transformer language models. Preprint at https:\/\/arxiv.org\/abs\/2205.01068 (2022)."},{"key":"925_CR86","unstructured":"Artetxe, M. et al. Efficient large scale language modeling with mixtures of experts. Preprint at https:\/\/arxiv.org\/abs\/2112.10684 (2021)."},{"key":"925_CR87","unstructured":"LAION. LeoLM: Linguistically Enhanced Open Language Model. Hugging Face https:\/\/huggingface.co\/LeoLM\/leo-hessianai-13b (accessed 1 October 2023)."},{"key":"925_CR88","unstructured":"MosaicML NLP Team. Introducing MPT-7B: A New Standard for Open-Source, Commercially Usable LLMs. DataBricks (May, 2023) www.mosaicml.com\/blog\/mpt-7b"},{"key":"925_CR89","unstructured":"Almazrouei, E. et al. The falcon series of open language models. Preprint at https:\/\/arxiv.org\/abs\/2311.16867 (2023)."},{"key":"925_CR90","unstructured":"Touvron, H. et al. LlaMA: open and efficient foundation language models. Preprint at https:\/\/arxiv.org\/abs\/2302.13971 (2023)."},{"key":"925_CR91","unstructured":"Xwin-LM: Powerful, Stable, and Reproducible LLM Alignment. Hugging Face https:\/\/huggingface.co\/Xwin-LM\/Xwin-LM-7B-V0.2 (accessed 1 October 2023)."},{"key":"925_CR92","unstructured":"Jiang, A. Q. et al. Mistral 7b. Preprint at https:\/\/arxiv.org\/abs\/2310.06825 (2023)."}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00925-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00925-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00925-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,18]],"date-time":"2024-12-18T00:02:51Z","timestamp":1734480171000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-024-00925-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,26]]},"references-count":92,"journal-issue":{"issue":"12","published-online":{"date-parts":[[2024,12]]}},"alternative-id":["925"],"URL":"https:\/\/doi.org\/10.1038\/s42256-024-00925-4","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,26]]},"assertion":[{"value":"30 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}