{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T20:52:33Z","timestamp":1776804753504,"version":"3.51.2"},"reference-count":67,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T00:00:00Z","timestamp":1694649600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T00:00:00Z","timestamp":1694649600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/100001312","name":"Charles H. Revson Foundation","doi-asserted-by":"crossref","id":[{"id":"10.13039\/100001312","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["1948004"],"award-info":[{"award-number":["1948004"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Mach Intell"],"DOI":"10.1038\/s42256-023-00718-1","type":"journal-article","created":{"date-parts":[[2023,9,14]],"date-time":"2023-09-14T16:03:52Z","timestamp":1694707432000},"page":"952-964","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Testing the limits of natural language models for predicting human language judgements"],"prefix":"10.1038","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7940-7473","authenticated-orcid":false,"given":"Tal","family":"Golan","sequence":"first","affiliation":[]},{"given":"Matthew","family":"Siegelman","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7433-9005","authenticated-orcid":false,"given":"Nikolaus","family":"Kriegeskorte","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3540-5019","authenticated-orcid":false,"given":"Christopher","family":"Baldassano","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,9,14]]},"reference":[{"key":"718_CR1","doi-asserted-by":"publisher","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart, D. E., Hinton, G. E. & Williams, R. J. Learning representations by back-propagating errors. Nature 323, 533\u2013536 (1986).","journal-title":"Nature"},{"key":"718_CR2","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S. & Schmidhuber, J. Long short-term memory. Neural Comput. 9, 1735\u20131780 (1997).","journal-title":"Neural Comput."},{"key":"718_CR3","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M., Lee, K. & Toutanova, K. BERT: pre-training of deep bidirectional transformers for language understanding. In Proc. 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (eds Burstein, J. et al.) 4171\u20134186 (Association for Computational Linguistics, 2019); https:\/\/doi.org\/10.18653\/v1\/n19-1423","DOI":"10.18653\/v1\/n19-1423"},{"key":"718_CR4","unstructured":"Liu, Y. et al. RoBERTa: a robustly optimized BERT pretraining approach. Preprint at https:\/\/arxiv.org\/abs\/1907.11692 (2019)."},{"key":"718_CR5","unstructured":"Conneau, A. & Lample, G. Cross-lingual language model pretraining. In Advances in Neural Information Processing Systems (eds Wallach, H. et al.) Vol. 32 (Curran Associates, 2019); https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/c04c19c2c2474dbf5f7ac4372c5b9af1-Paper.pdf"},{"key":"718_CR6","unstructured":"Clark, K., Luong, M., Le, Q. V. & Manning, C. D. ELECTRA: pre-training text encoders as discriminators rather than generators. In Proc. 8th International Conference on Learning Representations ICLR 2020 (ICLR, 2020); https:\/\/openreview.net\/forum?id=r1xMH1BtvB"},{"key":"718_CR7","unstructured":"Radford, A. et al. Language Models are Unsupervised Multitask Learners (OpenAI, 2019); https:\/\/cdn.openai.com\/better-language-models\/language_models_are_unsupervised_multitask_learners.pdf"},{"key":"718_CR8","doi-asserted-by":"publisher","unstructured":"Goodkind, A. & Bicknell, K. Predictive power of word surprisal for reading times is a linear function of language model quality. In Proc. 8th Workshop on Cognitive Modeling and Computational Linguistics, CMCL 2018 10\u201318 (Association for Computational Linguistics, 2018); https:\/\/doi.org\/10.18653\/v1\/W18-0102","DOI":"10.18653\/v1\/W18-0102"},{"key":"718_CR9","doi-asserted-by":"publisher","first-page":"107307","DOI":"10.1016\/j.neuropsychologia.2019.107307","volume":"138","author":"C Shain","year":"2020","unstructured":"Shain, C., Blank, I. A., Schijndel, M., Schuler, W. & Fedorenko, E. fMRI reveals language-specific predictive coding during naturalistic sentence comprehension. Neuropsychologia 138, 107307 (2020).","journal-title":"Neuropsychologia"},{"key":"718_CR10","doi-asserted-by":"publisher","first-page":"803","DOI":"10.1016\/j.cub.2018.01.080","volume":"28","author":"MP Broderick","year":"2018","unstructured":"Broderick, M. P., Anderson, A. J., Di Liberto, G. M., Crosse, M. J. & Lalor, E. C. Electrophysiological correlates of semantic dissimilarity reflect the comprehension of natural, narrative speech. Curr. Biol. 28, 803\u2013809 (2018).","journal-title":"Curr. Biol."},{"key":"718_CR11","doi-asserted-by":"publisher","first-page":"369","DOI":"10.1038\/s41593-022-01026-4","volume":"25","author":"A Goldstein","year":"2022","unstructured":"Goldstein, A. et al. Shared computational principles for language processing in humans and deep language models. Nat. Neurosci. 25, 369\u2013380 (2022).","journal-title":"Nat. Neurosci."},{"key":"718_CR12","doi-asserted-by":"publisher","first-page":"1202","DOI":"10.1111\/cogs.12414","volume":"41","author":"JH Lau","year":"2017","unstructured":"Lau, J. H., Clark, A. & Lappin, S. Grammaticality, acceptability and probability: a probabilistic view of linguistic knowledge. Cogn. Sci. 41, 1202\u20131241 (2017).","journal-title":"Cogn. Sci."},{"key":"718_CR13","first-page":"296","volume":"8","author":"JH Lau","year":"2020","unstructured":"Lau, J. H., Armendariz, C., Lappin, S., Purver, M. & Shu, C. How furiously can colorless green ideas sleep? Sentence acceptability in context. Trans. Assoc. Comput. Ling. 8, 296\u2013310 (2020).","journal-title":"Trans. Assoc. Comput. Ling."},{"key":"718_CR14","unstructured":"Wang, A. et al. GLUE: a multi-task benchmark and analysis platform for natural language understanding. In Proc. 7th International Conference on Learning Representations, ICLR 2019 (ICLR, 2019); https:\/\/openreview.net\/forum?id=rJ4km2R5t7"},{"key":"718_CR15","unstructured":"Wang, A. et al. SuperGLUE: a stickier benchmark for general-purpose language understanding systems. In Advances in Neural Information Processing Systems (eds Wallach, H. et al.) 3266\u20133280 (Curran Associates, 2019); https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/4496bf24afe7fab6f046bf4923da8de6-Paper.pdf"},{"key":"718_CR16","first-page":"377","volume":"8","author":"A Warstadt","year":"2020","unstructured":"Warstadt, A. et al. BLiMP: the benchmark of linguistic minimal pairs for English. Trans. Assoc. Comput. Ling. 8, 377\u2013392 (2020).","journal-title":"Trans. Assoc. Comput. Ling."},{"key":"718_CR17","doi-asserted-by":"publisher","unstructured":"Kiela, D. et al. Dynabench: rethinking benchmarking in NLP. In Proc. 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies 4110\u20134124 (Association for Computational Linguistics, 2021); https:\/\/doi.org\/10.18653\/v1\/2021.naacl-main.324","DOI":"10.18653\/v1\/2021.naacl-main.324"},{"key":"718_CR18","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1080\/00401706.1967.10490441","volume":"9","author":"GEP Box","year":"1967","unstructured":"Box, G. E. P. & Hill, W. J. Discrimination among mechanistic models. Technometrics 9, 57\u201371 (1967).","journal-title":"Technometrics"},{"key":"718_CR19","doi-asserted-by":"publisher","first-page":"29330","DOI":"10.1073\/pnas.1912334117","volume":"117","author":"T Golan","year":"2020","unstructured":"Golan, T., Raju, P. C. & Kriegeskorte, N. Controversial stimuli: pitting neural networks against each other as models of human cognition. Proc. Natl Acad. Sci. USA 117, 29330\u201329337 (2020).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"718_CR20","doi-asserted-by":"publisher","first-page":"547","DOI":"10.3758\/BF03211196","volume":"14","author":"DV Cross","year":"1973","unstructured":"Cross, D. V. Sequential dependencies and regression in psychophysical judgments. Perception Psychophys. 14, 547\u2013552 (1973).","journal-title":"Perception Psychophys."},{"key":"718_CR21","doi-asserted-by":"publisher","first-page":"551","DOI":"10.3758\/BF03211601","volume":"48","author":"HJ Foley","year":"1990","unstructured":"Foley, H. J., Cross, D. V. & O\u2019reilly, J. A. Pervasiveness and magnitude of context effects: evidence for the relativity of absolute magnitude estimation. Perception Psychophys. 48, 551\u2013558 (1990).","journal-title":"Perception Psychophys."},{"key":"718_CR22","doi-asserted-by":"publisher","first-page":"285","DOI":"10.1016\/j.tics.2015.03.002","volume":"19","author":"FH Petzschner","year":"2015","unstructured":"Petzschner, F. H., Glasauer, S. & Stephan, K. E. A Bayesian perspective on magnitude estimation. Trends Cogn. Sci. 19, 285\u2013293 (2015).","journal-title":"Trends Cogn. Sci."},{"key":"718_CR23","doi-asserted-by":"publisher","first-page":"5","DOI":"10.1515\/ling.1977.15.187.5","volume":"15","author":"S Greenbaum","year":"1977","unstructured":"Greenbaum, S. Contextual influence on acceptability judgments. Linguistics 15, 5\u201312 (1977).","journal-title":"Linguistics"},{"key":"718_CR24","doi-asserted-by":"publisher","unstructured":"Sch\u00fctze, C. T. & Sprouse, J. in Research Methods in Linguistics (eds Podesva, R. J. & Sharma, D.) 27\u201350 (Cambridge Univ. Press, 2014); https:\/\/doi.org\/10.1017\/CBO9781139013734.004","DOI":"10.1017\/CBO9781139013734.004"},{"key":"718_CR25","doi-asserted-by":"publisher","first-page":"14","DOI":"10.5334\/gjgl.236","volume":"2","author":"J Sprouse","year":"2017","unstructured":"Sprouse, J. & Almeida, D. Design sensitivity and statistical power in acceptability judgment experiments. Glossa 2, 14 (2017).","journal-title":"Glossa"},{"key":"718_CR26","doi-asserted-by":"publisher","first-page":"2017","DOI":"10.1162\/jocn_a_01544","volume":"33","author":"GW Lindsay","year":"2021","unstructured":"Lindsay, G. W. Convolutional neural networks as a model of the visual system: past, present and future. J. Cogn. Neurosci. 33, 2017\u20132031 (2021).","journal-title":"J. Cogn. Neurosci."},{"key":"718_CR27","doi-asserted-by":"publisher","unstructured":"Wehbe, L., Vaswani, A., Knight, K. & Mitchell, T. Aligning context-based statistical models of language with brain activity during reading. In Proc. 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP) 233\u2013243 (Association for Computational Linguistics, 2014); https:\/\/doi.org\/10.3115\/v1\/D14-1030","DOI":"10.3115\/v1\/D14-1030"},{"key":"718_CR28","unstructured":"Toneva, M. & Wehbe, L. Interpreting and improving natural-language processing (in machines) with natural language-processing (in the brain). In Advances in Neural Information Processing Systems (eds Wallach, H. et al.) Vol. 32 (Curran Associates, 2019); https:\/\/proceedings.neurips.cc\/paper\/2019\/file\/749a8e6c231831ef7756db230b4359c8-Paper.pdf"},{"key":"718_CR29","doi-asserted-by":"publisher","first-page":"2201968119","DOI":"10.1073\/pnas.2201968119","volume":"119","author":"M Heilbron","year":"2022","unstructured":"Heilbron, M., Armeni, K., Schoffelen, J.-M., Hagoort, P. & De Lange, F. P. A hierarchy of linguistic predictions during natural language comprehension. Proc. Natl Acad. Sci. USA 119, 2201968119 (2022).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"718_CR30","unstructured":"Jain, S. et al. Interpretable multi-timescale models for predicting fMRI responses to continuous natural speech. In Advances in Neural Information Processing Systems (eds Larochelle, H. et al.) Vol. 33, 13738\u201313749 (Curran Associates, 2020); https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2020\/file\/9e9a30b74c49d07d8150c8c83b1ccf07-Paper.pdf"},{"key":"718_CR31","unstructured":"Lyu, B., Marslen-Wilson, W. D., Fang, Y. & Tyler, L. K. Finding structure in time: humans, machines and language. Preprint at https:\/\/www.biorxiv.org\/content\/10.1101\/2021.10.25.465687v2 (2021)."},{"key":"718_CR32","doi-asserted-by":"publisher","first-page":"2105646118","DOI":"10.1073\/pnas.2105646118","volume":"118","author":"M Schrimpf","year":"2021","unstructured":"Schrimpf, M. et al. The neural architecture of language: integrative modeling converges on predictive processing. Proc. Natl Acad. Sci. USA 118, 2105646118 (2021).","journal-title":"Proc. Natl Acad. Sci. USA"},{"key":"718_CR33","doi-asserted-by":"publisher","unstructured":"Wilcox, E., Vani, P. & Levy, R. A targeted assessment of incremental processing in neural language models and humans. In Proc. 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers) 939\u2013952 (Association for Computational Linguistics, 2021); https:\/\/doi.org\/10.18653\/v1\/2021.acl-long.76","DOI":"10.18653\/v1\/2021.acl-long.76"},{"key":"718_CR34","doi-asserted-by":"publisher","first-page":"134","DOI":"10.1038\/s42003-022-03036-1","volume":"5","author":"C Caucheteux","year":"2022","unstructured":"Caucheteux, C. & King, J.-R. Brains and algorithms partially converge in natural language processing. Commun. Biol. 5, 134 (2022).","journal-title":"Commun. Biol."},{"key":"718_CR35","doi-asserted-by":"crossref","unstructured":"Arehalli, S., Dillon, B. & Linzen, T. Syntactic surprisal from neural models predicts, but underestimates, human processing difficulty from syntactic ambiguities. In Proc. 26th Conference on Computational Natural Language Learning (CoNLL) 301\u2013313 (Association for Computational Linguistics, 2022); https:\/\/aclanthology.org\/2022.conll-1.20","DOI":"10.18653\/v1\/2022.conll-1.20"},{"key":"718_CR36","doi-asserted-by":"publisher","unstructured":"Merkx, D. & Frank, S. L. Human sentence processing: recurrence or attention? In Proc. Workshop on Cognitive Modeling and Computational Linguistics 12\u201322 (Association for Computational Linguistics, 2021); https:\/\/doi.org\/10.18653\/v1\/2021.cmcl-1.2","DOI":"10.18653\/v1\/2021.cmcl-1.2"},{"key":"718_CR37","unstructured":"Michaelov, J. A., Bardolph, M. D., Coulson, S. & Bergen, B. K. Different kinds of cognitive plausibility: why are transformers better than RNNs at predicting N400 amplitude? In Proc. Annual Meeting of the Cognitive Science Society Vol. 43 (2021); https:\/\/escholarship.org\/uc\/item\/9z06m20f"},{"key":"718_CR38","unstructured":"Rakocevic, L. I. Synthesizing controversial sentences for testing the brain-predictivity of language models. PhD thesis, Massachusetts Institute of Technology (2021); https:\/\/hdl.handle.net\/1721.1\/130713"},{"key":"718_CR39","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1016\/j.tics.2016.08.005","volume":"20","author":"ND Goodman","year":"2016","unstructured":"Goodman, N. D. & Frank, M. C. Pragmatic language interpretation as probabilistic inference. Trends Cogn. Sci. 20, 818\u2013829 (2016).","journal-title":"Trends Cogn. Sci."},{"key":"718_CR40","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1016\/j.jml.2005.03.002","volume":"53","author":"SR Howell","year":"2005","unstructured":"Howell, S. R., Jankowicz, D. & Becker, S. A model of grounded language acquisition: sensorimotor features improve lexical and grammatical learning. J. Mem. Lang. 53, 258\u2013276 (2005).","journal-title":"J. Mem. Lang."},{"key":"718_CR41","unstructured":"Szegedy, C. et al. Intriguing properties of neural networks. Preprint at http:\/\/arxiv.org\/abs\/1312.6199 (2013)."},{"key":"718_CR42","unstructured":"Goodfellow, I. J., Shlens, J. & Szegedy, C. Explaining and harnessing adversarial examples. In Proc. 3rd International Conference on Learning Representations, ICLR 2015, Conference Track Proceedings (2015); http:\/\/arxiv.org\/abs\/1412.6572"},{"key":"718_CR43","first-page":"1","volume":"11","author":"WE Zhang","year":"2020","unstructured":"Zhang, W. E., Sheng, Q. Z., Alhazmi, A. & Li, C. Adversarial attacks on deep-learning models in natural language processing: a survey. ACM Trans. Intell. Syst. Technol. 11, 1\u201341 (2020).","journal-title":"ACM Trans. Intell. Syst. Technol."},{"key":"718_CR44","doi-asserted-by":"publisher","unstructured":"Liang, B. et al. Deep text classification can be fooled. In Proc. Twenty-Seventh International Joint Conference on Artificial Intelligence, IJCAI-18 4208\u20134215 (International Joint Conferences on Artificial Intelligence Organization, 2018); https:\/\/doi.org\/10.24963\/ijcai.2018\/585","DOI":"10.24963\/ijcai.2018\/585"},{"key":"718_CR45","doi-asserted-by":"publisher","unstructured":"Ebrahimi, J., Rao, A., Lowd, D. & Dou, D. HotFlip: white-box adversarial examples for text classification. In Proc. 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers) 31\u201336 (Association for Computational Linguistics, 2018); https:\/\/doi.org\/10.18653\/v1\/P18-2006","DOI":"10.18653\/v1\/P18-2006"},{"key":"718_CR46","doi-asserted-by":"publisher","unstructured":"Abdou, M. et al. The sensitivity of language models and humans to Winograd schema perturbations. In Proc. 58th Annual Meeting of the Association for Computational Linguistics 7590\u20137604 (Association for Computational Linguistics, 2020); https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.679","DOI":"10.18653\/v1\/2020.acl-main.679"},{"key":"718_CR47","doi-asserted-by":"publisher","unstructured":"Alzantot, M. et al. Generating natural language adversarial examples. In Proc. 2018 Conference on Empirical Methods in Natural Language Processing 2890\u20132896 (Association for Computational Linguistics, 2018); https:\/\/doi.org\/10.18653\/v1\/D18-1316","DOI":"10.18653\/v1\/D18-1316"},{"key":"718_CR48","doi-asserted-by":"publisher","unstructured":"Ribeiro, M. T., Singh, S. & Guestrin, C. Semantically equivalent adversarial rules for debugging NLP models. In Proc. 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) 856\u2013865 (Association for Computational Linguistics, 2018); https:\/\/doi.org\/10.18653\/v1\/P18-1079","DOI":"10.18653\/v1\/P18-1079"},{"key":"718_CR49","doi-asserted-by":"publisher","unstructured":"Ren, S., Deng, Y., He, K. & Che, W. Generating natural language adversarial examples through probability weighted word saliency. In Proc. 57th Annual Meeting of the Association for Computational Linguistics 1085\u20131097 (Association for Computational Linguistics, 2019); https:\/\/doi.org\/10.18653\/v1\/P19-1103","DOI":"10.18653\/v1\/P19-1103"},{"key":"718_CR50","doi-asserted-by":"publisher","unstructured":"Morris, J., Lifland, E., Lanchantin, J., Ji, Y. & Qi, Y. Reevaluating adversarial examples in natural language. In Findings of the Association for Computational Linguistics: EMNLP 2020 3829\u20133839 (Association for Computational Linguistics, 2020); https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.341","DOI":"10.18653\/v1\/2020.findings-emnlp.341"},{"key":"718_CR51","first-page":"387","volume":"7","author":"E Wallace","year":"2019","unstructured":"Wallace, E., Rodriguez, P., Feng, S., Yamada, I. & Boyd-Graber, J. Trick me if you can: human-in-the-loop generation of adversarial examples for question answering. Trans. Assoc. Comput. Ling. 7, 387\u2013401 (2019).","journal-title":"Trans. Assoc. Comput. Ling."},{"key":"718_CR52","doi-asserted-by":"publisher","unstructured":"Perez, E. et al. Red teaming language models with language models. In Proc.of the 2022 Conference on Empirical Methods in Natural Language Processing 3419\u20133448 (Association for Computational Linguistics, 2022); https:\/\/doi.org\/10.18653\/v1\/2022.emnlp-main.225","DOI":"10.18653\/v1\/2022.emnlp-main.225"},{"key":"718_CR53","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/S0010-0277(98)00034-1","volume":"68","author":"E Gibson","year":"1998","unstructured":"Gibson, E. Linguistic complexity: locality of syntactic dependencies. Cognition 68, 1\u201376 (1998).","journal-title":"Cognition"},{"key":"718_CR54","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1016\/0024-3841(75)90046-7","volume":"37","author":"WC Watt","year":"1975","unstructured":"Watt, W. C. The indiscreteness with which impenetrables are penetrated. Lingua 37, 95\u2013128 (1975).","journal-title":"Lingua"},{"key":"718_CR55","doi-asserted-by":"publisher","unstructured":"Sch\u00fctze, C. T. The Empirical Base of Linguistics, Classics in Linguistics Vol. 2 (Language Science Press, 2016); https:\/\/doi.org\/10.17169\/langsci.b89.100","DOI":"10.17169\/langsci.b89.100"},{"key":"718_CR56","unstructured":"Bird, S., Klein, E. & Loper, E. Natural Language Processing with Python: Analyzing Text with the Natural Language Toolkit (\u2018O\u2019Reilly Media, 2009)."},{"key":"718_CR57","unstructured":"Paszke, A. et al. PyTorch: an imperative style, high-performance deep learning library. In Advances in Neural Information Processing Systems (eds Wallach, H. et al.) Vol. 32, 8024\u20138035 (Curran Associates, 2019); http:\/\/papers.neurips.cc\/paper\/9015-pytorch-an-imperative-style-high-performance-deep-learning-library.pdf"},{"key":"718_CR58","doi-asserted-by":"publisher","unstructured":"Wolf, T. et al. Transformers: state-of-the-art natural language processing. In Proc. 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations 38\u201345 (Association for Computational Linguistics, 2020); https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-demos.6","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"718_CR59","doi-asserted-by":"publisher","unstructured":"Yamakoshi, T., Griffiths, T. & Hawkins, R. Probing BERT\u2019s priors with serial reproduction chains. In Findings of the Association for Computational Linguistics, ACL 2022 3977\u20133992 (Association for Computational Linguistics, 2022); https:\/\/doi.org\/10.18653\/v1\/2022.findings-acl.314","DOI":"10.18653\/v1\/2022.findings-acl.314"},{"key":"718_CR60","unstructured":"Chestnut, S. Perplexity https:\/\/drive.google.com\/uc?export=download&id=1gSNfGQ6LPxlNctMVwUKrQpUA7OLZ83PW (accessed 23 September 2022)."},{"key":"718_CR61","doi-asserted-by":"publisher","first-page":"1176","DOI":"10.1080\/17470218.2013.850521","volume":"67","author":"WJB Heuven","year":"2014","unstructured":"Heuven, W. J. B., Mandera, P., Keuleers, E. & Brysbaert, M. Subtlex-UK: a new and improved word frequency database for British English. Q. J. Exp. Psychol. 67, 1176\u20131190 (2014).","journal-title":"Q. J. Exp. Psychol."},{"key":"718_CR62","doi-asserted-by":"publisher","first-page":"8","DOI":"10.1167\/8.12.8","volume":"8","author":"Z Wang","year":"2008","unstructured":"Wang, Z. & Simoncelli, E. P. Maximum differentiation (MAD) competition: a methodology for comparing computational models of perceptual quantities. J. Vision 8, 8 (2008).","journal-title":"J. Vision"},{"key":"718_CR63","doi-asserted-by":"crossref","first-page":"289","DOI":"10.1111\/j.2517-6161.1995.tb02031.x","volume":"57","author":"Y Benjamini","year":"1995","unstructured":"Benjamini, Y. & Hochberg, Y. Controlling the false discovery rate: a practical and powerful approach to multiple testing. J. R. Stat. Soc. B (Methodol.) 57, 289\u2013300 (1995).","journal-title":"J. R. Stat. Soc. B"},{"key":"718_CR64","doi-asserted-by":"publisher","unstructured":"Wang, A. & Cho, K. BERT has a mouth, and it must speak: BERT as a Markov random field language model. In Proc. Workshop on Methods for Optimizing and Evaluating Neural Language Generation 30\u201336 (Association for Computational Linguistics, 2019); https:\/\/doi.org\/10.18653\/v1\/W19-2304","DOI":"10.18653\/v1\/W19-2304"},{"key":"718_CR65","unstructured":"Cho, K. BERT has a mouth and must speak, but it is not an MRF https:\/\/kyunghyuncho.me\/bert-has-a-mouth-and-must-speak-but-it-is-not-an-mrf\/ (accessed 28 September 2022)."},{"key":"718_CR66","doi-asserted-by":"publisher","unstructured":"Salazar, J., Liang, D., Nguyen, T. Q. & Kirchhoff, K. Masked language model scoring. In Proc. 58th Annual Meeting of the Association for Computational Linguistics 2699\u20132712 (Association for Computational Linguistics, 2020); https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.240","DOI":"10.18653\/v1\/2020.acl-main.240"},{"key":"718_CR67","doi-asserted-by":"publisher","unstructured":"Golan, T., Siegelman, M., Kriegeskorte, N. & Baldassano, C. Code and data for \u2018Testing the limits of natural language models for predicting human language judgments\u2019 (Zenodo, 2023); https:\/\/doi.org\/10.5281\/zenodo.8147166","DOI":"10.5281\/zenodo.8147166"}],"container-title":["Nature Machine Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00718-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00718-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00718-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T02:52:29Z","timestamp":1730083949000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s42256-023-00718-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,9,14]]},"references-count":67,"journal-issue":{"issue":"9","published-online":{"date-parts":[[2023,9]]}},"alternative-id":["718"],"URL":"https:\/\/doi.org\/10.1038\/s42256-023-00718-1","relation":{},"ISSN":["2522-5839"],"issn-type":[{"value":"2522-5839","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,9,14]]},"assertion":[{"value":"2 June 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 September 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}