{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T07:13:35Z","timestamp":1774077215271,"version":"3.50.1"},"reference-count":26,"publisher":"Association for Natural Language Processing","issue":"1","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Journal of Natural Language Processing"],"published-print":{"date-parts":[[2026]]},"DOI":"10.5715\/jnlp.33.109","type":"journal-article","created":{"date-parts":[[2026,3,14]],"date-time":"2026-03-14T22:12:51Z","timestamp":1773526371000},"page":"109-131","source":"Crossref","is-referenced-by-count":0,"title":["Inducing Grammatical Knowledge from Indirect Evidence in Language Models","\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u9593\u63a5\u8a3c\u62e0\u304b\u3089\u306e\u6587\u6cd5\u77e5\u8b58\u306e\u7372\u5f97"],"prefix":"10.5715","volume":"33","author":[{"given":"Miyu","family":"Oba","sequence":"first","affiliation":[{"name":"Nara Institute of Science and Technology (NAIST)"}]},{"given":"Yohei","family":"Oseki","sequence":"additional","affiliation":[{"name":"The University of Tokyo"}]},{"given":"Akiyo","family":"Fukatsu","sequence":"additional","affiliation":[{"name":"The University of Tokyo"}]},{"given":"Akari","family":"Haga","sequence":"additional","affiliation":[{"name":"Nara Institute of Science and Technology (NAIST)"}]},{"given":"Hiroki","family":"Ouchi","sequence":"additional","affiliation":[{"name":"Nara Institute of Science and Technology (NAIST)"}]},{"given":"Taro","family":"Watanabe","sequence":"additional","affiliation":[{"name":"Nara Institute of Science and Technology (NAIST)"}]},{"given":"Saku","family":"Sugawara","sequence":"additional","affiliation":[{"name":"National Institute of Informatics"}]}],"member":"3685","reference":[{"key":"1","doi-asserted-by":"crossref","unstructured":"Ambridge, B., Kidd, E., Rowland, C. F., and Theakston, A. L. (2015). \u201cThe Ubiquity of Frequency Effects in First Language Acquisition.\u201d <i>Journal of Child Language<\/i>, 42 (2), pp. 239\u2013273.","DOI":"10.1017\/S030500091400049X"},{"key":"2","doi-asserted-by":"crossref","unstructured":"Berko, J. (1958). \u201cThe Child\u2019s Learning of English Morphology.\u201d <i>WORD<\/i>, 14 (2-3), pp. 150\u2013177.","DOI":"10.1080\/00437956.1958.11659661"},{"key":"3","doi-asserted-by":"crossref","unstructured":"Chomsky, N. (1993). <i>Lectures on Government and Binding: The Pisa Lectures<\/i>. De Gruyter Mouton, Berlin, New York.","DOI":"10.1515\/9783110884166"},{"key":"4","doi-asserted-by":"crossref","unstructured":"Giulianelli, M., Harding, J., Mohnert, F., Hupkes, D., and Zuidema, W. (2018). \u201cUnder the Hood: Using Diagnostic Classifiers to Investigate and Improve how Language Models Track Agreement Information.\u201d In Linzen, T., Chrupa\u019aa, G., and Alishahi, A. (Eds.), <i>Proceedings of the 2018 EMNLP Workshop BlackboxNLP: Analyzing and Interpreting Neural Networks for NLP<\/i>, pp. 240\u2013248, Brussels, Belgium. Association for Computational Linguistics.","DOI":"10.18653\/v1\/W18-5426"},{"key":"5","doi-asserted-by":"crossref","unstructured":"Hu, J., Gauthier, J., Qian, P., Wilcox, E., and Levy, R. (2020). \u201cA Systematic Assessment of Syntactic Generalization in Neural Language Models.\u201d In Jurafsky, D., Chai, J., Schluter, N., and Tetreault, J. (Eds.), <i>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics<\/i>, pp. 1725\u20131744, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2020.acl-main.158"},{"key":"6","doi-asserted-by":"crossref","unstructured":"Huebner, P. A., Sulem, E., Cynthia, F., and Roth, D. (2021). \u201cBabyBERTa: Learning More Grammar With Small-Scale Child-Directed Language.\u201d In Bisazza, A. and Abend, O. (Eds.), <i>Proceedings of the 25th Conference on Computational Natural Language Learning<\/i>, pp. 624\u2013646, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2021.conll-1.49"},{"key":"7","unstructured":"Leong, C. S.-Y. and Linzen, T. (2024). \u201cTesting Learning Hypotheses Using Neural Networks by Manipulating Learning Data.\u201d <i>arXiv preprint arXiv:2407.04593<\/i>."},{"key":"8","doi-asserted-by":"crossref","unstructured":"Li, B., Wisniewski, G., and Crabb\u00e9, B. (2023). \u201cAssessing the Capacity of Transformer to Abstract Syntactic Representations: A Contrastive Analysis Based on Long-distance Agreement.\u201d <i>Transactions of the Association for Computational Linguistics<\/i>, 11, pp. 18\u201333.","DOI":"10.1162\/tacl_a_00531"},{"key":"9","doi-asserted-by":"crossref","unstructured":"Linzen, T., Dupoux, E., and Goldberg, Y. (2016). \u201cAssessing the Ability of LSTMs to Learn Syntax-Sensitive Dependencies.\u201d <i>Transactions of the Association for Computational Linguistics<\/i>, 4, pp. 521\u2013535.","DOI":"10.1162\/tacl_a_00115"},{"key":"10","doi-asserted-by":"crossref","unstructured":"Marvin, R. and Linzen, T. (2018). \u201cTargeted Syntactic Evaluation of Language Models.\u201d In Riloff, E., Chiang, D., Hockenmaier, J., and Tsujii, J. (Eds.), <i>Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing<\/i>, pp. 1192\u20131202, Brussels, Belgium. Association for Computational Linguistics.","DOI":"10.18653\/v1\/D18-1151"},{"key":"11","doi-asserted-by":"crossref","unstructured":"McCoy, R. T., Frank, R., and Linzen, T. (2020). \u201cDoes Syntax Need to Grow on Trees? Sources of Hierarchical Inductive Bias in Sequence-to-Sequence Networks.\u201d <i>Transactions of the Association for Computational Linguistics<\/i>, 8, pp. 125\u2013140.","DOI":"10.1162\/tacl_a_00304"},{"key":"12","unstructured":"Meta (2024). \u201cThe Llama 3 Herd of Models.\u201d <i>arXiv preprint arXiv:2407.21783<\/i>."},{"key":"13","doi-asserted-by":"crossref","unstructured":"Misra, K. and Mahowald, K. (2024). \u201cLanguage Models Learn Rare Phenomena from Less Rare Phenomena: The Case of the Missing AANNs.\u201d <i>arXiv preprint arXiv:2403.19827<\/i>.","DOI":"10.18653\/v1\/2024.emnlp-main.53"},{"key":"14","doi-asserted-by":"crossref","unstructured":"Mueller, A., Frank, R., Linzen, T., Wang, L., and Schuster, S. (2022). \u201cColoring the Blank Slate: Pre-training Imparts a Hierarchical Inductive Bias to Sequence-to-sequence Models.\u201d In Muresan, S., Nakov, P., and Villavicencio, A. (Eds.), <i>Findings of the Association for Computational Linguistics: ACL 2022<\/i>, pp. 1352\u20131368, Dublin, Ireland. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2022.findings-acl.106"},{"key":"15","doi-asserted-by":"crossref","unstructured":"Oba, M., Oseki, Y., Fukatsu, A., Haga, A., Ouchi, H., Watanabe, T., and Sugawara, S. (2024). \u201cCan Language Models Induce Grammatical Knowledge from Indirect Evidence?\u201d In Al-Onaizan, Y., Bansal, M., and Chen, Y.-N. (Eds.), <i>Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing<\/i>, pp. 20591\u201320603, Miami, Florida, USA. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2024.emnlp-main.1146"},{"key":"16","unstructured":"OpenAI (2024). \u201cGPT-4 Technical Report.\u201d <i>arXiv preprint arXiv:2303.08774<\/i>."},{"key":"17","doi-asserted-by":"crossref","unstructured":"Patil, A., Jumelet, J., Chiu, Y. Y., Lapastora, A., Shen, P., Wang, L., Willrich, C., and Steinert-Threlkeld, S. (2024). \u201cFiltered Corpus Training (FiCT) Shows that Language Models can Generalize from Indirect Evidence.\u201d <i>arXiv preprint arXiv:2405.15750<\/i>.","DOI":"10.1162\/tacl_a_00720"},{"key":"18","doi-asserted-by":"crossref","unstructured":"Pearl, L. S. and Mis, B. (2016). \u201cTHE ROLE OF INDIRECT POSITIVE EVIDENCE IN SYNTACTIC ACQUISITION: A LOOK AT ANAPHORIC \u201cONE\u201d.\u201d <i>Language<\/i>, 92 (1), pp. 1\u201330.","DOI":"10.1353\/lan.2016.0006"},{"key":"19","doi-asserted-by":"crossref","unstructured":"Ri, R. and Tsuruoka, Y. (2022). \u201cPretraining with Artificial Language: Studying Transferable Knowledge in Language Models.\u201d In Muresan, S., Nakov, P., and Villavicencio, A. (Eds.), <i>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)<\/i>, pp. 7302\u20137315, Dublin, Ireland. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2022.acl-long.504"},{"key":"20","doi-asserted-by":"crossref","unstructured":"Salazar, J., Liang, D., Nguyen, T. Q., and Kirchhoff, K. (2020). \u201cMasked Language Model Scoring.\u201d In Jurafsky, D., Chai, J., Schluter, N., and Tetreault, J. (Eds.), <i>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics<\/i>, pp. 2699\u20132712, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2020.acl-main.240"},{"key":"21","doi-asserted-by":"crossref","unstructured":"Ueda, N., Mita, M., Oka, T., and Komachi, M. (2024). \u201cToken-length Bias in Minimal-pair Paradigm Datasets.\u201d In Calzolari, N., Kan, M.-Y., Hoste, V., Lenci, A., Sakti, S., and Xue, N. (Eds.), <i>Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)<\/i>, pp. 16224\u201316236, Torino, Italia. ELRA and ICCL.","DOI":"10.63317\/38xk2sffxs5p"},{"key":"22","doi-asserted-by":"crossref","unstructured":"Warstadt, A., Mueller, A., Choshen, L., Wilcox, E., Zhuang, C., Ciro, J., Mosquera, R., Paranjabe, B., Williams, A., Linzen, T., and Cotterell, R. (2023). \u201cFindings of the BabyLM Challenge: Sample-Efficient Pretraining on Developmentally Plausible Corpora.\u201d In Warstadt, A., Mueller, A., Choshen, L., Wilcox, E., Zhuang, C., Ciro, J., Mosquera, R., Paranjabe, B., Williams, A., Linzen, T., and Cotterell, R. (Eds.), <i>Proceedings of the BabyLM Challenge at the 27th Conference on Computational Natural Language Learning<\/i>, pp. 1\u201334, Singapore. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2023.conll-babylm.1"},{"key":"23","doi-asserted-by":"crossref","unstructured":"Warstadt, A., Parrish, A., Liu, H., Mohananey, A., Peng, W., Wang, S.-F., and Bowman, S. R. (2020). \u201cBLiMP: The Benchmark of Linguistic Minimal Pairs for English.\u201d <i>Transactions of the Association for Computational Linguistics<\/i>, 8, pp. 377\u2013392.","DOI":"10.1162\/tacl_a_00321"},{"key":"24","doi-asserted-by":"crossref","unstructured":"Wei, J., Garrette, D., Linzen, T., and Pavlick, E. (2021). \u201cFrequency Effects on Syntactic Rule Learning in Transformers.\u201d In Moens, M.-F., Huang, X., Specia, L., and Yih, S. W.-t. (Eds.), <i>Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing<\/i>, pp. 932\u2013948, Online and Punta Cana, Dominican Republic. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2021.emnlp-main.72"},{"key":"25","doi-asserted-by":"crossref","unstructured":"White, J. C. and Cotterell, R. (2021). \u201cExamining the Inductive Bias of Neural Language Models with Artificial Languages.\u201d In Zong, C., Xia, F., Li, W., and Navigli, R. (Eds.), <i>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)<\/i>, pp. 454\u2013463, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2021.acl-long.38"},{"key":"26","doi-asserted-by":"crossref","unstructured":"Yu, C., Sie, R., Tedeschi, N., and Bergen, L. (2020). \u201cWord Frequency Does Not Predict Grammatical Knowledge in Language Models.\u201d In Webber, B., Cohn, T., He, Y., and Liu, Y. (Eds.), <i>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)<\/i>, pp. 4040\u20134054, Online. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2020.emnlp-main.331"}],"container-title":["Journal of Natural Language Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/jnlp\/33\/1\/33_109\/_pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,21]],"date-time":"2026-03-21T03:53:12Z","timestamp":1774065192000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.jstage.jst.go.jp\/article\/jnlp\/33\/1\/33_109\/_article\/-char\/ja\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":26,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026]]}},"URL":"https:\/\/doi.org\/10.5715\/jnlp.33.109","relation":{},"ISSN":["1340-7619","2185-8314"],"issn-type":[{"value":"1340-7619","type":"print"},{"value":"2185-8314","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}