{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T06:52:57Z","timestamp":1743058377085,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031332630"},{"type":"electronic","value":"9783031332647"}],"license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023]]},"DOI":"10.1007\/978-3-031-33264-7_1","type":"book-chapter","created":{"date-parts":[[2023,5,18]],"date-time":"2023-05-18T08:03:26Z","timestamp":1684397006000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Formal Languages and\u00a0the\u00a0NLP Black Box"],"prefix":"10.1007","author":[{"given":"William","family":"Merrill","sequence":"first","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,5,19]]},"reference":[{"key":"1_CR1","unstructured":"Ackerman, J., Cybenko, G.: A survey of neural networks and formal languages (2020)"},{"key":"1_CR2","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1007\/978-3-642-59136-5_3","volume-title":"Handbook of Formal Languages","author":"J-M Autebert","year":"1997","unstructured":"Autebert, J.-M., Berstel, J., Boasson, L.: Context-free languages and pushdown automata. In: Rozenberg, G., Salomaa, A. (eds.) Handbook of Formal Languages, pp. 111\u2013174. Springer, Heidelberg (1997). https:\/\/doi.org\/10.1007\/978-3-642-59136-5_3"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Bhattamishra, S., Ahuja, K., Goyal, N.: On the ability and limitations of transformers to recognize formal languages. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 7096\u20137116. Association for Computational Linguistics, November 2020. https:\/\/aclanthology.org\/2020.emnlp-main.576","DOI":"10.18653\/v1\/2020.emnlp-main.576"},{"key":"1_CR4","unstructured":"Chiang, D., Cholak, P., Pillay, A.: Tighter bounds on the expressivity of transformer encoders (2023)"},{"key":"1_CR5","doi-asserted-by":"crossref","unstructured":"Cho, K., van Merri\u00ebnboer, B., Bahdanau, D., Bengio, Y.: On the properties of neural machine translation: encoder-decoder approaches. In: Proceedings of SSST-8, Eighth Workshop on Syntax, Semantics and Structure in Statistical Translation, Doha, Qatar, pp. 103\u2013111. Association for Computational Linguistics, October 2014. https:\/\/aclanthology.org\/W14-4012","DOI":"10.3115\/v1\/W14-4012"},{"key":"1_CR6","unstructured":"Deletang, G., et al.: Neural networks and the chomsky hierarchy. In: The Eleventh International Conference on Learning Representations (2023). https:\/\/openreview.net\/forum?id=WbxHAzkeQcn"},{"key":"1_CR7","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics, June 2019. https:\/\/aclanthology.org\/N19-1423"},{"issue":"2","key":"1_CR8","doi-asserted-by":"publisher","first-page":"179","DOI":"10.1207\/s15516709cog1402_1","volume":"14","author":"JL Elman","year":"1990","unstructured":"Elman, J.L.: Finding structure in time. Cogn. Sci. 14(2), 179\u2013211 (1990)","journal-title":"Cogn. Sci."},{"key":"1_CR9","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1613\/jair.4992","volume":"57","author":"Y Goldberg","year":"2016","unstructured":"Goldberg, Y.: A primer on neural network models for natural language processing. J. Artif. Intell. Res. 57, 345\u2013420 (2016)","journal-title":"J. Artif. Intell. Res."},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Hahn, M.: Theoretical limitations of self-attention in neural sequence models. Trans. Assoc. Comput. Linguist. 8, 156\u2013171 (2020). https:\/\/aclanthology.org\/2020.tacl-1.11","DOI":"10.1162\/tacl_a_00306"},{"key":"1_CR11","doi-asserted-by":"crossref","unstructured":"Hao, Y., Angluin, D., Frank, R.: Formal language recognition by hard attention transformers: perspectives from circuit complexity. Trans. Assoc. Comput. Linguist. 10, 800\u2013810 (2022). https:\/\/aclanthology.org\/2022.tacl-1.46","DOI":"10.1162\/tacl_a_00490"},{"issue":"8","key":"1_CR12","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"1_CR13","unstructured":"Liang, P., et al.: Holistic evaluation of language models (2022)"},{"key":"1_CR14","unstructured":"Lindner, D., Kram\u00e1r, J., Rahtz, M., McGrath, T., Mikulik, V.: Tracr: compiled transformers as a laboratory for interpretability (2023)"},{"key":"1_CR15","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/BF02478259","volume":"5","author":"WS Mcculloch","year":"1943","unstructured":"Mcculloch, W.S., Pitts, W.: A logical calculus of the ideas immanent in nervous activity. Bull. Math. Biophys. 5, 115\u2013133 (1943). https:\/\/doi.org\/10.1007\/BF02478259","journal-title":"Bull. Math. Biophys."},{"key":"1_CR16","doi-asserted-by":"crossref","unstructured":"Merrill, W.: Sequential neural networks as automata. In: Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges, Florence, pp. 1\u201313. Association for Computational Linguistics, August 2019. https:\/\/aclanthology.org\/W19-3901","DOI":"10.18653\/v1\/W19-3901"},{"key":"1_CR17","unstructured":"Merrill, W.: Formal language theory meets modern NLP (2021)"},{"key":"1_CR18","doi-asserted-by":"publisher","unstructured":"Merrill, W., Ramanujan, V., Goldberg, Y., Schwartz, R., Smith, N.A.: Effects of parameter norm growth during transformer training: inductive bias from gradient descent. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, Punta Cana, Dominican Republic, pp. 1766\u20131781. Association for Computational Linguistics, November 2021. https:\/\/doi.org\/10.18653\/v1\/2021.emnlp-main.133. https:\/\/aclanthology.org\/2021.emnlp-main.133","DOI":"10.18653\/v1\/2021.emnlp-main.133"},{"key":"1_CR19","doi-asserted-by":"crossref","unstructured":"Merrill, W., Sabharwal, A.: The parallelism tradeoff: limitations of log-precision transformers (2023)","DOI":"10.1162\/tacl_a_00562"},{"key":"1_CR20","unstructured":"Merrill, W., Sabharwal, A.: Transformers can be expressed in first-order logic with majority (2023)"},{"key":"1_CR21","doi-asserted-by":"crossref","unstructured":"Merrill, W., Sabharwal, A., Smith, N.A.: Saturated transformers are constant-depth threshold circuits. Trans. Assoc. Comput. Linguist. 10, 843\u2013856 (2022). https:\/\/aclanthology.org\/2022.tacl-1.49","DOI":"10.1162\/tacl_a_00493"},{"key":"1_CR22","doi-asserted-by":"publisher","unstructured":"Merrill, W., Weiss, G., Goldberg, Y., Schwartz, R., Smith, N.A., Yahav, E.: A formal hierarchy of RNN architectures. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 443\u2013459. Association for Computational Linguistics, July 2020. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.43. https:\/\/aclanthology.org\/2020.acl-main.43","DOI":"10.18653\/v1\/2020.acl-main.43"},{"key":"1_CR23","doi-asserted-by":"crossref","unstructured":"Mix Barrington, D.A., Immerman, N., Straubing, H.: On uniformity within NC1. J. Comput. Syst. Sci. 41(3), 274\u2013306 (1990). https:\/\/www.sciencedirect.com\/science\/article\/pii\/002200009090022D","DOI":"10.1016\/0022-0000(90)90022-D"},{"key":"1_CR24","doi-asserted-by":"crossref","unstructured":"Peters, M.E., et al.: Deep contextualized word representations. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), New Orleans, Louisiana, pp. 2227\u20132237. Association for Computational Linguistics, June 2018. https:\/\/aclanthology.org\/N18-1202","DOI":"10.18653\/v1\/N18-1202"},{"key":"1_CR25","unstructured":"P\u00e9rez, J., Marinkovi\u0107, J., Barcel\u00f3, P.: On the turing completeness of modern neural network architectures. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/forum?id=HyGBdo0qFm"},{"key":"1_CR26","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I.: Language models are unsupervised multitask learners (2019)"},{"key":"1_CR27","doi-asserted-by":"crossref","unstructured":"Siegelmann, H., Sontag, E.: On the computational power of neural nets. J. Comput. Syst. Sci. 50(1), 132\u2013150 (1995). https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0022000085710136","DOI":"10.1006\/jcss.1995.1013"},{"key":"1_CR28","unstructured":"Srivastava, A., et al.: Beyond the imitation game: quantifying and extrapolating the capabilities of language models (2022)"},{"key":"1_CR29","doi-asserted-by":"publisher","unstructured":"Suzgun, M., Belinkov, Y., Shieber, S., Gehrmann, S.: LSTM networks can perform dynamic counting. In: Proceedings of the Workshop on Deep Learning and Formal Languages: Building Bridges, Florence, pp. 44\u201354. Association for Computational Linguistics, August 2019. https:\/\/doi.org\/10.18653\/v1\/W19-3905. https:\/\/aclanthology.org\/W19-3905","DOI":"10.18653\/v1\/W19-3905"},{"key":"1_CR30","doi-asserted-by":"crossref","unstructured":"Tenney, I., Das, D., Pavlick, E.: BERT rediscovers the classical NLP pipeline. In: Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, Florence, Italy, pp. 4593\u20134601. Association for Computational Linguistics, July 2019. https:\/\/aclanthology.org\/P19-1452","DOI":"10.18653\/v1\/P19-1452"},{"key":"1_CR31","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems, vol. 30 (2017)"},{"key":"1_CR32","doi-asserted-by":"crossref","unstructured":"Warstadt, A., Zhang, Y., Li, X., Liu, H., Bowman, S.R.: Learning which features matter: RoBERTa acquires a preference for linguistic generalizations (eventually). In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 217\u2013235. Association for Computational Linguistics, November 2020. https:\/\/aclanthology.org\/2020.emnlp-main.16","DOI":"10.18653\/v1\/2020.emnlp-main.16"},{"key":"1_CR33","unstructured":"Wei, J., et al.: Emergent abilities of large language models. Trans. Mach. Learn. Res. (2022). https:\/\/openreview.net\/forum?id=yzkSU5zdwD. Survey Certification"},{"key":"1_CR34","doi-asserted-by":"crossref","unstructured":"Weiss, G., Goldberg, Y., Yahav, E.: On the practical computational power of finite precision RNNs for language recognition. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), Melbourne, Australia, pp. 740\u2013745. Association for Computational Linguistics, July 2018. https:\/\/aclanthology.org\/P18-2117","DOI":"10.18653\/v1\/P18-2117"},{"key":"1_CR35","unstructured":"Weiss, G., Goldberg, Y., Yahav, E.: Thinking like transformers (2021). https:\/\/openreview.net\/forum?id=TmkN9JmDJx1"}],"container-title":["Lecture Notes in Computer Science","Developments in Language Theory"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-33264-7_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,12,13]],"date-time":"2023-12-13T07:02:35Z","timestamp":1702450955000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-33264-7_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"ISBN":["9783031332630","9783031332647"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-33264-7_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2023]]},"assertion":[{"value":"19 May 2023","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"DLT","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Developments in Language Theory","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Ume\u00e5","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sweden","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 June 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16 June 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"dlt2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/dltwords2023.cs.umu.se\/dlt","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"59% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4- Invited papers and 32 submissions (31 regular ones and one invited)","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}