{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,25]],"date-time":"2026-03-25T19:36:11Z","timestamp":1774467371793,"version":"3.50.1"},"reference-count":57,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T00:00:00Z","timestamp":1699056000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T00:00:00Z","timestamp":1699056000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s10579-023-09691-y","type":"journal-article","created":{"date-parts":[[2023,11,4]],"date-time":"2023-11-04T14:01:46Z","timestamp":1699106506000},"page":"427-458","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["LoNLI: An Extensible Framework for Testing Diverse Logical Reasoning Capabilities for NLI"],"prefix":"10.1007","volume":"58","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4258-5388","authenticated-orcid":false,"given":"Ishan","family":"Tarunesh","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Somak","family":"Aditya","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Monojit","family":"Choudhury","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,11,4]]},"reference":[{"key":"9691_CR1","unstructured":"Bhagavatula, C., Bras, R. L., Malaviya, C., Sakaguchi, K., Holtzman, A., Rashkin, H., Downey, D., tau, Wen, Y., & Yejin C. Abductive commonsense reasoning. In International Conference on Learning Representations2020. https:\/\/openreview.net\/forum?id=Byg1v1HKDB."},{"key":"9691_CR2","unstructured":"Bhardwaj, R., Majumder, N., & Poria, S. (2020). Investigating gender bias in BERT. CoRR, abs\/2009.05021 (2020). https:\/\/arxiv.org\/abs\/2009.05021."},{"key":"9691_CR3","doi-asserted-by":"crossref","unstructured":"Bowman, S., Angeli, G., Potts, C., Manning, C. D. (2015). A large annotated corpus for learning natural language inference (pp.\u00a0632\u2013642). In EMNLP 2015","DOI":"10.18653\/v1\/D15-1075"},{"key":"9691_CR4","doi-asserted-by":"publisher","unstructured":"Bowman, S. R., Dahl, & d George E. (2021). What will it take to fix benchmarking in natural language understanding? In NAACL-HLT 2021, Online, June 6\u201311, (2021), pp. 4843\u20134855. Association for Computational Linguistics, 2021. https:\/\/doi.org\/10.18653\/v1\/2021.naacl-main.385.","DOI":"10.18653\/v1\/2021.naacl-main.385"},{"key":"9691_CR5","doi-asserted-by":"crossref","unstructured":"de Vassimon Manela, D., Errington, D., Fisher, T., van Breugel, B., & Minervini, P. (2021). Stereotype and skew: Quantifying gender bias in pre-trained and fine-tuned language models. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume (pp. 2232\u20132242)","DOI":"10.18653\/v1\/2021.eacl-main.190"},{"key":"9691_CR6","unstructured":"Devlin, J., Chang, M.-W., Lee, K., & Toutanova, K. (2019). Bert: Pre-training of deep bidirectional transformers for language understanding. In NAACL-HLT 2019 (Vol.\u00a01) (Long and Short Papers, pp.\u00a04171\u20134186)"},{"key":"9691_CR7","unstructured":"Dolan, B., & Brockett, C. (2005). Automatically constructing a corpus of sentential paraphrases. In Third International Workshop on Paraphrasing (IWP2005). Asia Federation of Natural Language Processing, https:\/\/www.microsoft.com\/en-us\/research\/publication\/automatically-constructing-a-corpus-of-sentential-paraphrases\/."},{"key":"9691_CR8","doi-asserted-by":"publisher","unstructured":"Glockner, M., Shwartz, V., & Goldberg, Y. (2018). Breaking NLI systems with sentences that require simple lexical inferences. In ACL 2018 (Vol.\u00a02: Short Papers, pp.\u00a0650\u2013655, Melbourne, Australia, Association for Computational Linguistics). https:\/\/doi.org\/10.18653\/v1\/P18-2103. https:\/\/aclanthology.org\/P18-2103.","DOI":"10.18653\/v1\/P18-2103"},{"key":"9691_CR9","doi-asserted-by":"crossref","unstructured":"Grice, H. P. (1975). Logic and conversation. In P. Cole and J.L. Morgan (Eds.), Syntax and Semantics: Vol. 3: Speech Acts (pp. 41\u201358). Academic Press. http:\/\/www.ucl.ac.uk\/ls\/studypacks\/Grice-Logic.pdf.","DOI":"10.1163\/9789004368811_003"},{"key":"9691_CR10","doi-asserted-by":"crossref","unstructured":"Gupta, V., Mehta, M., Nokhiz, Pe., & Srikumar, V. (2020). INFOTABS: Inference on tables as semi-structured data. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (pp.\u00a02309\u20132324), Online, Association for Computational Linguistics. https:\/\/www.aclweb.org\/anthology\/2020.acl-main.210.","DOI":"10.18653\/v1\/2020.acl-main.210"},{"key":"9691_CR11","doi-asserted-by":"crossref","unstructured":"Gururangan, S., Swayamdipta, S., Levy, O., Schwartz, R., Bowman, S. R., & Smith, N.\u00a0A. (2018). Annotation artifacts in natural language inference data. In NAACL.","DOI":"10.18653\/v1\/N18-2017"},{"key":"9691_CR12","volume-title":"Decoding-enhanced bert with disentangled attention","author":"Pengcheng He","year":"2020","unstructured":"He, Pengcheng, Liu, Xiaodong, Gao, Jianfeng, & Chen, W. (2020). Decoding-enhanced bert with disentangled attention. Deberta."},{"key":"9691_CR13","doi-asserted-by":"publisher","unstructured":"Hewitt, J., & Manning, C.\u00a0D. (June 2019). A structural probe for finding syntax in word representations. In NAACL-HLT 2019 (Vol.\u00a01, Long and Short Papers, pp.\u00a04129\u20134138), Minneapolis, Minnesota, Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/N19-1419. https:\/\/www.aclweb.org\/anthology\/N19-1419.","DOI":"10.18653\/v1\/N19-1419"},{"key":"9691_CR14","unstructured":"Iyer, S., Dandekar, N., & Csernai, K. (2017). First quora dataset release: Question pairs. https:\/\/data.quora.com\/First-Quora-Dataset-Release-Question-Pairs."},{"key":"9691_CR15","doi-asserted-by":"publisher","unstructured":"Jawahar, G., Sagot, B., & Seddah, D. (2019). What does BERT learn about the structure of language? In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (pp.\u00a03651\u20133657). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P19-1356. https:\/\/www.aclweb.org\/anthology\/P19-1356.","DOI":"10.18653\/v1\/P19-1356"},{"key":"9691_CR16","doi-asserted-by":"crossref","unstructured":"Jeretic, P., Warstadt, A., Bhooshan, S., & Williams, A. (2020). Are natural language inference models IMPPRESsive? Learning IMPlicature and PRESupposition. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (pp.\u00a08690\u20138705), Online. Association for Computational Linguistics. https:\/\/www.aclweb.org\/anthology\/2020.acl-main.768.","DOI":"10.18653\/v1\/2020.acl-main.768"},{"key":"9691_CR17","doi-asserted-by":"crossref","unstructured":"Joshi, P., Aditya, S., Sathe, A., & Choudhury, M. ((2020) Taxinli: Taking a ride up the nlu hill. In CoNLL.","DOI":"10.18653\/v1\/2020.conll-1.4"},{"key":"9691_CR18","unstructured":"Jurafsky, D., & Martin, J. H. (2009). Speech and language processing: An introduction to natural language processing, computational linguistics, and speech recognition, 2nd Ed. Pearson Prentice Hall."},{"key":"9691_CR19","unstructured":"Kaushik, D., Hovy, E.\u00a0H., & Lipton, Z.\u00a0C. (2020). Learning the difference that makes a difference with counterfactually-augmented data. In ICLR 2020. OpenReview.net.."},{"key":"9691_CR20","doi-asserted-by":"crossref","unstructured":"Khot, T., Sabharwal, A & Clark, P. (2018) Scitail: A textual entailment dataset from science question answering. In S.A. McIlraith and K.Q. Weinberger (Eds.), AAAI 2018. New Orleans, Louisiana, USA, February 2\u20137, (pp.\u00a05189\u20135197). AAAI Press. https:\/\/www.aaai.org\/ocs\/index.php\/AAAI\/AAAI18\/paper\/view\/17368.","DOI":"10.1609\/aaai.v32i1.12022"},{"key":"9691_CR21","doi-asserted-by":"publisher","unstructured":"Kim, N., Patel, R., Poliak, A., Xia, P., Wang, A., McCoy, T., Tenney, I., Ross, A., Linzen, T., Van\u00a0Durme, B., Bowman, S.\u00a0R., & Pavlick, E. (2019). Probing what different NLP tasks teach machines about function word comprehension. In Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*SEM ) (pp.\u00a0235\u2013249). Minneapolis, Minnesota, June 2019. Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/S19-1026. https:\/\/www.aclweb.org\/anthology\/S19-1026.","DOI":"10.18653\/v1\/S19-1026"},{"key":"9691_CR22","unstructured":"Levesque, H., Davis, E., & Morgenstern, L. (2012). The winograd schema challenge. In Thirteenth International Conference on the Principles of Knowledge Repreentation and Reasoning. Citeseer"},{"key":"9691_CR23","doi-asserted-by":"crossref","unstructured":"Liu, H., Cui, L., Liu, J., & Zhang, Y. (2021). Natural language inference in context\u2014investigating contextual reasoning over long texts. In Thirty-Fifth AAAI Conference on Artificial Intelligence, AAAI 2021, Thirty-Third Conference on Innovative Applications of Artificial Intelligence, IAAI 2021, The Eleventh Symposium on Educational Advances in Artificial Intelligence, EAAI 2021, Virtual Event (pp.\u00a013388\u201313396). AAAI Press, https:\/\/ojs.aaai.org\/index.php\/AAAI\/article\/view\/17580.","DOI":"10.1609\/aaai.v35i15.17580"},{"key":"9691_CR24","doi-asserted-by":"crossref","unstructured":"Liu, N.\u00a0F, Gardner, M., Belinkov, Y., Peters, M.\u00a0E., & Smith, N.\u00a0A. (2019a). Linguistic knowledge and transferability of contextual representations. In NAACL-HLT 2019 (Vol.\u00a01, Long and Short Papers, pp.\u00a01073\u20131094).","DOI":"10.18653\/v1\/N19-1112"},{"key":"9691_CR25","unstructured":"Liu, Y., Ott, M., Goyal, N., Du, J., Joshi, M., Chen, D., Levy, O., Lewis, M., Zettlemoyer, L., & Stoyanov, V. (2019b). Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692"},{"key":"9691_CR26","doi-asserted-by":"publisher","unstructured":"McCoy, T., Pavlick, E., & Linzen, T. (2019). Right for the wrong reasons: Diagnosing syntactic heuristics in natural language inference. In ACL (pp.\u00a03428\u20133448). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P19-1334. https:\/\/aclanthology.org\/P19-1334.","DOI":"10.18653\/v1\/P19-1334"},{"key":"9691_CR27","unstructured":"Naik, A., Ravichander, A., Sadeh, N., Rose, C., & Neubig, G. (2018). Stress test evaluation for natural language inference. In Proceedings of the 27th International Conference on Computational Linguistics (pp.\u00a02340\u20132353). Association for Computational Linguistics. https:\/\/www.aclweb.org\/anthology\/C18-1198."},{"key":"9691_CR28","doi-asserted-by":"crossref","unstructured":"Nie, Y., Williams, A., Dinan, E., Bansal, M., Weston, J., & Kiela, D. (2020). Adversarial NLI: A new benchmark for natural language understanding. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics. Association for Computational Linguistics.","DOI":"10.18653\/v1\/2020.acl-main.441"},{"key":"9691_CR29","unstructured":"Pikekos, P., Michalewski, H., & Malinowski, M. (2021). Measuring and improving bert\u2019s mathematical abilities by predicting the order of reasoning. ArXiv, abs\/2106.03921."},{"key":"9691_CR30","doi-asserted-by":"publisher","unstructured":"Poliak, A., Haldar, A., Rudinger,R., Hu, J.\u00a0E., Pavlick, E., White, A.\u00a0S., & Van\u00a0Durme, B. (2018a) Collecting diverse natural language inference problems for sentence representation evaluation. In EMNLP 2018 (pp.\u00a067\u201381). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/D18-1007. https:\/\/www.aclweb.org\/anthology\/D18-1007.","DOI":"10.18653\/v1\/D18-1007"},{"key":"9691_CR31","doi-asserted-by":"crossref","unstructured":"Poliak, A., Naradowsky, J., Haldar, A., Rudinger, R., & Durme, B.\u00a0V. (2018b). Hypothesis only baselines in natural language inference. In *SEMEVAL","DOI":"10.18653\/v1\/S18-2023"},{"key":"9691_CR32","doi-asserted-by":"publisher","unstructured":"Rajpurkar, Pranav, Z., Jian, L., Konstantin, & Liang, P. (2016). SQuAD: 100,000+ questions for machine comprehension of text. In Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing (pp.\u00a02383\u20132392). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/D16-1264. https:\/\/aclanthology.org\/D16-1264.","DOI":"10.18653\/v1\/D16-1264"},{"key":"9691_CR33","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.\u00a0T., Wu, T., Guestrin, C., & Singh, S. (2020). Beyond accuracy: Behavioral testing of NLP models with CheckList. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (pp.\u00a04902\u20134912). Association for Computational Linguistics. https:\/\/www.aclweb.org\/anthology\/2020.acl-main.442.","DOI":"10.18653\/v1\/2020.acl-main.442"},{"key":"9691_CR34","doi-asserted-by":"publisher","unstructured":"Richardson, K., Hu, H., Moss, L., & Sabharwal, A. (2020). Probing natural language inference models through semantic fragments. In Proceedings of the AAAI Conference on Artificial Intelligence (Vol.\u00a034, pp.\u00a08713\u20138721). https:\/\/doi.org\/10.1609\/aaai.v34i05.6397.","DOI":"10.1609\/aaai.v34i05.6397"},{"key":"9691_CR35","doi-asserted-by":"crossref","unstructured":"Rocchietti, G., Achena, F., Marziano, G., Salaris, S., & Lenci, A. (2022). FANCY: A diagnostic data-set for NLI models. In E. Fersini, M. Passarotti, V. Patti (Eds.), Proceedings of the Eighth Italian Conference on Computational Linguistics, CLiC-it 2021 Milan, Italy, January 26\u201328, Volume 3033 of CEUR Workshop Proceedings. CEUR-WS.org, 2021. http:\/\/ceur-ws.org\/Vol-3033\/paper76.pdf.","DOI":"10.4000\/books.aaccademia.10804"},{"key":"9691_CR36","unstructured":"Felipe Salvatore. (2019). Cross-lingual contradiction detection. https:\/\/github.com\/felipessalvatore\/CLCD. commit xxxxxxx."},{"key":"9691_CR37","doi-asserted-by":"crossref","unstructured":"Salvatore, F., Finger, M., & Hirata\u00a0Jr, R. (2019). A logical-based corpus for cross-lingual evaluation. In Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019) (pp.\u00a022\u201330).","DOI":"10.18653\/v1\/D19-6103"},{"key":"9691_CR38","doi-asserted-by":"publisher","unstructured":"Sap, M., Bras, R.\u00a0L., Allaway, E., Bhagavatula, C., Lourie, N., Rashkin, H., Roof, B., Smith, N.\u00a0A., & Choi, Y. (2019). ATOMIC: An atlas of machine commonsense for if-then reasoning. In AAAI 2019 (pp. 3027\u20133035). AAAI Press, https:\/\/doi.org\/10.1609\/aaai.v33i01.33013027.","DOI":"10.1609\/aaai.v33i01.33013027"},{"key":"9691_CR39","doi-asserted-by":"crossref","unstructured":"Schlangen, D. (2021). Targeting the benchmark: On methodology in current natural language processing research. In ACL: Short Papers.","DOI":"10.18653\/v1\/2021.acl-short.85"},{"key":"9691_CR40","doi-asserted-by":"publisher","unstructured":"Schuster, T., Shah, D., Jie\u00a0Serene Yeo, Y., Roberto Filizzola\u00a0O., Daniel, S., Enrico, & Barzilay, R. (2019). Towards debiasing fact verification models. In EMNLP-IJCNLP 2019 (pp.\u00a03419\u20133425), Hong Kong, China, Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/D19-1341. https:\/\/aclanthology.org\/D19-1341.","DOI":"10.18653\/v1\/D19-1341"},{"key":"9691_CR41","doi-asserted-by":"crossref","unstructured":"Sowa, J.F. (2010). The role of logic and ontology in language and reasoning. In Theory and applications of ontology: philosophical perspectives (pp.\u00a0231\u2013263). Springer.","DOI":"10.1007\/978-90-481-8845-1_11"},{"key":"9691_CR42","doi-asserted-by":"crossref","unstructured":"Speer, R., Chin, J., & Havasi, C. (2016). Conceptnet 5.5: An open multilingual graph of general knowledge. CoRR, abs\/1612.03975, http:\/\/arxiv.org\/abs\/1612.03975.","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"9691_CR43","doi-asserted-by":"crossref","unstructured":"Talmor, A. Elazar, Y., Goldberg, Y., & Berant, J. (2019). Olmpics\u2014on what language model pre-training captures.","DOI":"10.1162\/tacl_a_00342"},{"key":"9691_CR44","doi-asserted-by":"publisher","first-page":"743","DOI":"10.1162\/tacl_a_00342","volume":"8","author":"Alon Talmor","year":"2020","unstructured":"Talmor, Alon, Elazar, Yanai, Goldberg, Yoav, & Berant, Jonathan. (2020). Olmpics\u2014On what language model pre-training captures. Transactions of the Association for Computational Linguistics, 8, 743\u2013758.","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"9691_CR45","unstructured":"Talmor, A., Tafjord, O., Clark, P., Goldberg, Y., & Berant, J. (2020b). Leap-of-thought: Teaching pre-trained models to systematically reason over implicit knowledge. In H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.\u00a0F. Balcan, and H.\u00a0Lin, editors, Advances in Neural Information Processing Systems (Vol.\u00a033, pp.\u00a020227\u201320237). Curran Associates, Inc., https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/e992111e4ab9985366e806733383bd8c-Paper.pdf."},{"key":"9691_CR46","doi-asserted-by":"publisher","unstructured":"Tenney, I., Das, D., & Pavlick, E. (2019a). BERT rediscovers the classical NLP pipeline. In ACL (pp.\u00a04593\u20134601). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P19-1452. https:\/\/www.aclweb.org\/anthology\/P19-1452.","DOI":"10.18653\/v1\/P19-1452"},{"key":"9691_CR47","unstructured":"Tenney, I., Xia, P., Chen, B., Wang, A., Poliak, A., McCoy, R.\u00a0T., Kim, N., Van\u00a0Durme, B., Bowman, S., Das, D., et\u00a0al. (2019b). What do you learn from context? probing for sentence structure in contextualized word representations. In ICLR 2019."},{"key":"9691_CR48","doi-asserted-by":"publisher","unstructured":"Thorne, J., Vlachos, A., Christodoulopoulos, C., & Mittal, A. (2018). FEVER: A large-scale dataset for fact extraction and verification. In M.A. Walker, H. Ji, A. Stent (Eds.), NAACL-HLT 2018, New Orleans, Louisiana, USA, June 1-6, 2018, Volume 1 (Long Papers), pp.\u00a0(809\u2013819). Association for Computational Linguistics, https:\/\/doi.org\/10.18653\/v1\/n18-1074.","DOI":"10.18653\/v1\/n18-1074"},{"key":"9691_CR49","doi-asserted-by":"publisher","unstructured":"Vashishtha, S., Poliak, A., Lal, Y.\u00a0K., Van\u00a0Durme, B., & White, A.\u00a0S. (2020). Temporal reasoning in natural language inference. In Findings of the Association for Computational Linguistics: EMNLP 2020 (pp.\u00a04070\u20134078). Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.363. https:\/\/aclanthology.org\/2020.findings-emnlp.363.","DOI":"10.18653\/v1\/2020.findings-emnlp.363"},{"key":"9691_CR50","unstructured":"Vig, J., Gehrmann, S., Belinkov, Y., Qian, S., Nevo, D., Singer, Y., & Shieber, S. (2020). Investigating gender bias in language models using causal mediation analysis. In H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.\u00a0F. Balcan, and H.\u00a0Lin (Eds.), Advances in Neural Information Processing Systems (Vol.\u00a033, pp.\u00a012388\u201312401). Curran Associates, Inc., https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/92650b2e92217715fe312e6fa7b90d82-Paper.pdf."},{"key":"9691_CR51","doi-asserted-by":"crossref","unstructured":"Wallace, E., Wang, Y., Li, S., Singh, S., & Gardner, M. (2019). Do nlp models know numbers? Probing numeracy in embeddings. In Empirical Methods in Natural Language Processing.","DOI":"10.18653\/v1\/D19-1534"},{"key":"9691_CR52","doi-asserted-by":"crossref","unstructured":"Wang, A., Singh, A., Michael, J., Hill, F., Levy, O., & Bowman, S.\u00a0R. (2018). Glue: A multi-task benchmark and analysis platform for natural language understanding. In EMNLP 2018 (p.\u00a0353).","DOI":"10.18653\/v1\/W18-5446"},{"key":"9691_CR53","doi-asserted-by":"publisher","unstructured":"Welleck, S., Weston, J., Szlam, A., & Cho, K. (2019). Dialogue natural language inference. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (pp.\u00a03731\u20133741), Florence, Italy, Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/P19-1363. https:\/\/aclanthology.org\/P19-1363.","DOI":"10.18653\/v1\/P19-1363"},{"key":"9691_CR54","doi-asserted-by":"crossref","unstructured":"Williams, A., Nangia, N., & Bowman, S. (2018). A broad-coverage challenge corpus for sentence understanding through inference. In NAACL-HLT (Vol.\u00a01, Long Papers, pp.\u00a01112\u20131122). Association for Computational Linguistics, http:\/\/aclweb.org\/anthology\/N18-1101.","DOI":"10.18653\/v1\/N18-1101"},{"key":"9691_CR55","unstructured":"Wittgenstein, L. (1922). Tractatus logico-philosophicus. London: Routledge, 1981, http:\/\/scholar.google.de\/scholar.bib?q=info:1G2GoIkyCZIJ:scholar.google.com\/ &output=citation &hl=de &ct=citation &cd=0."},{"key":"9691_CR56","doi-asserted-by":"crossref","unstructured":"Wolf, T., Debut, L., Sanh, V., Chaumond, J., Delangue, C., Moi, A., Cistac, P., Rault, T., Louf, R., Funtowicz, M., Davison, J., Shleifer, S., von Platen, P., Ma, C., Jernite, Y., Plu, J., Xu, C., Scao, T.\u00a0L., Gugger, S., Drame, M., Lhoest, Q., & Rush, A.\u00a0M. (2019). Huggingface\u2019s transformers: State-of-the-art natural language processing. ArXiv, abs\/1910.03771","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"9691_CR57","unstructured":"Yang, G., Haque, M., Song, Q., Yang, W., & Liu, X.. (2022). TestAug: A framework for augmenting capability-based NLP tests. In Proceedings of the 29th International Conference on Computational Linguistics, pages 3480\u20133495, Gyeongju, Republic of Korea. International Committee on Computational Linguistics. https:\/\/aclanthology.org\/2022.coling-1.307."}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-023-09691-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10579-023-09691-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-023-09691-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,5,28]],"date-time":"2024-05-28T18:33:51Z","timestamp":1716921231000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10579-023-09691-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,11,4]]},"references-count":57,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["9691"],"URL":"https:\/\/doi.org\/10.1007\/s10579-023-09691-y","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,11,4]]},"assertion":[{"value":"28 August 2023","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 November 2023","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}