{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T01:36:35Z","timestamp":1772847395813,"version":"3.50.1"},"reference-count":47,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T00:00:00Z","timestamp":1622678400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T00:00:00Z","timestamp":1622678400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["SFRH\/BD\/51916\/2012"],"award-info":[{"award-number":["SFRH\/BD\/51916\/2012"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001871","name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","doi-asserted-by":"publisher","award":["UIDB\/50021\/2020"],"award-info":[{"award-number":["UIDB\/50021\/2020"]}],"id":[{"id":"10.13039\/501100001871","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Lang Resources &amp; Evaluation"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s10579-021-09545-5","type":"journal-article","created":{"date-parts":[[2021,6,3]],"date-time":"2021-06-03T18:02:23Z","timestamp":1622743343000},"page":"573-591","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Towards the benchmarking of question generation: introducing the Monserrate corpus"],"prefix":"10.1007","volume":"56","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2929-0286","authenticated-orcid":false,"given":"Hugo","family":"Rodrigues","sequence":"first","affiliation":[]},{"given":"Eric","family":"Nyberg","sequence":"additional","affiliation":[]},{"given":"Luisa","family":"Coheur","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,3]]},"reference":[{"key":"9545_CR1","unstructured":"Ali, H., Chali, Y., & Hasan, S. A. (2010). Automation of question generation from sentences. In: Proceedings of QG2010: The Third Workshop on Question Generation."},{"key":"9545_CR2","unstructured":"Amidei, J., Piwek, P., & Willis, A. (2018). Rethinking the agreement in human evaluation tasks. In: Proceedings of the 27th International Conference on Computational Linguistics, Association for Computational Linguistics, Santa Fe, New Mexico (pp. 3318\u20133329). USA"},{"key":"9545_CR3","unstructured":"Banerjee, S., & Lavie, A. (2005). METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, Association for Computational Linguistics, Ann Arbor, Michigan, (pp. 65\u201372)."},{"key":"9545_CR4","doi-asserted-by":"crossref","unstructured":"Chaganty, A. T., Mussmann, S., & Liang, P. (2018). The price of debiasing automatic metrics in natural language evalaution. In: Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics, ACL 2018, Melbourne, Australia, July 15\u201320, 2018, Volume 1: Long Papers, (pp. 643\u2013653).","DOI":"10.18653\/v1\/P18-1060"},{"key":"9545_CR5","unstructured":"Chen, W., Aist, G., & mostow, J. (2009). Generating questions automatically from informational text. In: Proceedings of the 2nd Workshop on Question Generation (AIED 2009), (pp. 17\u201324)."},{"issue":"1","key":"9545_CR6","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1177\/001316446002000104","volume":"20","author":"J Cohen","year":"1960","unstructured":"Cohen, J. (1960). A coefficient of agreement for nominal scales. Educational and Psychological Measurement, 20(1), 37.","journal-title":"Educational and Psychological Measurement"},{"key":"9545_CR7","unstructured":"Curto, S., Mendes, A. C., & Coheur, L. (2011). Exploring linguistically-rich patterns for question generation. In: Proceedings of the UCNLG+Eval: Language Generation and Evaluation Workshop, Association for Computational Linguistics, Stroudsburg, PA, USA, UCNLG+EVAL \u201911, (pp. 33\u201338)."},{"issue":"2","key":"9545_CR8","doi-asserted-by":"publisher","first-page":"147","DOI":"10.5087\/dad.2012.207","volume":"3","author":"S Curto","year":"2012","unstructured":"Curto, S., Mendes, A. C., & Coheur, L. (2012). Question generation based on Lexico-syntactic patterns learned from the web. Dialogue & Discourse, 3(2), 147\u2013175.","journal-title":"Dialogue & Discourse"},{"key":"9545_CR9","doi-asserted-by":"crossref","unstructured":"Du, X., Shao, J., & Cardie, C. (2017). Learning to ask: Neural question generation for reading comprehension. In: Association for Computational Linguistics (ACL).","DOI":"10.18653\/v1\/P17-1123"},{"key":"9545_CR10","doi-asserted-by":"crossref","unstructured":"Flor, M., & Riordan, B. (2018). A semantic role-based approach to open-domain automatic question generation. In: Proceedings of the Thirteenth Workshop on Innovative Use of NLP for Building Educational Applications, Association for Computational Linguistics, New Orleans, Louisiana, (pp. 254\u2013263).","DOI":"10.18653\/v1\/W18-0530"},{"key":"9545_CR11","unstructured":"Forgues, G., Pineau, J., Larchev\u00eaque, J. M., & Tremblay, R. (2014). Bootstrapping dialog systems with word embeddings. In: Nips, modern Machine Learning and Natural Language Processing Workshop, vol\u00a02."},{"key":"9545_CR12","unstructured":"Heilman, M. (2011). Automatic factual question generation from text. PhD thesis, School of Computer Science, Carnegie Mellon University, Pittsburgh, PA."},{"key":"9545_CR13","doi-asserted-by":"crossref","unstructured":"Heilman, M., & Smith, N. A. (2009). Question generation via overgenerating transformations and ranking. Tech. rep., School of Computer Science, Carnegie Mellon University, Pittsburgh, PA.","DOI":"10.21236\/ADA531042"},{"key":"9545_CR14","unstructured":"Heilman, M., & Smith, N. A. (2010). Good question! statistical ranking for question generation. In: Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics, Association for Computational Linguistics, Stroudsburg, PA, USA, HLT \u201910, (pp. 609\u2013617)."},{"key":"9545_CR15","unstructured":"Indurthi, S., Raghu, D., Khapra, M. M., & Joshi, S. (2017). Generating natural language question-answer pairs from a knowledge graph using a RNN based question generation model. In: EACL, Association for Computational Linguistics, (pp. 376\u2013385)."},{"key":"9545_CR16","unstructured":"Kalady, S., Illikottil, A., & Das, R. (2010). Natural language question generation using syntax and keywords. In: Proceedings of QG2010: The Third Workshop on Question Generation."},{"key":"9545_CR17","unstructured":"Kiros, R., Zhu, Y., Salakhutdinov, R., Zemel, R. S., Torralba, A., Urtasun, R., & Fidler, S. (2015). Skip-thought vectors. CoRR abs\/1506.06726."},{"key":"9545_CR18","unstructured":"Kumar, V., Ramakrishnan, G., & Li, Y. F. (2018). A framework for automatic question generation from text using deep reinforcement learning. ArXiv."},{"key":"9545_CR19","doi-asserted-by":"crossref","unstructured":"Labutov, I., Basu, S., & Vanderwende, L. (2015). Deep questions without deep understanding. In: Proceedings of ACL.","DOI":"10.3115\/v1\/P15-1086"},{"issue":"1","key":"9545_CR20","doi-asserted-by":"publisher","first-page":"159","DOI":"10.2307\/2529310","volume":"33","author":"JR Landis","year":"1977","unstructured":"Landis, J. R., & Koch, G. G. (1977). The measurement of observer agreement for categorical data. Biometrics, 33(1), 159\u2013174.","journal-title":"Biometrics"},{"key":"9545_CR21","unstructured":"Levy, R., & Andrew, G. (2006). Tregex and tsurgeon: Tools for querying and manipulating tree data structures. In: In 5th International Conference on Language Resources and Evaluation."},{"key":"9545_CR22","unstructured":"Lin, C. Y. (2004). ROUGE: A package for automatic evaluation of summaries. Text Summarization Branches Out (pp. 74\u201381). Barcelona, Spain: Association for Computational Linguistics."},{"key":"9545_CR23","doi-asserted-by":"crossref","unstructured":"Liu, B., Zhao, M., Niu, D., Lai, K., He, Y., Wei, H., & Xu, Y. (2019). Learning to generate questions by learning what not to generate. CoRR abs\/1902.10418.","DOI":"10.1145\/3308558.3313737"},{"key":"9545_CR24","unstructured":"Mannem, P., Prasad, R., & Joshi, A. (2010). Question generation from paragraphs at upenn: Qgstec system description. In: Proceedings of QG2010: The Third Workshop on Question Generation, (pp. 84\u201391)."},{"key":"9545_CR25","doi-asserted-by":"crossref","unstructured":"Mazidi, K., & Nielsen, R. D. (2015). Leveraging multiple views of text for automatic question generation. In: Proceedings of Artificial Intelligence in Education - 17th International Conference, AIED 2015, Madrid, Spain, June 22\u201326, 2015. (pp. 257\u2013266).","DOI":"10.1007\/978-3-319-19773-9_26"},{"key":"9545_CR26","doi-asserted-by":"crossref","unstructured":"Mazidi, K., & Tarau, P. (2016). Infusing nlu into automatic question generation. In: Proceedings of the 9th International Natural Language Generation conference, ACL.","DOI":"10.18653\/v1\/W16-6609"},{"key":"9545_CR27","unstructured":"Nguyen, T., Rosenberg, M., Song, X., Gao, J., Tiwary, S., Majumder, R., & Deng, L. (2016). Ms marco: A human generated machine reading comprehension dataset. CoRR."},{"key":"9545_CR28","doi-asserted-by":"crossref","unstructured":"Novikova, J., Du\u0161ek, O., Cercas\u00a0Curry, A., & Rieser, V. (2017). Why we need new evaluation metrics for nlg. In: Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing, Association for Computational Linguistics, (pp. 2241\u20132252).","DOI":"10.18653\/v1\/D17-1238"},{"key":"9545_CR29","first-page":"76","volume":"2010","author":"S Pal","year":"2010","unstructured":"Pal, S., Mondal, T., Pakray, P., Das, D., & Bandyopadhyay, S. (2010). Qgstec system description-juqgg: A rule based approach. Boyer & Piwek, 2010, 76\u201379.","journal-title":"Boyer & Piwek"},{"key":"9545_CR30","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., & Zhu, W. J. (2002). Bleu: A method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics, Philadelphia, Pennsylvania, USA, (pp. 311\u2013318). https:\/\/doi.org\/10.3115\/1073083.1073135.","DOI":"10.3115\/1073083.1073135."},{"key":"9545_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.5087\/dad.2012.201","volume":"3","author":"P Piwek","year":"2012","unstructured":"Piwek, P., & Boyer, K. (2012). Varieties of question generation: Introduction to this special issue. Dialogue & Discourse, 3, 1\u20139.","journal-title":"Dialogue & Discourse"},{"issue":"15","key":"9545_CR32","doi-asserted-by":"publisher","first-page":"4407","DOI":"10.1080\/01431161.2011.552923","volume":"32","author":"RG Pontius","year":"2011","unstructured":"Pontius, R. G., & Millones, M. (2011). Death to Kappa: Birth of quantity disagreement and allocation disagreement for accuracy assessment. International Journal of Remote Sensing, 32(15), 4407\u20134429.","journal-title":"International Journal of Remote Sensing"},{"key":"9545_CR33","doi-asserted-by":"crossref","unstructured":"Rajpurkar, P., Zhang, J., Lopyrev, K., & Liang, P. (2016). Squad: 100, 000+ questions for machine comprehension of text. CoRR abs\/1606.05250.","DOI":"10.18653\/v1\/D16-1264"},{"key":"9545_CR34","doi-asserted-by":"crossref","unstructured":"Rei, R., Stewart, C., Farinha, A. C., & Lavie, A. (2020). Comet: A neural framework for mt evaluation. arXiv:200909025.","DOI":"10.18653\/v1\/2020.emnlp-main.213"},{"key":"9545_CR35","doi-asserted-by":"crossref","unstructured":"Rodrigues, H., Coheur, L., & Nyberg, E. (2018). Improving question generation with the teacher\u2019s implicit feedback. In: International Conference on Artificial Intelligence in Education, Springer, (pp. 301\u2013306).","DOI":"10.1007\/978-3-319-93846-2_56"},{"key":"9545_CR36","doi-asserted-by":"crossref","unstructured":"Rus, V., & Lintean, M. (2012). A comparison of greedy and optimal assessment of natural language student input using word-to-word similarity metrics. In: Proceedings of the Seventh Workshop on Building Educational Applications Using NLP, Association for Computational Linguistics, Montr\u00e9al, Canada, (pp. 157\u2013162).","DOI":"10.1007\/978-3-642-30950-2_116"},{"key":"9545_CR37","unstructured":"Rus, V., Wyse, B., Piwek, P., Lintean, M., Stoyanchev, S., & Moldovan, C. (2010). Overview of the first question generation shared task evaluation challenge. In: Proceedings of the Sixth International Natural Language Generation Conference (INLG 2010)."},{"key":"9545_CR38","unstructured":"Rus, V., Piwek, P., Stoyanchev, S., Wyse, B., Lintean, M., & Moldovan, C. (2011). Question generation shared task and evaluation challenge: Status report. In: Proceedings of the 13th European Workshop on Natural Language Generation, Association for Computational Linguistics, Stroudsburg, PA, USA, ENLG \u201911, (pp. 318\u2013320)."},{"issue":"2","key":"9545_CR39","doi-asserted-by":"publisher","first-page":"177","DOI":"10.5087\/dad.2012.208","volume":"3","author":"V Rus","year":"2012","unstructured":"Rus, V., Wyse, B., Piwek, P., Lintean, M., Stoyanchev, S., & Moldovan, C. (2012). A detailed account of the first question generation shared task evaluation challenge. Dialogue & Discourse, 3(2), 177\u2013204.","journal-title":"Dialogue & Discourse"},{"key":"9545_CR40","doi-asserted-by":"crossref","unstructured":"Serban, I. V., Garc\u00eda-Dur\u00e1n, A., Gulcehre, C., Ahn, S., Chandar, S., Courville, A., & Bengio, Y. (2016). Generating factoid questions with recurrent neural networks: The 30M factoid question-answer corpus. In: Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), Association for Computational Linguistics, Berlin, Germany, (pp. 588\u2013598).","DOI":"10.18653\/v1\/P16-1056"},{"key":"9545_CR41","unstructured":"Sharma, S., El\u00a0Asri, L., Schulz, H., & Zumer, J. (2017). Relevance of unsupervised metrics in task-oriented dialogue for evaluating natural language generation. CoRR abs\/1706.09799, http:\/\/arxiv.org\/abs\/1706.09799."},{"key":"9545_CR42","doi-asserted-by":"crossref","unstructured":"Subramanian, S., Wang, T., Yuan, X., Zhang, S., Trischler, A., & Bengio, Y. (2018). Neural models for key phrase extraction and question generation. In: QA@ACL, Association for Computational Linguistics, (pp. 78\u201388).","DOI":"10.18653\/v1\/W18-2609"},{"key":"9545_CR43","unstructured":"Varga, A., & Ha, L. A. (2010). Wlv: A question generation system for the qgstec 2010 task b. In: Proceedings of QG2010: The Third Workshop on Question Generation."},{"key":"9545_CR44","unstructured":"Wang, T., Yuan, X., & Trischler, A. (2017). A joint model for question answering and question generation. CoRR abs\/1706.01450."},{"key":"9545_CR45","unstructured":"Wyse, B., & Piwek, P. (2009). Generating questions from openlearn study units. In: AIED 2009 Workshop Proceedings Volume 1: The 2nd Workshop on Question Generation."},{"key":"9545_CR46","doi-asserted-by":"crossref","unstructured":"Yuan, X., Wang, T., Gulcehre, C., Sordoni, A., Bachman, P., Zhang, S., Subramanian, S., & Trischler, A. (2017). Machine comprehension by text-to-text neural question generation. In: Proceedings of the 2nd Workshop on Representation Learning for NLP, Association for Computational Linguistics, Vancouver, Canada, (pp. 15\u201325).","DOI":"10.18653\/v1\/W17-2603"},{"key":"9545_CR47","doi-asserted-by":"publisher","first-page":"662","DOI":"10.1007\/978-3-319-73618-1_56","volume-title":"Natural language processing and Chinese computing","author":"Q Zhou","year":"2018","unstructured":"Zhou, Q., Yang, N., Wei, F., Tan, C., Bao, H., & Zhou, M. (2018). Neural question generation from text: A preliminary study. In X. Huang, J. Jiang, D. Zhao, Y. Feng, & Y. Hong (Eds.), Natural language processing and Chinese computing (pp. 662\u2013671). Cham: Springer International Publishing."}],"container-title":["Language Resources and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-021-09545-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10579-021-09545-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10579-021-09545-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,11]],"date-time":"2022-06-11T10:09:05Z","timestamp":1654942145000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10579-021-09545-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,3]]},"references-count":47,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["9545"],"URL":"https:\/\/doi.org\/10.1007\/s10579-021-09545-5","relation":{},"ISSN":["1574-020X","1574-0218"],"issn-type":[{"value":"1574-020X","type":"print"},{"value":"1574-0218","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,6,3]]},"assertion":[{"value":"13 May 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 June 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}