{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T10:35:31Z","timestamp":1777113331980,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2027,4,26]],"date-time":"2027-04-26T00:00:00Z","timestamp":1808697600000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2418656"],"award-info":[{"award-number":["2418656"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2301130"],"award-info":[{"award-number":["2301130"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Google Academic Research Award","award":["NA"],"award-info":[{"award-number":["NA"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,27]]},"DOI":"10.1145\/3785022.3785100","type":"proceedings-article","created":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T09:39:01Z","timestamp":1777109941000},"page":"491-502","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Generate-Then-Validate: A Novel Question Generation Approach Using Small Language Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-1364-8300","authenticated-orcid":false,"given":"Yumou","family":"Wei","sequence":"first","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2291-1468","authenticated-orcid":false,"given":"John","family":"Stamper","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0449-3733","authenticated-orcid":false,"given":"Paulo F.","family":"Carvalho","sequence":"additional","affiliation":[{"name":"Carnegie Mellon University, Pittsburgh, USA"}]}],"member":"320","published-online":{"date-parts":[[2026,4,26]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-39112-5_9"},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"crossref","unstructured":"Michael\u00a0W. Asher Joshua\u00a0D. Hartman Mark Blaser Jack\u00a0F. Eichler and Paulo\u00a0F. Carvalho. 2025. The promise of mastery-based testing for promoting student engagement self-regulated learning and performance in gateway STEM courses. Computers & Education 237 (2025) 105387.","DOI":"10.1016\/j.compedu.2025.105387"},{"key":"e_1_3_3_2_4_2","unstructured":"Peter Belcak Greg Heinrich Shizhe Diao Yonggan Fu Xin Dong Saurav Muralidharan Yingyan\u00a0Celine Lin and Pavlo Molchanov. 2025. Small Language Models are the Future of Agentic AI. arxiv:https:\/\/arXiv.org\/abs\/2506.02153\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2506.02153"},{"key":"e_1_3_3_2_5_2","unstructured":"Yoshua Bengio R\u00e9jean Ducharme Pascal Vincent and Christian Janvin. 2003. A neural probabilistic language model. J. Mach. Learn. Res. 3 (2003)."},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-98465-5_7"},{"key":"e_1_3_3_2_7_2","unstructured":"Benjamin\u00a0S. Bloom. 1968. Learning for Mastery. Instruction and Curriculum. Evaluation Comment 1 2 (May 1968). Reprinted by the Regional Education Laboratory for the Carolinas and Virginia Topical Papers and Reprints Number 1. ERIC Number: ED053419."},{"key":"e_1_3_3_2_8_2","first-page":"1877","volume-title":"Advances in Neural Information Processing Systems","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.F. Balcan, and H.\u00a0Lin (Eds.), Vol.\u00a033. Curran Associates, Inc., 1877\u20131901."},{"key":"e_1_3_3_2_9_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-36272-9_27"},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"crossref","unstructured":"Jacob Cohen. 1960. A Coefficient of Agreement for Nominal Scales. Educational and Psychological Measurement 20 1 (1960) 37\u201346.","DOI":"10.1177\/001316446002000104"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"crossref","unstructured":"J.\u00a0B. Collins Amanda Harsy Jarod Hart Katie\u00a0Anne Haymaker Alyssa Marie\u00a0(Armstrong) Hoofnagle Mike\u00a0Kuyper Janssen Jessica\u00a0Stewart Kelly Austin\u00a0Tyler Mohr and Jessica OShaughnessy. 2019. Mastery-Based Testing in Undergraduate Mathematics Courses. PRIMUS 29 5 (2019) 441\u2013460.","DOI":"10.1080\/10511970.2018.1488317"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"publisher","DOI":"10.1145\/3636243.3636256"},{"key":"e_1_3_3_2_13_2","doi-asserted-by":"crossref","unstructured":"Joseph\u00a0L Fleiss. 1971. Measuring nominal scale agreement among many raters. Psychological Bulletin 76 5 (1971) 378\u2013382.","DOI":"10.1037\/h0031619"},{"key":"e_1_3_3_2_14_2","unstructured":"Google Gemini\u00a0Team. 2025. Gemini 2.5: Pushing the Frontier with Advanced Reasoning Multimodality Long Context and Next Generation Agentic Capabilities. arxiv:https:\/\/arXiv.org\/abs\/2507.06261\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2507.06261"},{"key":"e_1_3_3_2_15_2","unstructured":"Jiawei Gu Xuhui Jiang Zhichao Shi Hexiang Tan Xuehao Zhai Chengjin Xu Wei Li Yinghan Shen Shengjie Ma Honghao Liu et\u00a0al. 2024. A Survey on LLM-as-a-Judge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.15594 (2024)."},{"key":"e_1_3_3_2_16_2","unstructured":"Suriya Gunasekar Yi Zhang Jyoti Aneja Caio C\u00e9sar\u00a0Teodoro Mendes Allie\u00a0Del Giorno Sivakanth Gopi Mojan Javaheripi Piero Kauffmann Gustavo de Rosa Olli Saarikivi Adil Salim Shital Shah Harkirat\u00a0Singh Behl Xin Wang S\u00e9bastien Bubeck Ronen Eldan Adam\u00a0Tauman Kalai Yin\u00a0Tat Lee and Yuanzhi Li. 2023. Textbooks Are All You Need. arxiv:https:\/\/arXiv.org\/abs\/2306.11644\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2306.11644"},{"key":"e_1_3_3_2_17_2","doi-asserted-by":"publisher","DOI":"10.4324\/9780203825945"},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"publisher","DOI":"10.5555\/1857999.1858085"},{"key":"e_1_3_3_2_19_2","volume-title":"International Conference on Learning Representations","author":"Holtzman Ari","year":"2020","unstructured":"Ari Holtzman, Jan Buys, Li Du, Maxwell Forbes, and Yejin Choi. 2020. The Curious Case of Neural Text Degeneration. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=rygGQyrFvH"},{"key":"e_1_3_3_2_20_2","unstructured":"Mojan Javaheripi S\u00e9bastien Bubeck Marah Abdin Jyoti Aneja Sebastien Bubeck Caio C\u00e9sar\u00a0Teodoro Mendes Weizhu Chen Allie\u00a0Del Giorno Ronen Eldan Sivakanth Gopi Suriya Gunasekar Mojan Javaheripi Piero Kauffmann Yin\u00a0Tat Lee Yuanzhi Li Anh Nguyen Gustavo de Rosa Olli Saarikivi Adil Salim Shital Shah Michael Santacroce Harkirat\u00a0Singh Behl Adam\u00a0Taumann Kalai Xin Wang Rachel Ward Philipp Witte Cyril Zhang and Yi Zhang. 2023. Phi-2: The surprising power of small language models. https:\/\/www.microsoft.com\/en-us\/research\/blog\/phi-2-the-surprising-power-of-small-language-models\/"},{"key":"e_1_3_3_2_21_2","doi-asserted-by":"publisher","unstructured":"Enkelejda Kasneci Kathrin Sessler Stefan K\u00fcchemann Maria Bannert Daryna Dementieva Frank Fischer Urs Gasser Georg Groh Stephan G\u00fcnnemann Eyke H\u00fcllermeier Stephan Krusche Gitta Kutyniok Tilman Michaeli Claudia Nerdel J\u00fcrgen Pfeffer Oleksandra Poquet Michael Sailer Albrecht Schmidt Tina Seidel Matthias Stadler Jochen Weller Jochen Kuhn and Gjergji Kasneci. 2023. ChatGPT for good? On opportunities and challenges of large language models for education. Learning and Individual Differences 103 (2023) 102274. 10.1016\/j.lindif.2023.102274","DOI":"10.1016\/j.lindif.2023.102274"},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"J.\u00a0Richard Landis and Gary\u00a0G. Koch. 1977. The measurement of observer agreement for categorical data. Biometrics 33 1 (1977) 159\u2013174.","DOI":"10.2307\/2529310"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3706468.3706487"},{"key":"e_1_3_3_2_24_2","volume-title":"LAK \u201925: Proceedings of the 15th International Learning Analytics and Knowledge Conference","author":"Mart\u00ednez\u00a0Mon\u00e9s Alejandra","year":"2025","unstructured":"Alejandra Mart\u00ednez\u00a0Mon\u00e9s, Caitlin Mills, Jelena Jovanovic, and Serbia Xavier\u00a0Ochoa (Eds.). 2025. LAK \u201925: Proceedings of the 15th International Learning Analytics and Knowledge Conference. Association for Computing Machinery, New York, NY, USA."},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Mary\u00a0L McHugh. 2012. Interrater reliability: the kappa statistic. Biochemia medica 22 3 (2012) 276\u2013282.","DOI":"10.11613\/BM.2012.031"},{"key":"e_1_3_3_2_26_2","volume-title":"An Ethical and Equitable Vision of AI in Education: Learning across 28 Exploratory Projects","author":"Noakes Sierra","year":"2024","unstructured":"Sierra Noakes, Alison Shell, Alexis\u00a0M. Murillo, Parker Van\u00a0Nostrand, Pati Ruiz, Shayla Cornick, and Sana Karim. 2024. An Ethical and Equitable Vision of AI in Education: Learning across 28 Exploratory Projects. Technical Report ED671302. Digital Promise. https:\/\/eric.ed.gov\/?id=ED671302"},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-64299-9_12"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"publisher","unstructured":"Marie Tarrant Aimee Knierim Sasha\u00a0K. Hayes and James Ware. 2006. The frequency of item writing flaws in multiple-choice questions used in high stakes nursing assessments. Nurse Education Today 26 8 (2006) 662\u2013671. 10.1016\/j.nedt.2006.07.006Proceedings from the 1st Nurse Education International Conference.","DOI":"10.1016\/j.nedt.2006.07.006"},{"key":"e_1_3_3_2_29_2","doi-asserted-by":"publisher","DOI":"10.3115\/1073336.1073339"},{"key":"e_1_3_3_2_30_2","unstructured":"Shen Wang Tianlong Xu Hang Li Chaoli Zhang Joleen Liang Jiliang Tang Philip\u00a0S. Yu and Qingsong Wen. 2024. Large Language Models for Education: A Survey and Outlook. arxiv:https:\/\/arXiv.org\/abs\/2403.18105\u00a0[cs.CL] https:\/\/arxiv.org\/abs\/2403.18105"},{"key":"e_1_3_3_2_31_2","doi-asserted-by":"publisher","DOI":"10.5281\/zenodo.15870197"},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"e_1_3_3_2_33_2","series-title":"(NIPS \u201923)","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Zheng Lianmin","year":"2023","unstructured":"Lianmin Zheng, Wei-Lin Chiang, Ying Sheng, Siyuan Zhuang, Zhanghao Wu, Yonghao Zhuang, Zi Lin, Zhuohan Li, Dacheng Li, Eric\u00a0P. Xing, Hao Zhang, Joseph\u00a0E. Gonzalez, and Ion Stoica. 2023. Judging LLM-as-a-judge with MT-bench and Chatbot Arena. In Proceedings of the 37th International Conference on Neural Information Processing Systems (New Orleans, LA, USA) (NIPS \u201923). Curran Associates Inc., Red Hook, NY, USA, Article 2020."}],"event":{"name":"LAK 2026: LAK26: 16th International Learning Analytics and Knowledge Conference","location":"Bergen Norway","acronym":"LAK 2026"},"container-title":["Proceedings of the LAK26: 16th International Learning Analytics and Knowledge Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3785022.3785100","content-type":"text\/html","content-version":"vor","intended-application":"syndication"}],"deposited":{"date-parts":[[2026,4,25]],"date-time":"2026-04-25T09:45:00Z","timestamp":1777110300000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3785022.3785100"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,26]]},"references-count":32,"alternative-id":["10.1145\/3785022.3785100","10.1145\/3785022"],"URL":"https:\/\/doi.org\/10.1145\/3785022.3785100","relation":{},"subject":[],"published":{"date-parts":[[2026,4,26]]},"assertion":[{"value":"2026-04-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}