{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,18]],"date-time":"2025-12-18T19:55:46Z","timestamp":1766087746585,"version":"3.40.3"},"publisher-location":"Cham","reference-count":44,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030782917"},{"type":"electronic","value":"9783030782924"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-78292-4_18","type":"book-chapter","created":{"date-parts":[[2021,6,10]],"date-time":"2021-06-10T21:03:59Z","timestamp":1623359039000},"page":"215-227","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Evaluating Critical Reinforcement Learning Framework in the Field"],"prefix":"10.1007","author":[{"given":"Song","family":"Ju","sequence":"first","affiliation":[]},{"given":"Guojing","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Mark","family":"Abdelshiheed","sequence":"additional","affiliation":[]},{"given":"Tiffany","family":"Barnes","sequence":"additional","affiliation":[]},{"given":"Min","family":"Chi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,6,11]]},"reference":[{"key":"18_CR1","unstructured":"Andrychowicz, M., Baker, B., et al.: Learning dexterous in-hand manipulation. arXiv preprint arXiv:1808.00177 (2018)"},{"key":"18_CR2","unstructured":"Ausin, M.S., Azizsoltani, H., Barnes, T., Chi, M.: Leveraging deep reinforcement learning for pedagogical policy induction in an intelligent tutoring system. In: EDM (2019)"},{"key":"18_CR3","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"472","DOI":"10.1007\/978-3-030-52237-7_38","volume-title":"Artificial Intelligence in Education","author":"M Sanz Ausin","year":"2020","unstructured":"Sanz Ausin, M., Maniktala, M., Barnes, T., Chi, M.: Exploring the impact of simple explanations and agency on batch deep reinforcement learning induced pedagogical Policies. In: Bittencourt, I.I., Cukurova, M., Muldner, K., Luckin, R., Mill\u00e1n, E. (eds.) AIED 2020. LNCS (LNAI), vol. 12163, pp. 472\u2013485. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-52237-7_38"},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Ausin, M.S., Maniktala, M., Barnes, T., Chi, M.: Tackling the credit assignment problem in reinforcement learning-induced pedagogical policies with neural networks. In: AIED (2021)","DOI":"10.1007\/978-3-030-78292-4_29"},{"issue":"1\u20132","key":"18_CR5","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discret. Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003). https:\/\/doi.org\/10.1023\/A:1022140919877","journal-title":"Discret. Event Dyn. 
Syst."},{"key":"18_CR6","unstructured":"Beck, J., Woolf, B.P., Beal, C.R.: Advisor: a machine learning architecture for intelligent tutor construction. In: AAAI\/IAAI, pp. 552\u2013557 (2000)"},{"issue":"1\u20132","key":"18_CR7","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/s11257-010-9093-1","volume":"21","author":"M Chi","year":"2011","unstructured":"Chi, M., VanLehn, K., Litman, D., Jordan, P.: Empirically evaluating the application of reinforcement learning to the induction of effective and adaptive pedagogical strategies. User Model. User-Adap. Inter. 21(1\u20132), 137\u2013180 (2011). https:\/\/doi.org\/10.1007\/s11257-010-9093-1","journal-title":"User Model. User-Adap. Inter."},{"key":"18_CR8","unstructured":"Clouse, J.A.: On integrating apprentice learning and reinforcement learning. Ph.D. thesis (1996)"},{"key":"18_CR9","doi-asserted-by":"publisher","first-page":"21","DOI":"10.3390\/make1010002","volume":"1","author":"A Fachantidis","year":"2017","unstructured":"Fachantidis, A., Taylor, M.E., Vlahavas, I.P.: Learning to teach reinforcement learning agents. Mach. Learn. Knowl. Extract. 1, 21\u201342 (2017)","journal-title":"Mach. Learn. Knowl. Extract."},{"issue":"4","key":"18_CR10","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1016\/j.knosys.2009.01.007","volume":"22","author":"A Iglesias","year":"2009","unstructured":"Iglesias, A., Mart\u00ednez, P., Aler, R., Fern\u00e1ndez, F.: Reinforcement learning of pedagogical policies in adaptive and intelligent educational systems. Knowl.-Based Syst. 22(4), 266\u2013270 (2009)","journal-title":"Knowl.-Based Syst."},{"key":"18_CR11","unstructured":"Ju, S., Zhou, G., Azizsoltani, H., Barnes, T., Chi, M.: Identifying critical pedagogical decisions through adversarial deep reinforcement learning. In: EDM (2019)"},{"key":"18_CR12","unstructured":"Ju, S., Zhou, G., Barnes, T., Chi, M.: Pick the moment: identifying critical pedagogical decisions using long-short term rewards. In: EDM (2020)"},{"key":"18_CR13","doi-asserted-by":"crossref","unstructured":"Li, J., Daw, N.D.: Signals in human striatum are appropriate for policy update rather than value prediction, 31 (2011)","DOI":"10.1523\/JNEUROSCI.6316-10.2011"},{"key":"18_CR14","unstructured":"Mandel, T., Liu, Y.E., Levine, S., Brunskill, E., Popovic, Z.: Offline policy evaluation across representations with applications to educational games. In: AAMAS, pp. 1077\u20131084 (2014)"},{"key":"18_CR15","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1126\/science.1100907","volume":"306","author":"SM McClure","year":"2004","unstructured":"McClure, S.M., Laibson, D.I., Loewenstein, G., Cohen, J.D.: Separate neural systems value immediate and delayed monetary rewards. Science 306, 503\u2013507 (2004)","journal-title":"Science"},{"key":"18_CR16","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/978-3-642-21869-9_30","volume-title":"Artificial Intelligence in Education","author":"BM McLaren","year":"2011","unstructured":"McLaren, B.M., Isotani, S.: When is it best to learn with all worked examples? In: Biswas, G., Bull, S., Kay, J., Mitrovic, A. (eds.) AIED 2011. LNCS (LNAI), vol. 6738, pp. 222\u2013229. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-21869-9_30"},{"key":"18_CR17","unstructured":"McLaren, B.M., Lim, S.J., Koedinger, K.R.: When and how often should worked examples be given to students? 
New results and a summary of the current state of research. In: CogSci, pp. 2176\u20132181 (2008)"},{"key":"18_CR18","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"issue":"8","key":"18_CR19","doi-asserted-by":"publisher","first-page":"1057","DOI":"10.1038\/nn1743","volume":"9","author":"G Morris","year":"2006","unstructured":"Morris, G., Nevet, A., Arkadir, D., Vaadia, E., Bergman, H.: Midbrain dopamine neurons encode decisions for future action. Nat. Neurosci. 9(8), 1057\u20131063 (2006)","journal-title":"Nat. Neurosci."},{"key":"18_CR20","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/978-3-319-08786-3_15","volume-title":"User Modeling, Adaptation, and Personalization","author":"AS Najar","year":"2014","unstructured":"Najar, A.S., Mitrovic, A., McLaren, B.M.: Adaptive support versus alternating worked examples and tutored problems: which leads to better learning? In: Dimitrova, V., Kuflik, T., Chin, D., Ricci, F., Dolog, P., Houben, G.-J. (eds.) UMAP 2014. LNCS, vol. 8538, pp. 171\u2013182. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-08786-3_15"},{"key":"18_CR21","doi-asserted-by":"crossref","unstructured":"Narasimhan, K., Kulkarni, T., Barzilay, R.: Language understanding for text-based games using deep reinforcement learning. arXiv preprint arXiv:1506.08941 (2015)","DOI":"10.18653\/v1\/D15-1001"},{"issue":"6","key":"18_CR22","doi-asserted-by":"publisher","first-page":"1290","DOI":"10.1111\/cogs.12290","volume":"40","author":"AN Rafferty","year":"2016","unstructured":"Rafferty, A.N., Brunskill, E., et al.: Faster teaching via POMDP planning. Cogn. Sci. 40(6), 1290\u20131332 (2016)","journal-title":"Cogn. Sci."},{"issue":"4","key":"18_CR23","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1080\/00220970209599510","volume":"70","author":"A Renkl","year":"2002","unstructured":"Renkl, A., Atkinson, R.K., Maier, U.H., Staley, R.: From example study to problem solving: smooth transitions help learning. J. Exp. Educ. 70(4), 293\u2013315 (2002)","journal-title":"J. Exp. Educ."},{"issue":"12","key":"18_CR24","doi-asserted-by":"publisher","first-page":"1615","DOI":"10.1038\/nn2013","volume":"10","author":"MR Roesch","year":"2007","unstructured":"Roesch, M.R., Calu, D.J., Schoenbaum, G.: Dopamine neurons encode the better option in rats deciding between different delayed or sized rewards. Nat. Neurosci. 10(12), 1615\u20131624 (2007)","journal-title":"Nat. Neurosci."},{"key":"18_CR25","unstructured":"Rowe, J., Mott, B., Lester, J.: Optimizing player experience in interactive narrative planning: a modular reinforcement learning approach. In: Tenth Artificial Intelligence and Interactive Digital Entertainment Conference (2014)"},{"key":"18_CR26","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"419","DOI":"10.1007\/978-3-319-19773-9_42","volume-title":"Artificial Intelligence in Education","author":"JP Rowe","year":"2015","unstructured":"Rowe, J.P., Lester, J.C.: Improving student problem solving in narrative-centered learning environments: a modular reinforcement learning framework. In: Conati, C., Heffernan, N., Mitrovic, A., Verdejo, M.F. (eds.) AIED 2015. LNCS (LNAI), vol. 9112, pp. 
419\u2013428. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-19773-9_42"},{"key":"18_CR27","doi-asserted-by":"crossref","unstructured":"Shen, S., Ausin, M.S., Mostafavi, B., Chi, M.: Improving learning & reducing time: a constrained action-based reinforcement learning approach. In: UMAP (2018)","DOI":"10.1145\/3209219.3209232"},{"key":"18_CR28","unstructured":"Shen, S., Chi, M.: Aim low: correlation-based feature selection for model-based reinforcement learning. In: EDM (2016)"},{"key":"18_CR29","doi-asserted-by":"crossref","unstructured":"Shen, S., Chi, M.: Reinforcement learning: the sooner the better, or the later the better? In: Proceedings of the 2016 Conference on User Modeling Adaptation and Personalization, pp. 37\u201344. ACM (2016)","DOI":"10.1145\/2930238.2930247"},{"key":"18_CR30","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"327","DOI":"10.1007\/978-3-319-93846-2_61","volume-title":"Artificial Intelligence in Education","author":"S Shen","year":"2018","unstructured":"Shen, S., Mostafavi, B., Lynch, C., Barnes, T., Chi, M.: Empirically evaluating the effectiveness of POMDP vs. MDP towards the pedagogical strategies induction. In: Penstein Ros\u00e9, C., et al. (eds.) AIED 2018. LNCS (LNAI), vol. 10948, pp. 327\u2013331. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-93846-2_61"},{"issue":"7587","key":"18_CR31","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016)","journal-title":"Nature"},{"issue":"6419","key":"18_CR32","doi-asserted-by":"publisher","first-page":"1140","DOI":"10.1126\/science.aar6404","volume":"362","author":"D Silver","year":"2018","unstructured":"Silver, D., Hubert, T., Schrittwieser, J., et al.: A general reinforcement learning algorithm that masters chess, shogi, and go through self-play. Science 362(6419), 1140\u20131144 (2018)","journal-title":"Science"},{"key":"18_CR33","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/978-3-642-21869-9_45","volume-title":"Artificial Intelligence in Education","author":"JC Stamper","year":"2011","unstructured":"Stamper, J.C., Eagle, M., Barnes, T., Croy, M.: Experimental evaluation of automatic hint generation for a logic tutor. In: Biswas, G., Bull, S., Kay, J., Mitrovic, A. (eds.) AIED 2011. LNCS (LNAI), vol. 6738, pp. 345\u2013352. Springer, Heidelberg (2011). https:\/\/doi.org\/10.1007\/978-3-642-21869-9_45"},{"issue":"9","key":"18_CR34","doi-asserted-by":"publisher","first-page":"1202","DOI":"10.1038\/nn.2881","volume":"14","author":"JH Sul","year":"2011","unstructured":"Sul, J.H., Jo, S., Lee, D., Jung, M.W.: Role of rodent secondary motor cortex in value-based action selection. Nat. Neurosci. 14(9), 1202\u20131208 (2011)","journal-title":"Nat. Neurosci."},{"issue":"1\u20132","key":"18_CR35","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. 
Intell."},{"key":"18_CR36","unstructured":"Torrey, L., Taylor, M.E.: Teaching on a budget: agents advising agents in reinforcement learning. In: International conference on Autonomous Agents and Multi-Agent Systems, AAMAS 2013, pp. 1053\u20131060 (2013)"},{"issue":"3","key":"18_CR37","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1016\/j.cedpsych.2010.10.004","volume":"36","author":"T Van Gog","year":"2011","unstructured":"Van Gog, T., Kester, L., Paas, F.: Effects of worked examples, example-problem, and problem-example pairs on novices\u2019 learning. Contemp. Educ. Psychol. 36(3), 212\u2013218 (2011)","journal-title":"Contemp. Educ. Psychol."},{"key":"18_CR38","doi-asserted-by":"crossref","unstructured":"Wang, P., Rowe, J., Min, W., Mott, B., Lester, J.: Interactive narrative personalization with deep reinforcement learning. In: Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence (2017)","DOI":"10.24963\/ijcai.2017\/538"},{"key":"18_CR39","unstructured":"Zhou, G.: Big, little, or both? Exploring the impact of granularity on learning for students with different incoming competence. In: CogSci (2019)"},{"key":"18_CR40","unstructured":"Zhou, G., et al.: Towards closing the loop: bridging machine-induced pedagogical policies to learning theories. In: EDM (2017)"},{"key":"18_CR41","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"544","DOI":"10.1007\/978-3-030-23204-7_45","volume-title":"Artificial Intelligence in Education","author":"G Zhou","year":"2019","unstructured":"Zhou, G., Azizsoltani, H., Ausin, M.S., Barnes, T., Chi, M.: Hierarchical reinforcement learning for pedagogical policy induction. In: Isotani, S., Mill\u00e1n, E., Ogan, A., Hastings, P., McLaren, B., Luckin, R. (eds.) AIED 2019. LNCS (LNAI), vol. 11625, pp. 544\u2013556. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-23204-7_45"},{"key":"18_CR42","unstructured":"Zhou, G., Price, T.W., Lynch, C., Barnes, T., Chi, M.: The impact of granularity on worked examples and problem solving. In: CogSci, pp. 2817\u20132822 (2015)"},{"key":"18_CR43","doi-asserted-by":"crossref","unstructured":"Zhou, G., Yang, X., Azizsoltani, H., Barnes, T., Chi, M.: Improving student-tutor interaction through data-driven explanation of hierarchical reinforcement induced pedagogical policies. In: UMAP. ACM (2020)","DOI":"10.1145\/3340631.3394848"},{"key":"18_CR44","unstructured":"Zimmer, M., Viappiani, P., Weng, P.: Teacher-student framework: a reinforcement learning approach. 
In: AAMAS Workshop Autonomous Robots and Multirobot Systems (2013)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-78292-4_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,30]],"date-time":"2022-12-30T17:22:36Z","timestamp":1672420956000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-78292-4_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030782917","9783030782924"],"references-count":44,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-78292-4_18","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"11 June 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Utrecht","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"The Netherlands","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 June 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 June 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aied2021.science.uu.nl\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"209","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"40","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers 
Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"76","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"19% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Due to the COVID-19 pandemic the conference was held online.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}