{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T21:15:01Z","timestamp":1769634901759,"version":"3.49.0"},"publisher-location":"Cham","reference-count":39,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030232030","type":"print"},{"value":"9783030232047","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-23204-7_45","type":"book-chapter","created":{"date-parts":[[2019,6,20]],"date-time":"2019-06-20T21:05:01Z","timestamp":1561064701000},"page":"544-556","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Hierarchical Reinforcement Learning for Pedagogical Policy Induction"],"prefix":"10.1007","author":[{"given":"Guojing","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Hamoon","family":"Azizsoltani","sequence":"additional","affiliation":[]},{"given":"Markel Sanz","family":"Ausin","sequence":"additional","affiliation":[]},{"given":"Tiffany","family":"Barnes","sequence":"additional","affiliation":[]},{"given":"Min","family":"Chi","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,6,21]]},"reference":[{"issue":"July","key":"45_CR1","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1016\/j.engappai.2018.06.007","volume":"74","author":"H Azizsoltani","year":"2018","unstructured":"Azizsoltani, H., Sadeghi, E.: Adaptive sequential strategy for risk estimation of engineering systems using gaussian process regression active learning. Eng. Appl. Artif. Intell. 
74(July), 146\u2013165 (2018)","journal-title":"Eng. Appl. Artif. Intell."},{"issue":"1\u20132","key":"45_CR2","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discrete Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003)","journal-title":"Discrete Event Dyn. Syst."},{"issue":"552\u2013557","key":"45_CR3","first-page":"1","volume":"2000","author":"J Beck","year":"2000","unstructured":"Beck, J., Woolf, B.P., Beal, C.R.: ADVISOR: a machine learning architecture for intelligent tutor construction. AAAI\/IAAI 2000(552\u2013557), 1\u20132 (2000)","journal-title":"AAAI\/IAAI"},{"issue":"1\u20132","key":"45_CR4","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/s11257-010-9093-1","volume":"21","author":"M Chi","year":"2011","unstructured":"Chi, M., VanLehn, K., Litman, D., Jordan, P.: Empirically evaluating the application of reinforcement learning to the induction of effective and adaptive pedagogical strategies. User Model. User Adap. Inter. 21(1\u20132), 137\u2013180 (2011)","journal-title":"User Model. User Adap. Inter."},{"key":"45_CR5","unstructured":"Clement, B., Oudeyer, P.Y., Lopes, M.: A comparison of automatic teaching strategies for heterogeneous student populations. In: EDM 2016\u20139th International Conference on Educational Data Mining (2016)"},{"key":"45_CR6","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1007\/978-3-642-14749-4_27","volume-title":"Spatial Cognition VII","author":"H Cuay\u00e1huitl","year":"2010","unstructured":"Cuay\u00e1huitl, H., Dethlefs, N., Frommberger, L., Richter, K.-F., Bateman, J.: Generating adaptive route instructions using hierarchical reinforcement learning. 
In: H\u00f6lscher, C., Shipley, T.F., Olivetti Belardinelli, M., Bateman, J.A., Newcombe, N.S. (eds.) Spatial Cognition 2010. LNCS (LNAI), vol. 6222, pp. 319\u2013334. Springer, Heidelberg (2010). https:\/\/doi.org\/10.1007\/978-3-642-14749-4_27"},{"key":"45_CR7","doi-asserted-by":"crossref","unstructured":"Evens, M., Michael, J.: One-on-One Tutoring by Humans and Computers. Psychology Press (2006)","DOI":"10.4324\/9781410617071"},{"issue":"4","key":"45_CR8","doi-asserted-by":"publisher","first-page":"1261","DOI":"10.1109\/TIT.2005.844072","volume":"51","author":"D Guo","year":"2005","unstructured":"Guo, D., Shamai, S., Verd\u00fa, S.: Mutual information and minimum mean-square error in Gaussian channels. IEEE Trans. Inf. Theor. 51(4), 1261\u20131282 (2005)","journal-title":"IEEE Trans. Inf. Theor."},{"issue":"1","key":"45_CR9","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1007\/s10489-008-0115-1","volume":"31","author":"A Iglesias","year":"2009","unstructured":"Iglesias, A., Mart\u00ednez, P., Aler, R., Fern\u00e1ndez, F.: Learning teaching strategies in an adaptive and intelligent educational system through reinforcement learning. Appl. Intell. 31(1), 89\u2013106 (2009)","journal-title":"Appl. Intell."},{"issue":"4","key":"45_CR10","doi-asserted-by":"publisher","first-page":"266","DOI":"10.1016\/j.knosys.2009.01.007","volume":"22","author":"A Iglesias","year":"2009","unstructured":"Iglesias, A., Mart\u00ednez, P., Aler, R., Fern\u00e1ndez, F.: Reinforcement learning of pedagogical policies in adaptive and intelligent educational systems. Knowl.-Based Syst. 22(4), 266\u2013270 (2009)","journal-title":"Knowl.-Based Syst."},{"key":"45_CR11","unstructured":"Kulkarni, T.D., Narasimhan, K., Saeedi, A., Tenenbaum, J.: Hierarchical deep reinforcement learning: integrating temporal abstraction and intrinsic motivation. In: Advances in Neural Information Processing Systems, pp. 
3675\u20133683 (2016)"},{"key":"45_CR12","doi-asserted-by":"crossref","unstructured":"Lajoie, S.P., Derry, S.J.: Motivational techniques of expert human tutors: lessons for the design of computer-based tutors. In: Computers as Cognitive Tools, pp. 83\u2013114. Routledge (2013)","DOI":"10.4324\/9780203052594-8"},{"key":"45_CR13","unstructured":"Mandel, T., Liu, Y.E., Levine, S., Brunskill, E., Popovic, Z.: Offline policy evaluation across representations with applications to educational games. In: Proceedings of the 2014 International Conference on Autonomous Agents and Multi-agent Systems, pp. 1077\u20131084. International Foundation for Autonomous Agents and Multiagent Systems (2014)"},{"key":"45_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1007\/978-3-319-07221-0_44","volume-title":"Intelligent Tutoring Systems","author":"BM McLaren","year":"2014","unstructured":"McLaren, B.M., van Gog, T., Ganoe, C., Yaron, D., Karabinos, M.: Exploring the assistance dilemma: comparing instructional support in examples and problems. In: Trausan-Matu, S., Boyer, K.E., Crosby, M., Panourgia, K. (eds.) ITS 2014. LNCS, vol. 8474, pp. 354\u2013361. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-07221-0_44"},{"key":"45_CR15","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1007\/978-3-642-21869-9_30","volume-title":"Artificial Intelligence in Education","author":"BM McLaren","year":"2011","unstructured":"McLaren, B.M., Isotani, S.: When is it best to learn with all worked examples? In: Biswas, G., Bull, S., Kay, J., Mitrovic, A. (eds.) AIED 2011. LNCS (LNAI), vol. 6738, pp. 222\u2013229. Springer, Heidelberg (2011). 
https:\/\/doi.org\/10.1007\/978-3-642-21869-9_30"},{"key":"45_CR16","unstructured":"McLaren, B.M., Lim, S.J., Koedinger, K.R.: When and how often should worked examples be given to students? New results and a summary of the current state of research. In: CogSci, pp. 2176\u20132181 (2008)"},{"key":"45_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1007\/978-3-319-08786-3_15","volume-title":"User Modeling, Adaptation, and Personalization","author":"AS Najar","year":"2014","unstructured":"Najar, A.S., Mitrovic, A., McLaren, B.M.: Adaptive support versus alternating worked examples and tutored problems: which leads to better learning? In: Dimitrova, V., Kuflik, T., Chin, D., Ricci, F., Dolog, P., Houben, G.-J. (eds.) UMAP 2014. LNCS, vol. 8538, pp. 171\u2013182. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-08786-3_15"},{"issue":"4","key":"45_CR18","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1145\/3072959.3073602","volume":"36","author":"XB Peng","year":"2017","unstructured":"Peng, X.B., Berseth, G., Yin, K., Van De Panne, M.: DeepLoco: dynamic locomotion skills using hierarchical deep reinforcement learning. ACM Trans. Graph. (TOG) 36(4), 41 (2017)","journal-title":"ACM Trans. Graph. (TOG)"},{"issue":"6","key":"45_CR19","doi-asserted-by":"publisher","first-page":"1290","DOI":"10.1111\/cogs.12290","volume":"40","author":"AN Rafferty","year":"2016","unstructured":"Rafferty, A.N., Brunskill, E., Griffiths, T.L., Shafto, P.: Faster teaching via POMDP planning. Cogn. Sci. 40(6), 1290\u20131332 (2016)","journal-title":"Cogn. 
Sci."},{"key":"45_CR20","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1007\/978-3-540-28650-9_4","volume-title":"Advanced Lectures on Machine Learning","author":"CE Rasmussen","year":"2004","unstructured":"Rasmussen, C.E.: Gaussian processes in machine learning. In: Bousquet, O., von Luxburg, U., R\u00e4tsch, G. (eds.) ML -2003. LNCS (LNAI), vol. 3176, pp. 63\u201371. Springer, Heidelberg (2004). https:\/\/doi.org\/10.1007\/978-3-540-28650-9_4"},{"issue":"4","key":"45_CR21","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1080\/00220970209599510","volume":"70","author":"A Renkl","year":"2002","unstructured":"Renkl, A., Atkinson, R.K., Maier, U.H., Staley, R.: From example study to problem solving: smooth transitions help learning. J. Exp. Educ. 70(4), 293\u2013315 (2002)","journal-title":"J. Exp. Educ."},{"key":"45_CR22","unstructured":"Ryan, M., Reid, M.: Learning to fly: an application of hierarchical reinforcement learning. In: Proceedings of the 17th International Conference on Machine Learning. Citeseer (2000)"},{"issue":"3","key":"45_CR23","doi-asserted-by":"publisher","first-page":"289","DOI":"10.1007\/s11251-009-9107-8","volume":"38","author":"RJ Salden","year":"2010","unstructured":"Salden, R.J., Aleven, V., Schwonke, R., Renkl, A.: The expertise reversal effect and worked examples in tutored problem solving. Instr. Sci. 38(3), 289\u2013307 (2010)","journal-title":"Instr. Sci."},{"key":"45_CR24","unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay. arXiv preprint arXiv:1511.05952 (2015)"},{"issue":"9\u201310","key":"45_CR25","doi-asserted-by":"publisher","first-page":"1569","DOI":"10.1007\/s10994-017-5650-8","volume":"106","author":"D Schwab","year":"2017","unstructured":"Schwab, D., Ray, S.: Offline reinforcement learning with task hierarchies. Mach. Learn. 
106(9\u201310), 1569\u20131598 (2017)","journal-title":"Mach. Learn."},{"issue":"2","key":"45_CR26","doi-asserted-by":"publisher","first-page":"258","DOI":"10.1016\/j.chb.2008.12.011","volume":"25","author":"R Schwonke","year":"2009","unstructured":"Schwonke, R., Renkl, A., Krieg, C., Wittwer, J., Aleven, V., Salden, R.: The worked-example effect: not an artefact of lousy control conditions. Comput. Hum. Behav. 25(2), 258\u2013266 (2009)","journal-title":"Comput. Hum. Behav."},{"key":"45_CR27","doi-asserted-by":"crossref","unstructured":"Shen, S., Ausin, M.S., Mostafavi, B., Chi, M.: Improving learning & reducing time: a constrained action-based reinforcement learning approach. In: Proceedings of the 26th Conference on User Modeling, Adaptation and Personalization, pp. 43\u201351. ACM (2018)","DOI":"10.1145\/3209219.3209232"},{"key":"45_CR28","doi-asserted-by":"crossref","unstructured":"Shen, S., Chi, M.: Reinforcement learning: the sooner the better, or the later the better? In: Proceedings of the 2016 Conference on User Modeling Adaptation and Personalization, pp. 37\u201344. ACM (2016)","DOI":"10.1145\/2930238.2930247"},{"key":"45_CR29","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1007\/978-3-642-21869-9_45","volume-title":"Artificial Intelligence in Education","author":"JC Stamper","year":"2011","unstructured":"Stamper, J.C., Eagle, M., Barnes, T., Croy, M.: Experimental evaluation of automatic hint generation for a logic tutor. In: Biswas, G., Bull, S., Kay, J., Mitrovic, A. (eds.) AIED 2011. LNCS (LNAI), vol. 6738, pp. 345\u2013352. Springer, Heidelberg (2011). 
https:\/\/doi.org\/10.1007\/978-3-642-21869-9_45"},{"issue":"1\u20132","key":"45_CR30","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between MDPs and semi-MDPs: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"issue":"1","key":"45_CR31","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1207\/s1532690xci0201_3","volume":"2","author":"J Sweller","year":"1985","unstructured":"Sweller, J., Cooper, G.A.: The use of worked examples as a substitute for problem solving in learning algebra. Cogn. Instr. 2(1), 59\u201389 (1985)","journal-title":"Cogn. Instr."},{"key":"45_CR32","unstructured":"Thomas, P., Brunskill, E.: Data-efficient off-policy policy evaluation for reinforcement learning. In: International Conference on Machine Learning, pp. 2139\u20132148 (2016)"},{"issue":"3","key":"45_CR33","doi-asserted-by":"publisher","first-page":"212","DOI":"10.1016\/j.cedpsych.2010.10.004","volume":"36","author":"T Van Gog","year":"2011","unstructured":"Van Gog, T., Kester, L., Paas, F.: Effects of worked examples, example-problem, and problem-example pairs on novices learning. Contemp. Educ. Psychol. 36(3), 212\u2013218 (2011)","journal-title":"Contemp. Educ. Psychol."},{"key":"45_CR34","doi-asserted-by":"crossref","unstructured":"Van Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double Q-learning. In: AAAI, vol. 2, p. 5. Phoenix, AZ (2016)","DOI":"10.1609\/aaai.v30i1.10295"},{"issue":"3","key":"45_CR35","first-page":"227","volume":"16","author":"K Vanlehn","year":"2006","unstructured":"Vanlehn, K.: The behavior of tutoring systems. 
IJAIED 16(3), 227\u2013265 (2006)","journal-title":"IJAIED"},{"key":"45_CR36","doi-asserted-by":"crossref","unstructured":"Wang, P., Rowe, J., Min, W., Mott, B., Lester, J.: Interactive narrative personalization with deep reinforcement learning. In: Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence (2017)","DOI":"10.24963\/ijcai.2017\/538"},{"key":"45_CR37","doi-asserted-by":"crossref","unstructured":"Wang, X., Chen, W., Wu, J., Wang, Y.F., Yang Wang, W.: Video captioning via hierarchical reinforcement learning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4213\u20134222 (2018)","DOI":"10.1109\/CVPR.2018.00443"},{"key":"45_CR38","doi-asserted-by":"crossref","unstructured":"Williams, J.D.: The best of both worlds: unifying conventional dialog systems and POMDPs. In: Interspeech, pp. 1173\u20131176 (2008)","DOI":"10.21437\/Interspeech.2008-355"},{"key":"45_CR39","unstructured":"Zhou, G., Wang, J., Lynch, C., Chi, M.: Towards closing the loop: bridging machine-induced pedagogical policies to learning theories. 
In: EDM (2017)"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Education"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-23204-7_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,7]],"date-time":"2024-03-07T14:04:22Z","timestamp":1709820262000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-23204-7_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030232030","9783030232047"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-23204-7_45","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"21 June 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"AIED","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Intelligence in Education","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chicago, IL","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"USA","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25 June 2019","order":7,"name":"conference_start_date","label":"Conference Start 
Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 June 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aied2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/caed-lab.com\/aied2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}