{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,3]],"date-time":"2025-07-03T04:21:53Z","timestamp":1751516513833,"version":"3.41.0"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030778569"},{"type":"electronic","value":"9783030778576"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-3-030-77857-6_8","type":"book-chapter","created":{"date-parts":[[2021,7,2]],"date-time":"2021-07-02T23:04:56Z","timestamp":1625267096000},"page":"120-136","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Teaching Reinforcement Learning Agents with Adaptive Instructional Systems"],"prefix":"10.1007","author":[{"given":"Joost","family":"van Oijen","sequence":"first","affiliation":[]},{"given":"Armon","family":"Toubman","sequence":"additional","affiliation":[]},{"given":"Olivier","family":"Claessen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,7,3]]},"reference":[{"key":"8_CR1","unstructured":"Sottilare, R., Brawner, K.: Exploring standardization opportunities by examining interaction between common adaptive instructional system components. In: Proceedings of the First Adaptive Instructional Systems (AIS) Standards Workshop, Orlando, Florida (2018)"},{"key":"8_CR2","unstructured":"Berner, C., et al.: Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680 (2019)"},{"key":"8_CR3","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction. MIT Press, Cambridge (2018)"},{"key":"8_CR4","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518, 529\u2013533 (2015)","journal-title":"Nature"},{"key":"8_CR5","unstructured":"Brockman, G., et al.: OpenAI Gym (2016)"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Lake, B.M., Ullman, T.D., Tenenbaum, J.B., Gershman, S.J.: Building machines that learn and think like people. Behav. Brain Sci. 40 (2017)","DOI":"10.1017\/S0140525X16001837"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Taylor, M.E., Stone, P.: Transfer learning for reinforcement learning domains: a survey. J. Mach. Learn. Res. 10 (2009)","DOI":"10.1007\/978-3-642-01882-4_2"},{"key":"8_CR8","doi-asserted-by":"publisher","unstructured":"van Oijen, J., Roessingh, J.J., Poppinga, G., Garc\u00eda, V.: Learning analytics of playing space fortress with reinforcement learning. In: International Conference on Human-Computer Interaction. pp. 363\u2013378. Springer (2019). https:\/\/doi.org\/10.1007\/978-3-030-22341-0_29","DOI":"10.1007\/978-3-030-22341-0_29"},{"key":"8_CR9","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., Sivic, J.: Learning and transferring mid-level image representations using convolutional neural networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1717\u20131724 (2014)","DOI":"10.1109\/CVPR.2014.222"},{"key":"8_CR10","unstructured":"Brown, T.B., et al.: Language models are few-shot learners. arXiv preprint arXiv:2005.14165 (2020)"},{"key":"8_CR11","doi-asserted-by":"crossref","unstructured":"M Spronck, P.H., Ponsen, M.J.V., Sprinkhuizen-Kuyper, I.G., Postma, E.O.: Adaptive game AI with dynamic scripting. Mach. Learn. 63, 217\u2013248 (2006)","DOI":"10.1007\/s10994-006-6205-6"},{"key":"8_CR12","first-page":"3675","volume":"29","author":"TD Kulkarni","year":"2016","unstructured":"Kulkarni, T.D., Narasimhan, K., Saeedi, A., Tenenbaum, J.: Hierarchical deep reinforcement learning: Integrating temporal abstraction and intrinsic motivation. Adv. Neural. Inf. Process. Syst. 29, 3675\u20133683 (2016)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"8_CR13","doi-asserted-by":"publisher","unstructured":"Branch, R.M.: Instructional Design: The ADDIE Approach. Springer, Boston (2009). https:\/\/doi.org\/10.1007\/978-0-387-09506-6","DOI":"10.1007\/978-0-387-09506-6"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Albawi, S., Mohammed, T.A., Al-Zawi, S.: Understanding of a convolutional neural network. In: 2017 International Conference on Engineering and Technology (ICET), pp. 1\u20136. IEEE (2017)","DOI":"10.1109\/ICEngTechnol.2017.8308186"},{"key":"8_CR15","unstructured":"Bakker, B.: Reinforcement learning with long short-term memory. In: NIPS, pp. 1475\u20131482 (2001)"},{"key":"8_CR16","unstructured":"Rae, J.W., Potapenko, A., Jayakumar, S.M., Lillicrap, T.P.: Compressive transformers for long-range sequence modelling. arXiv preprint arXiv:1911.05507 (2019)"},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Hedegaard, M.: The zone of proximal development as basis for instruction. In: Moll, L.C.E. (ed.) Vygotsky and Education: Instructional Implications and Applications of Sociohistorical Psychology, pp. 349\u2013371. Cambridge University Press (1990)","DOI":"10.1017\/CBO9781139173674.017"},{"key":"8_CR18","unstructured":"Fleer, S.: Scaffolding for learning from reinforcement: Improving interaction learning (2020)"},{"key":"8_CR19","unstructured":"Laud, A.D.: Theory and application of reward shaping in reinforcement learning (2004)"},{"key":"8_CR20","unstructured":"Niehaus, J., Riedl, M.O.: Scenario adaptation: An approach to customizing computer-based training games and simulations. In: Proceedings of the AIED 2009 Workshop on intelligent Educational Games, pp. 89\u201398 (2009)"},{"key":"8_CR21","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1016\/0010-0277(93)90058-4","volume":"48","author":"JL Elman","year":"1993","unstructured":"Elman, J.L.: Learning and development in neural networks: the importance of starting small. Cognition 48, 71\u201399 (1993)","journal-title":"Cognition"},{"key":"8_CR22","first-page":"1","volume":"21","author":"S Narvekar","year":"2020","unstructured":"Narvekar, S., Peng, B., Leonetti, M., Sinapov, J., Taylor, M.E., Stone, P.: Curriculum learning for reinforcement learning domains: a framework and survey. J. Mach. Learn. Res. 21, 1\u201350 (2020)","journal-title":"J. Mach. Learn. Res."},{"key":"8_CR23","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3054912","volume":"50","author":"A Hussein","year":"2017","unstructured":"Hussein, A., Gaber, M.M., Elyan, E., Jayne, C.: Imitation learning: a survey of learning methods. ACM Comput. Surv. (CSUR) 50, 1\u201335 (2017)","journal-title":"ACM Comput. Surv. (CSUR)"},{"key":"8_CR24","unstructured":"Borsa, D., Piot, B., Munos, R., Pietquin, O.: Observational learning by reinforcement learning. arXiv preprint arXiv:1706.06617 (2017)"},{"key":"8_CR25","unstructured":"Levine, S., Kumar, A., Tucker, G., Fu, J.: Offline reinforcement learning: Tutorial, review, and perspectives on open problems. arXiv preprint arXiv:2005.01643 (2020)"},{"key":"8_CR26","unstructured":"Fu, J., Kumar, A., Nachum, O., Tucker, G., Levine, S.: D4rl: datasets for deep data-driven reinforcement learning. arXiv preprint arXiv:2004.07219 (2020)"},{"key":"8_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"557","DOI":"10.1007\/978-3-030-22341-0_44","volume-title":"Adaptive Instructional Systems","author":"A Toubman","year":"2019","unstructured":"Toubman, A.: Validating air combat behaviour models for adaptive training of teams. In: Sottilare, R.A., Schwarz, J. (eds.) HCII 2019. LNCS, vol. 11597, pp. 557\u2013571. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-22341-0_44"},{"key":"8_CR28","first-page":"1437","volume":"16","author":"J Garc\u0131a","year":"2015","unstructured":"Garc\u0131a, J., Fern\u00e1ndez, F.: A comprehensive survey on safe reinforcement learning. J. Mach. Learn. Res. 16, 1437\u20131480 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Mondesire, S.C., Wiegand, R.P.: A demonstration of stability-plasticity imbalance in multi-agent, decomposition-based learning. In: 2015 IEEE 14th International Conference on Machine Learning and Applications (ICMLA), pp. 1070\u20131075. IEEE (2015)","DOI":"10.1109\/ICMLA.2015.106"},{"key":"8_CR30","unstructured":"Sottilare, R.: Understanding the AIS problem space. In: Proceedings of the 2nd Adaptive Instructional Systems (AIS) Standards Workshop (2019)"},{"key":"8_CR31","unstructured":"Aubret, A., Matignon, L., Hassas, S.: A survey on intrinsic motivation in reinforcement learning. arXiv preprint arXiv:1908.06976 (2019)"},{"key":"8_CR32","unstructured":"Gupta, A., Eysenbach, B., Finn, C., Levine, S.: Unsupervised meta-learning for reinforcement learning. arXiv preprint arXiv:1806.04640 (2018)"},{"key":"8_CR33","doi-asserted-by":"crossref","unstructured":"Karli, M., Efe, M.\u00d6., Sever, H.: Air combat learning from F-16 flight information. In: 2017 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1\u20136. IEEE (2017)","DOI":"10.1109\/FUZZ-IEEE.2017.8015615"},{"key":"8_CR34","unstructured":"Toubman, A.: Calculated moves: Generating air combat behaviour. Ph.D. dissertation (2020)"},{"key":"8_CR35","doi-asserted-by":"publisher","first-page":"279","DOI":"10.3390\/electronics7110279","volume":"7","author":"X Zhang","year":"2018","unstructured":"Zhang, X., Liu, G., Yang, C., Wu, J.: Research on air confrontation maneuver decision-making method based on reinforcement learning. Electronics 7, 279 (2018)","journal-title":"Electronics"},{"key":"8_CR36","unstructured":"Doyle, M.J., Portrey, A.M.: Rapid adaptive realistic behavior modeling is viable for use in training. In: Proceedings of the 23rd Conference on Behavior Representation in Modeling and Simulation (BRIMS), pp. 73\u201380 (2014)"},{"key":"8_CR37","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"493","DOI":"10.1007\/978-3-030-22341-0_39","volume-title":"Adaptive Instructional Systems","author":"J Freeman","year":"2019","unstructured":"Freeman, J., Watz, E., Bennett, W.: Adaptive agents for adaptive tactical training: the state of the art and emerging requirements. In: Sottilare, R.A., Schwarz, J. (eds.) HCII 2019. LNCS, vol. 11597, pp. 493\u2013504. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-22341-0_39"},{"key":"8_CR38","unstructured":"van Oijen, J., Toubman, A., Poppinga, G.: Effective behaviour modelling for computer generated forces. In: Interservice\/Industry Training, Simulation and Education Conference (I\/ITSEC). I\/ITSEC (2019)"},{"key":"8_CR39","doi-asserted-by":"publisher","unstructured":"Warwick, W., Rodgers, S.: Wrong in the right way: balancing realism against other constraints in simulation-based training. In: Sottilare, R., Schwarz, J. (eds.) Adaptive Instructional Systems. HCII 2019. LNCS, vol 11597, pp. 379\u2013388. Springer (2019). https:\/\/doi.org\/10.1007\/978-3-030-22341-0_30","DOI":"10.1007\/978-3-030-22341-0_30"},{"key":"8_CR40","doi-asserted-by":"publisher","unstructured":"Ludwig, J., Presnell, B.: Developing an adaptive opponent for tactical training. In: Sottilare, R., Schwarz, J. (eds.) Adaptive Instructional Systems. HCII 2019. LNCS, vol 11597. pp. 532\u2013541. Springer (2019). https:\/\/doi.org\/10.1007\/978-3-030-22341-0_42","DOI":"10.1007\/978-3-030-22341-0_42"},{"key":"8_CR41","unstructured":"Luotsinen, L.J., L\u00f8vlid, R.A.: Data-driven behavior modeling for computer generated forces. In: NATO Modelling and Simulation Group Symposium M&S Support to Operational Tasks Including War Gaming, Logistics, Cyber Defence (MSG-133), pp. 1\u201313 (2015)"},{"key":"8_CR42","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"227","DOI":"10.1007\/978-3-030-22341-0_19","volume-title":"Adaptive Instructional Systems","author":"R Sottilare","year":"2019","unstructured":"Sottilare, R.: Exploring methods to promote interoperability in adaptive instructional systems. In: Sottilare, R.A., Schwarz, J. (eds.) HCII 2019. LNCS, vol. 11597, pp. 227\u2013238. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-22341-0_19"},{"key":"8_CR43","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1007\/978-3-030-50788-6_3","volume-title":"Adaptive Instructional Systems","author":"K Brawner","year":"2020","unstructured":"Brawner, K.: Bridging conceptual models and architectural interchange for adaptive instructional systems. In: Sottilare, R.A., Schwarz, J. (eds.) HCII 2020. LNCS, vol. 12214, pp. 34\u201344. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-50788-6_3"}],"container-title":["Lecture Notes in Computer Science","Adaptive Instructional Systems. Design and Evaluation"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-77857-6_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,2]],"date-time":"2025-07-02T22:21:13Z","timestamp":1751494873000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-77857-6_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9783030778569","9783030778576"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-77857-6_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"3 July 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HCII","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human-Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24 July 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 July 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hcii2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2021.hci.international\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}