{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,2]],"date-time":"2026-06-02T23:09:46Z","timestamp":1780441786410,"version":"3.54.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030304836","type":"print"},{"value":"9783030304843","type":"electronic"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-30484-3_48","type":"book-chapter","created":{"date-parts":[[2019,9,8]],"date-time":"2019-09-08T23:02:47Z","timestamp":1567983767000},"page":"595-607","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Leveraging Domain Knowledge for Reinforcement Learning Using MMC Architectures"],"prefix":"10.1007","author":[{"given":"Rajkumar","family":"Ramamurthy","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Christian","family":"Bauckhage","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rafet","family":"Sifa","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jannis","family":"Sch\u00fccker","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Stefan","family":"Wrobel","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,9,9]]},"reference":[{"key":"48_CR1","unstructured":"Brockman, G., et al.: OpenAI Gym. \n                      arXiv:1606.01540\n                      \n                     (2016)"},{"key":"48_CR2","doi-asserted-by":"crossref","unstructured":"Clavera, I., Held, D., Abbeel, P.: Policy transfer via modularity and reward guiding. In: Proceedings Intelligent Robots and Systems (2017)","DOI":"10.1109\/IROS.2017.8205959"},{"key":"48_CR3","unstructured":"Conti, E., Madhavan, V., Such, F.P., Lehman, J., Stanley, K.O., Clune, J.: Improving exploration in evolution strategies for deep reinforcement learning via a population of novelty-seeking agents. arXiv preprint \n                      arXiv:1712.06560\n                      \n                     (2017)"},{"issue":"7\u20138","key":"48_CR4","doi-asserted-by":"publisher","first-page":"1435","DOI":"10.1016\/S0893-6080(98)00067-7","volume":"11","author":"H Cruse","year":"1998","unstructured":"Cruse, H., Kindermann, T., Schumm, M., Dean, J., Schmitz, J.: Walknet-a-biologically inspired network to control six-legged walking. Neural Networks 11(7\u20138), 1435\u20131447 (1998)","journal-title":"Neural Networks"},{"key":"48_CR5","doi-asserted-by":"crossref","unstructured":"Devin, C., Gupta, A., Darrell, T., Abbeel, P., Levine, S.: Learning modular neural network policies for multi-task and multi-robot transfer. In: Proceedings International Conference on Robotics and Automation (2017)","DOI":"10.1109\/ICRA.2017.7989250"},{"issue":"2","key":"48_CR6","doi-asserted-by":"publisher","first-page":"321","DOI":"10.1016\/0004-3702(94)90047-7","volume":"71","author":"M Dorigo","year":"1994","unstructured":"Dorigo, M., Colombetti, M.: Robot shaping: developing autonomous agents through learning. Artif. Intell. 71(2), 321\u2013370 (1994)","journal-title":"Artif. Intell."},{"key":"48_CR7","doi-asserted-by":"crossref","unstructured":"Gu, S., Holly, E., Lillicrap, T., Levine, S.: Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: Proceedings International Conference on Robotics and Automation (2017)","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"48_CR8","unstructured":"Ho, J., Ermon, S.: Generative adversarial imitation learning. In: Proceedings Neural Information Processing Systems (2016)"},{"key":"48_CR9","doi-asserted-by":"crossref","unstructured":"Lehman, J., Stanley, K.O.: Evolving a diversity of virtual creatures through novelty search and local competition. In: Proceedings International Conference on Genetic and Evolutionary Computation (2011)","DOI":"10.1145\/2001576.2001606"},{"issue":"1","key":"48_CR10","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1007\/s10458-015-9283-7","volume":"30","author":"R Loftin","year":"2016","unstructured":"Loftin, R., et al.: Learning behaviors via human-delivered discrete feedback: modeling implicit feedback strategies to speed up learning. Auton. Agent. Multi-Agent Syst. 30(1), 30\u201359 (2016)","journal-title":"Auton. Agent. Multi-Agent Syst."},{"key":"48_CR11","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1007\/978-3-642-33492-4_6","volume-title":"Discovery Science","author":"F Maes","year":"2012","unstructured":"Maes, F., Fonteneau, R., Wehenkel, L., Ernst, D.: Policy search in a space of simple closed-form formulas: towards interpretability of reinforcement learning. In: Ganascia, J.-G., Lenca, P., Petit, J.-M. (eds.) DS 2012. LNCS (LNAI), vol. 7569, pp. 37\u201351. Springer, Heidelberg (2012). \n                      https:\/\/doi.org\/10.1007\/978-3-642-33492-4_6"},{"key":"48_CR12","doi-asserted-by":"crossref","unstructured":"Mataric, M.J.: Reward Functions for Accelerated Learning. In: Machine Learning Proceedings 1994 (1994)","DOI":"10.1016\/B978-1-55860-335-6.50030-1"},{"key":"48_CR13","unstructured":"Mirowski, P., et al.: Learning to navigate in complex environments. arXiv preprint \n                      arXiv:1611.03673\n                      \n                     (2016)"},{"issue":"7540","key":"48_CR14","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529 (2015)","journal-title":"Nature"},{"key":"48_CR15","unstructured":"M\u00fcller, M., Dosovitskiy, A., Ghanem, B., Koltun, V.: Driving policy transfer via modularity and abstraction. arXiv preprint \n                      arXiv:1804.09364\n                      \n                     (2018)"},{"key":"48_CR16","unstructured":"Peng, B., MacGlashan, J., Loftin, R., Littman, M.L., Roberts, D.L., Taylor, M.E.: A need for speed: adapting agent action speed to improve task learning from non-expert humans. In: Proceedings of the 2016 International Conference on Autonomous Agents & Multiagent Systems (2016)"},{"key":"48_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01424-7_1","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2018","author":"R Ramamurthy","year":"2018","unstructured":"Ramamurthy, R., Bauckhage, C., Sifa, R., Wrobel, S.: Policy learning using SPSA. In: K\u016frkov\u00e1, V., Manolopoulos, Y., Hammer, B., Iliadis, L., Maglogiannis, I. (eds.) ICANN 2018. LNCS, vol. 11141, pp. 3\u201312. Springer, Cham (2018). \n                      https:\/\/doi.org\/10.1007\/978-3-030-01424-7_1"},{"issue":"3","key":"48_CR18","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1214\/aoms\/1177729586","volume":"22","author":"H Robbins","year":"1951","unstructured":"Robbins, H., Monro, S.: A stochastic approximation method. Annals of Mathematical Statistics 22(3), 400\u2013407 (1951)","journal-title":"Annals of Mathematical Statistics"},{"key":"48_CR19","unstructured":"Salimans, T., Ho, J., Chen, X., Sutskever, I.: Evolution strategies as a scalable alternative to reinforcement learning. \n                      arXiv:1703.03864\n                      \n                     (2017)"},{"key":"48_CR20","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust Region Policy Optimization. In: Proceedings International Conference on Machine Learning (2015)"},{"key":"48_CR21","unstructured":"Silver, D., Lever, G., Heess, N., Degris, T., Wierstra, D., Riedmiller, M.: Deterministic policy gradient algorithms. In: Proceedings International Conference on Machine Learning (2014)"},{"issue":"7676","key":"48_CR22","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver, D., et al.: Mastering the game of go without human knowledge. Nature 550(7676), 354 (2017)","journal-title":"Nature"},{"issue":"3","key":"48_CR23","doi-asserted-by":"publisher","first-page":"332","DOI":"10.1109\/9.119632","volume":"37","author":"JC Spall","year":"1992","unstructured":"Spall, J.C.: Multivariate stochastic approximation using a simultaneous perturbation gradient approximation. IEEE Trans. Autom. Control 37(3), 332\u2013341 (1992)","journal-title":"IEEE Trans. Autom. Control"},{"issue":"6","key":"48_CR24","doi-asserted-by":"publisher","first-page":"457","DOI":"10.1007\/s004220050495","volume":"79","author":"U Steink\u00fchler","year":"1998","unstructured":"Steink\u00fchler, U., Cruse, H.: A holistic model for an internal representation to control the movement of a manipulator with redundant degrees of freedom. Biol. Cybern. 79(6), 457\u2013466 (1998)","journal-title":"Biol. Cybern."},{"key":"48_CR25","unstructured":"Suay, H.B., Brys, T., Taylor, M.E., Chernova, S.: Learning from demonstration for shaping through inverse reinforcement learning. In: Proceedings of the 2016 International Conference on Autonomous Agents & Multiagent Systems (2016)"},{"key":"48_CR26","unstructured":"Tassa, Y., et al.: DeepMind Control Suite. arXiv preprint \n                      arXiv:1801.00690\n                      \n                     (2018)"},{"key":"48_CR27","unstructured":"Verma, A., Murali, V., Singh, R., Kohli, P., Chaudhuri, S.: Programmatically interpretable reinforcement learning. arXiv preprint \n                      arXiv:1804.02477\n                      \n                     (2018)"},{"key":"48_CR28","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Target-driven visual navigation in indoor scenes using deep reinforcement learning. In: Proceedings International Conference on Robotics and Automation (2017)","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"48_CR29","doi-asserted-by":"crossref","unstructured":"Zhu, Y., et al.: Reinforcement and imitation learning for diverse visuomotor skills. arXiv preprint \n                      arXiv:1802.09564\n                      \n                     (2018)","DOI":"10.15607\/RSS.2018.XIV.009"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2019: Deep Learning"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-30484-3_48","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,8]],"date-time":"2019-09-08T23:06:23Z","timestamp":1567983983000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-30484-3_48"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030304836","9783030304843"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-30484-3_48","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"9 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}