{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,25]],"date-time":"2025-09-25T15:53:31Z","timestamp":1758815611408},"publisher-location":"Berlin, Heidelberg","reference-count":31,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642352881"},{"type":"electronic","value":"9783642352898"}],"license":[{"start":{"date-parts":[[2012,1,1]],"date-time":"2012-01-01T00:00:00Z","timestamp":1325376000000},"content-version":"unspecified","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2012]]},"DOI":"10.1007\/978-3-642-35289-8_39","type":"book-chapter","created":{"date-parts":[[2012,11,14]],"date-time":"2012-11-14T12:03:17Z","timestamp":1352894597000},"page":"735-757","source":"Crossref","is-referenced-by-count":15,"title":["10 Steps and Some Tricks to Set up Neural Reinforcement Controllers"],"prefix":"10.1007","author":[{"given":"Martin","family":"Riedmiller","sequence":"first","affiliation":[]}],"member":"297","reference":[{"key":"39_CR1","unstructured":"Bertsekas, D.P.: Dynamic Programming and Optimal Control, vol. I, II. Athena Scientific, Belmont (1995)"},{"key":"39_CR2","doi-asserted-by":"crossref","unstructured":"Blum, M., Springenberg, J.T., W\u00fclfing, J., Riedmiller, M.: A Learned Feature Descriptor for Object Recognition in RGB-D Data. In: Proceedings of the IEEE International Conference on Robotics and Automation (ICRA), St. Paul, Minnesota, USA (2012)","DOI":"10.1109\/ICRA.2012.6225188"},{"key":"39_CR3","unstructured":"Bertsekas, D.P., Tsitsiklis, J.N.: Neuro Dynamic Programming. Athena Scientific, Belmont (1996)"},{"issue":"7\u20139","key":"39_CR4","doi-asserted-by":"publisher","first-page":"1508","DOI":"10.1016\/j.neucom.2008.12.019","volume":"72","author":"M.P. Deisenroth","year":"2009","unstructured":"Deisenroth, M.P., Rasmussen, C.E., Peters, J.: Gaussian Process Dynamic Programming. Neurocomputing\u00a072(7\u20139), 1508\u20131524 (2009)","journal-title":"Neurocomputing"},{"key":"39_CR5","first-page":"503","volume":"6","author":"D. Ernst","year":"2005","unstructured":"Ernst, D., Wehenkel, L., Geurts, P.: Tree-based batch mode reinforcement learning. Journal of Machine Learning Research\u00a06, 503\u2013556 (2005)","journal-title":"Journal of Machine Learning Research"},{"key":"39_CR6","doi-asserted-by":"crossref","unstructured":"Gabel, T., Lutz, C., Riedmiller, M.: Improved Neural Fitted Q Iteration Applied to a Novel Computer Gaming and Learning Benchmark. In: Proceedings of the IEEE Symposium on Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2011), Paris, France. IEEE Press (April 2011)","DOI":"10.1109\/ADPRL.2011.5967361"},{"key":"39_CR7","doi-asserted-by":"crossref","unstructured":"Gabel, T., Riedmiller, M.: On Experiences in a Complex and Competitive Gaming Domain: Reinforcement Learning Meets RoboCup. In: Proceedings of the IEEE Symposium on Computational Intelligence and Games, Honolulu, USA (2007)","DOI":"10.1109\/CIG.2007.368074"},{"key":"39_CR8","unstructured":"Gabel, T., Riedmiller, M.: Adaptive Reactive Job-Shop Scheduling with Reinforcement Learning Agents. International Journal of Information Technology and Intelligent Computing\u00a024(4) (2008)"},{"key":"39_CR9","unstructured":"Hafner, R., Riedmiller, M.: Reinforcement learning on an omnidirectional mobile robot. In: Proceedings of the 2003 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS 2003), Las Vegas (2003)"},{"key":"39_CR10","doi-asserted-by":"crossref","unstructured":"Hafner, R., Riedmiller, M.: Neural Reinforcement Learning Controllers for a Real Robot Application. In: Proceedings of the IEEE International Conference on Robotics and Automation (ICRA 2007), Rome, Italy (2007)","DOI":"10.1109\/ROBOT.2007.363631"},{"key":"39_CR11","doi-asserted-by":"crossref","unstructured":"Hafner, R., Riedmiller, M.: Reinforcement learning in feedback control. Machine Learning\u00a027(1), 55\u201374 (2011), 10.1007\/s10994-011-5235-x","DOI":"10.1007\/s10994-011-5235-x"},{"key":"39_CR12","unstructured":"Hans, A., Schneegass, D., Sch\u00e4fer, A.M., Udluft, S.: Safe exploration for reinforcement learning. In: ESANN, pp. 143\u2013148 (2008)"},{"key":"39_CR13","doi-asserted-by":"crossref","unstructured":"Kietzmann, T., Riedmiller, M.: The Neuro Slot Car Racer: Reinforcement Learning in a Real World Setting. In: Proceedings of the Int. Conference on Machine Learning Applications (ICMLA 2009), Miami, Florida. Springer (December 2009)","DOI":"10.1109\/ICMLA.2009.15"},{"key":"39_CR14","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"9","DOI":"10.1007\/3-540-49430-8_2","volume-title":"Neural Networks: Tricks of the Trade","author":"Y. LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Orr, G.B., M\u00fcller, K.-R.: Efficient backProp. In: Orr, G.B., M\u00fcller, K.-R. (eds.) NIPS-WS 1996. LNCS, vol.\u00a01524, pp. 9\u201350. Springer, Heidelberg (1998)"},{"key":"39_CR15","doi-asserted-by":"crossref","unstructured":"Lange, S., Riedmiller, M.: Deep auto-encoder neural networks in reinforcement learning. In: International Joint Conference on Neural Networks (IJCNN 2010), Barcelona, Spain (2010)","DOI":"10.1109\/IJCNN.2010.5596468"},{"key":"39_CR16","unstructured":"Lange, S., Riedmiller, M.: Deep learning of visual control policies. In: European Symposium on Artificial Neural Networks, Computational Intelligence and Machine Learning (ESANN 2010), Brugge, Belgium (2010)"},{"key":"39_CR17","unstructured":"Riedmiller, M., Braun, H.: A direct adaptive method for faster backpropagation learning: The RPROP algorithm. In: Ruspini, H. (ed.) Proceedings of the IEEE International Conference on Neural Networks (ICNN), San Francisco, pp. 586\u2013591 (1993)"},{"key":"39_CR18","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Gabel, T.: Distributed Policy Search Reinforcement Learning for Job-Shop Scheduling Tasks. TPRS International Journal of Production Research\u00a050(1) (2012); Available online from (May 2011)","DOI":"10.1080\/00207543.2011.571443"},{"issue":"1","key":"39_CR19","doi-asserted-by":"publisher","first-page":"55","DOI":"10.1007\/s10514-009-9120-4","volume":"27","author":"M. Riedmiller","year":"2009","unstructured":"Riedmiller, M., Gabel, T., Hafner, R., Lange, S.: Reinforcement Learning for Robot Soccer. Autonomous Robots\u00a027(1), 55\u201374 (2009)","journal-title":"Autonomous Robots"},{"key":"39_CR20","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Hafner, R., Lange, S., Lauer, M.: Learning to Dribble on a Real Robot by Success and Failure. In: Proceedings of the 2008 International Conference on Robotics and Automation (ICRA 2008), Pasadena CA. Springer (2008) (video presentation)","DOI":"10.1109\/ROBOT.2008.4543536"},{"key":"39_CR21","unstructured":"Riedmiller, M.: Learning to control dynamic systems. In: Trappl, R. (ed.) Proceedings of the 13th European Meeting on Cybernetics and Systems Research, EMCSR 1996 (1996)"},{"key":"39_CR22","unstructured":"Riedmiller, M.: Generating continuous control signals for reinforcement controllers using dynamic output elements. In: European Symposium on Artificial Neural Networks, ESANN 1997, Bruges (1997)"},{"key":"39_CR23","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"317","DOI":"10.1007\/11564096_32","volume-title":"Machine Learning: ECML 2005","author":"M. Riedmiller","year":"2005","unstructured":"Riedmiller, M.: Neural Fitted Q Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method. In: Gama, J., Camacho, R., Brazdil, P.B., Jorge, A.M., Torgo, L. (eds.) ECML 2005. LNCS (LNAI), vol.\u00a03720, pp. 317\u2013328. Springer, Heidelberg (2005)"},{"key":"39_CR24","unstructured":"Riedmiller, M.: Neural reinforcement learning to swing-up and balance a real pole. In: Proc. of the Int. Conference on Systems, Man and Cybernetics, 2005, Big Island, USA (October 2005)"},{"key":"39_CR25","unstructured":"Riedmiller, M., Lange, S., Voigtl\u00e4nder, A.: Autonomous reinforcement learning on raw visual input data in a real world application. In: Proceedings of the International Joint Conference on Neural Networks, Brisbane, Australia (2012)"},{"key":"39_CR26","doi-asserted-by":"crossref","unstructured":"Riedmiller, M., Montemerlo, M., Dahlkamp, H.: Learning to Drive in 20 Minutes. In: Proceedings of the FBIT 2007 Conference, Jeju, Korea. Springer (2007) (Best Paper Award)","DOI":"10.1109\/FBIT.2007.37"},{"key":"39_CR27","volume-title":"Reinforcement Learning","author":"R.S. Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning. MIT Press, Cambridge (1998)"},{"key":"39_CR28","first-page":"1038","volume-title":"Advances in Neural Information Processing Systems","author":"R.S. Sutton","year":"1996","unstructured":"Sutton, R.S.: Generalization in reinforcement learning: Successful examples using sparse coarse coding. In: Touretzky, D.S., Mozer, M.C., Hasselmo, M.E. (eds.) Advances in Neural Information Processing Systems, vol.\u00a08, pp. 1038\u20131044. MIT Press, Cambridge (1996)"},{"key":"39_CR29","doi-asserted-by":"crossref","unstructured":"Timmer, S., Riedmiller, M.: Fitted Q Iteration with CMACs. In: Proceedings of the IEEE International Symposium on Approximate Dynamic Programming and Reinforcement Learning (ADPRL 2007), Honolulu, USA (2007)","DOI":"10.1109\/ADPRL.2007.368162"},{"key":"39_CR30","unstructured":"Watkins, C.J.: Learning from Delayed Rewards. Phd thesis, Cambridge University (1989)"},{"key":"39_CR31","series-title":"Lecture Notes in Artificial Intelligence","doi-asserted-by":"publisher","first-page":"442","DOI":"10.1007\/978-3-540-74958-5_41","volume-title":"Machine Learning: ECML 2007","author":"T.J. Walsh","year":"2007","unstructured":"Walsh, T.J., Nouri, A., Li, L., Littman, M.L.: Planning and Learning in Environments with Delayed Feedback. In: Kok, J.N., Koronacki, J., Lopez de Mantaras, R., Matwin, S., Mladeni\u010d, D., Skowron, A. (eds.) ECML 2007. LNCS (LNAI), vol.\u00a04701, pp. 442\u2013453. Springer, Heidelberg (2007)"}],"container-title":["Lecture Notes in Computer Science","Neural Networks: Tricks of the Trade"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-35289-8_39","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,31]],"date-time":"2022-01-31T08:43:18Z","timestamp":1643618598000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-35289-8_39"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012]]},"ISBN":["9783642352881","9783642352898"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-35289-8_39","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2012]]}}}