{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:21:14Z","timestamp":1742970074096,"version":"3.40.3"},"publisher-location":"Cham","reference-count":33,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319462264"},{"type":"electronic","value":"9783319462271"}],"license":[{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2016,1,1]],"date-time":"2016-01-01T00:00:00Z","timestamp":1451606400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2016]]},"DOI":"10.1007\/978-3-319-46227-1_30","type":"book-chapter","created":{"date-parts":[[2016,9,3]],"date-time":"2016-09-03T05:34:10Z","timestamp":1472880850000},"page":"475-491","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["Planning with Information-Processing Constraints and Model Uncertainty in Markov Decision Processes"],"prefix":"10.1007","author":[{"given":"Jordi","family":"Grau-Moya","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Felix","family":"Leibfried","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Genewein","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniel A.","family":"Braun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2016,9,4]]},"reference":[{"key":"30_CR1","volume-title":"Adaptive control","author":"KJ \u00c5str\u00f6m","year":"2013","unstructured":"\u00c5str\u00f6m, K.J., Wittenmark, B.: Adaptive control. Courier Corporation, Mineola (2013)"},{"key":"30_CR2","unstructured":"Bellman, R.: Dynamic Programming, 1st edn. Princeton University Press, Princeton (1957). http:\/\/books.google.com\/books?id=fyVtp3EMxasC&pg=PR5&dq=dynamic+programming+richard+e+bellman&client=firefox-a#v=onepage&q=dynamic%20programming%20richard%20e%20bellman&f=false"},{"key":"30_CR3","volume-title":"Neuro-Dynamic Programming","author":"D Bertsekas","year":"1996","unstructured":"Bertsekas, D., Tsitsiklis, J.: Neuro-Dynamic Programming. Athena Scientific, Belmont (1996)"},{"key":"30_CR4","doi-asserted-by":"crossref","unstructured":"Braun, D.A., Ortega, P.A., Theodorou, E., Schaal, S.: Path integral control and bounded rationality. In: 2011 IEEE Symposium on Adaptive Dynamic Programming And Reinforcement Learning (ADPRL), pp. 202\u2013209. IEEE (2011)","DOI":"10.1109\/ADPRL.2011.5967366"},{"key":"30_CR5","unstructured":"van den Broek, B., Wiegerinck, W., Kappen, H.J.: Risk sensitive path integral control. In: UAI (2010)"},{"key":"30_CR6","unstructured":"Chow, Y., Tamar, A., Mannor, S., Pavone, M.: Risk-sensitive and robust decision-making: a CVaR optimization approach. In: Advances in Neural Information Processing Systems, pp. 1522\u20131530 (2015)"},{"key":"30_CR7","unstructured":"Duff, M.O.: Optimal learning: computational procedures for Bayes-adaptive Markov decision processes. Ph.d. thesis, University of Massachusetts Amherst (2002)"},{"key":"30_CR8","unstructured":"Fox, R., Pakman, A., Tishby, N.: G-learning: Taming the noise in reinforcement learning via soft updates. arXiv preprint (2015). arXiv:1512.08562"},{"key":"30_CR9","first-page":"1573","volume":"16","author":"A Geramifard","year":"2015","unstructured":"Geramifard, A., Dann, C., Klein, R.H., Dabney, W., How, J.P.: Rlpy: a value-function-based reinforcement learning framework for education and research. J. Mach. Learn. Res. 16, 1573\u20131578 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR10","unstructured":"Guez, A., Silver, D., Dayan, P.: Efficient Bayes-adaptive reinforcement learning using sample-based search. In: Advances in Neural Information Processing Systems, pp. 1025\u20131033 (2012)"},{"key":"30_CR11","doi-asserted-by":"crossref","first-page":"841","DOI":"10.1613\/jair.4117","volume":"48","author":"A Guez","year":"2013","unstructured":"Guez, A., Silver, D., Dayan, P.: Scalable and efficient Bayes-adaptive reinforcement learning based on Monte-Carlo tree search. J. Artif. Intell. Res. 48, 841\u2013883 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"30_CR12","doi-asserted-by":"publisher","DOI":"10.1515\/9781400829385","volume-title":"Robustness","author":"LP Hansen","year":"2008","unstructured":"Hansen, L.P., Sargent, T.J.: Robustness. Princeton University Press, Princeton (2008)"},{"issue":"2","key":"30_CR13","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1287\/moor.1040.0129","volume":"30","author":"GN Iyengar","year":"2005","unstructured":"Iyengar, G.N.: Robust dynamic programming. Math. Oper. Res. 30(2), 257\u2013280 (2005)","journal-title":"Math. Oper. Res."},{"issue":"20","key":"30_CR14","doi-asserted-by":"publisher","first-page":"200201","DOI":"10.1103\/PhysRevLett.95.200201","volume":"95","author":"HJ Kappen","year":"2005","unstructured":"Kappen, H.J.: Linear theory for control of nonlinear stochastic systems. Phys. Rev. Lett. 95(20), 200201 (2005)","journal-title":"Phys. Rev. Lett."},{"issue":"2","key":"30_CR15","doi-asserted-by":"publisher","first-page":"308","DOI":"10.1287\/mnsc.1060.0614","volume":"53","author":"S Mannor","year":"2007","unstructured":"Mannor, S., Simester, D., Sun, P., Tsitsiklis, J.N.: Bias and variance approximation in value function estimates. Manag. Sci. 53(2), 308\u2013322 (2007)","journal-title":"Manag. Sci."},{"issue":"5","key":"30_CR16","doi-asserted-by":"publisher","first-page":"780","DOI":"10.1287\/opre.1050.0216","volume":"53","author":"A Nilim","year":"2005","unstructured":"Nilim, A., El Ghaoui, L.: Robust control of Markov decision processes with uncertain transition matrices. Oper. Res. 53(5), 780\u2013798 (2005)","journal-title":"Oper. Res."},{"key":"30_CR17","doi-asserted-by":"crossref","unstructured":"Ortega, P.A., Braun, D.A.: A Bayesian rule for adaptive control based on causal interventions. In: 3rd Conference on Artificial General Intelligence (AGI-2010), Atlantis Press (2010)","DOI":"10.2991\/agi.2010.39"},{"issue":"11","key":"30_CR18","doi-asserted-by":"crossref","first-page":"475","DOI":"10.1613\/jair.3062","volume":"38","author":"PA Ortega","year":"2010","unstructured":"Ortega, P.A., Braun, D.A.: A minimum relative entropy principle for learning and acting. J. Artif. Intell. Res. 38(11), 475\u2013511 (2010)","journal-title":"J. Artif. Intell. Res."},{"key":"30_CR19","doi-asserted-by":"publisher","first-page":"20120683","DOI":"10.1098\/rspa.2012.0683","volume":"469","author":"PA Ortega","year":"2013","unstructured":"Ortega, P.A., Braun, D.A.: Thermodynamics as a theory of decision-making with information-processing costs. Proc. R. Soc. A. 469, 20120683 (2013). The Royal Society","journal-title":"Proc. R. Soc. A."},{"issue":"1","key":"30_CR20","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1186\/2194-3206-2-2","volume":"2","author":"PA Ortega","year":"2014","unstructured":"Ortega, P.A., Braun, D.A.: Generalized Thompson sampling for sequential decision-making and causal inference. Complex Adapt. Syst. Model. 2(1), 2 (2014)","journal-title":"Complex Adapt. Syst. Model."},{"key":"30_CR21","doi-asserted-by":"crossref","unstructured":"Ortega, P.A., Braun, D.A., Tishby, N.: Monte Carlo methods for exact & efficient solution of the generalized optimality equations. In: 2014 IEEE International Conference on Robotics and Automation (ICRA), pp. 4322\u20134327. IEEE (2014)","DOI":"10.1109\/ICRA.2014.6907488"},{"key":"30_CR22","unstructured":"Osogami, T.: Robustness and risk-sensitivity in Markov decision processes. In: Advances in Neural Information Processing Systems, pp. 233\u2013241 (2012)"},{"key":"30_CR23","doi-asserted-by":"crossref","unstructured":"Peters, J., M\u00fclling, K., Altun, Y., Poole, F.D., et al.: Relative entropy policy search. In: Twenty-Fourth National Conference on Artificial Intelligence (AAAI-10), pp. 1607\u20131612. AAAI Press (2010)","DOI":"10.1609\/aaai.v24i1.7727"},{"key":"30_CR24","first-page":"1729","volume":"12","author":"S Ross","year":"2011","unstructured":"Ross, S., Pineau, J., Chaib-draa, B., Kreitmann, P.: A Bayesian approach for learning and planning in partially observable Markov decision processes. J. Mach. Learn. Res. 12, 1729\u20131770 (2011)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR25","series-title":"Intelligent Systems Reference Library","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1007\/978-3-642-24647-0_3","volume-title":"Decision Making with Imperfect Decision Makers","author":"J Rubin","year":"2012","unstructured":"Rubin, J., Shamir, O., Tishby, N.: Trading value and information in MDPs. In: Guy, T.V., K\u00e1rn\u00fd, M., Wolpert, D.H. (eds.) Decision Making with Imperfect Decision Makers. Intelligent Systems Reference Library, vol. 28, pp. 57\u201374. Springer, Heidelberg (2012)"},{"issue":"7","key":"30_CR26","doi-asserted-by":"publisher","first-page":"1298","DOI":"10.1162\/NECO_a_00600","volume":"26","author":"Y Shen","year":"2014","unstructured":"Shen, Y., Tobia, M.J., Sommer, T., Obermayer, K.: Risk-sensitive reinforcement learning. Neural Comput. 26(7), 1298\u20131328 (2014)","journal-title":"Neural Comput."},{"key":"30_CR27","first-page":"2413","volume":"10","author":"AL Strehl","year":"2009","unstructured":"Strehl, A.L., Li, L., Littman, M.L.: Reinforcement learning in finite MDPs: Pac analysis. J. Mach. Learn. Res. 10, 2413\u20132444 (2009)","journal-title":"J. Mach. Learn. Res."},{"key":"30_CR28","doi-asserted-by":"crossref","unstructured":"Szita, I., L\u0151rincz, A.: The many faces of optimism: a unifying approach. In: Proceedings of the 25th International Conference on Machine Learning, pp. 1048\u20131055. ACM (2008)","DOI":"10.1145\/1390156.1390288"},{"key":"30_CR29","unstructured":"Szita, I., Szepesv\u00e1ri, C.: Model-based reinforcement learning with nearly tight exploration complexity bounds. In: Proceedings of the 27th International Conference on Machine Learning (ICML-10), pp. 1031\u20131038 (2010)"},{"key":"30_CR30","series-title":"Springer Series in Cognitive and Neural Systems","doi-asserted-by":"publisher","first-page":"601","DOI":"10.1007\/978-1-4419-1452-1_19","volume-title":"Perception-Action Cycle","author":"N Tishby","year":"2011","unstructured":"Tishby, N., Polani, D.: Information theory of decisions and actions. In: Cutsuridis, V., Hussain, A., Taylor, J.G. (eds.) Perception-Action Cycle. Springer Series in Cognitive and Neural Systems, pp. 601\u2013636. Springer, New York (2011)"},{"key":"30_CR31","doi-asserted-by":"crossref","unstructured":"Todorov, E.: Linearly-solvable Markov decision problems. In: Advances in Neural Information Processing Systems, pp. 1369\u20131376 (2006)","DOI":"10.7551\/mitpress\/7503.003.0176"},{"issue":"28","key":"30_CR32","doi-asserted-by":"publisher","first-page":"11478","DOI":"10.1073\/pnas.0710743106","volume":"106","author":"E Todorov","year":"2009","unstructured":"Todorov, E.: Efficient computation of optimal actions. Proc. Nat. Acad. Sci. 106(28), 11478\u201311483 (2009)","journal-title":"Proc. Nat. Acad. Sci."},{"issue":"1","key":"30_CR33","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1287\/moor.1120.0566","volume":"38","author":"W Wiesemann","year":"2013","unstructured":"Wiesemann, W., Kuhn, D., Rustem, B.: Robust Markov decision processes. Math. Oper. Res. 38(1), 153\u2013183 (2013)","journal-title":"Math. Oper. Res."}],"container-title":["Lecture Notes in Computer Science","Machine Learning and Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-46227-1_30","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,19]],"date-time":"2023-08-19T23:24:14Z","timestamp":1692487454000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-319-46227-1_30"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016]]},"ISBN":["9783319462264","9783319462271"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-46227-1_30","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2016]]},"assertion":[{"value":"4 September 2016","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Riva del Garda","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2016","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2016","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 September 2016","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2016","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}