{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T12:14:59Z","timestamp":1742991299462,"version":"3.40.3"},"publisher-location":"Cham","reference-count":25,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031746390"},{"type":"electronic","value":"9783031746406"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-74640-6_18","type":"book-chapter","created":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T23:00:32Z","timestamp":1735686032000},"page":"242-257","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning When to\u00a0Observe: A Frugal Reinforcement Learning Framework for\u00a0a\u00a0High-Cost World"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3567-7834","authenticated-orcid":false,"given":"Colin","family":"Bellinger","sequence":"first","affiliation":[]},{"given":"Isaac","family":"Tamblyn","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3921-4762","authenticated-orcid":false,"given":"Mark","family":"Crowley","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,1]]},"reference":[{"issue":"1\u20132","key":"18_CR1","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto, A.G., Mahadevan, S.: Recent advances in hierarchical reinforcement learning. Discr. Event Dyn. Syst. 13(1\u20132), 41\u201377 (2003)","journal-title":"Discr. Event Dyn. Syst."},{"key":"18_CR2","unstructured":"Beeler, C., et\u00a0al.: Chemgymrl: An interactive framework for reinforcement learning for digital chemistry. arXiv preprint arXiv:2305.14177 (2023)"},{"key":"18_CR3","doi-asserted-by":"publisher","first-page":"253","DOI":"10.1613\/jair.3912","volume":"47","author":"MG Bellemare","year":"2013","unstructured":"Bellemare, M.G., Naddaf, Y., Veness, J., Bowling, M.: The arcade learning environment: an evaluation platform for general agents. J. Artif. Intell. Res. 47, 253\u2013279 (2013)","journal-title":"J. Artif. Intell. Res."},{"key":"18_CR4","doi-asserted-by":"crossref","unstructured":"Bellinger, C., Coles, R., Crowley, M., Tamblyn, I.: Active measure reinforcement learning for observation cost minimization. In: Proceedings of the Canadian Conference on Artificial Intelligence. Canadian Artificial Intelligence Association (CAIAC) (Jun 8 2021). https:\/\/caiac.pubpub.org\/pub\/3hn8s5v9","DOI":"10.21428\/594757db.72846d04"},{"key":"18_CR5","doi-asserted-by":"crossref","unstructured":"Bellinger, C., Drozdyuk, A., Crowley, M., Tamblyn, I.: Balancing information with observation costs in deep reinforcement learning. In: Proceedings of the Canadian Conference on Artificial Intelligence. Canadian Artificial Intelligence Association (CAIAC) (may 27 2022). https:\/\/caiac.pubpub.org\/pub\/0jmy7gpd","DOI":"10.21428\/594757db.8e09102d"},{"key":"18_CR6","doi-asserted-by":"crossref","unstructured":"Bellinger, C., Drozdyuk, A., Crowley, M., Tamblyn, I.: Scientific discovery and the cost of measurement \u2013 balancing information and cost in reinforcement learning. In: ICML 2nd Annual AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE) (Feb 13 2023)","DOI":"10.21428\/594757db.8e09102d"},{"key":"18_CR7","unstructured":"Brockman, G., et al.: Openai gym. arXiv preprint arXiv:1606.01540 (2016)"},{"key":"18_CR8","unstructured":"Daniel, K.: Thinking fast and slow. United States of America (2011)"},{"key":"18_CR9","unstructured":"Fu, Y.: The Cost of OPS in Reinforcement Learning. Master\u2019s thesis, University of California, Berkeley (2021)"},{"key":"18_CR10","unstructured":"Gal, Y., McAllister, R., Rasmussen, C.E.: Improving pilco with bayesian neural network dynamics models. In: Data-Efficient Machine Learning workshop, ICML. vol.\u00a04, p.\u00a034 (2016)"},{"key":"18_CR11","unstructured":"Koseoglu, M., \u00d6zcelikkale, A.: How to miss data?: Reinforcement learning for environments with high observation cost. In: 2020 International Conference on Machine Learning (ICML) Workshop, Wien, \u00d6sterrike, 12-18 juli (2020)"},{"key":"18_CR12","doi-asserted-by":"crossref","unstructured":"Lakshminarayanan, A., Sharma, S., Ravindran, B.: Dynamic action repetition for deep reinforcement learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a031 (2017)","DOI":"10.1609\/aaai.v31i1.10918"},{"issue":"9","key":"18_CR13","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1038\/s42256-020-0226-x","volume":"2","author":"K Mills","year":"2020","unstructured":"Mills, K., Ronagh, P., Tamblyn, I.: Finding the ground state of spin Hamiltonians with reinforcement learning. Nature Mach. Intell. 2(9), 509\u2013517 (2020). https:\/\/doi.org\/10.1038\/s42256-020-0226-x","journal-title":"Nature Mach. Intell."},{"key":"18_CR14","doi-asserted-by":"crossref","unstructured":"Mnih, V., et\u00a0al.: Human-level control through deep reinforcement learning. Nature 518(7540), 529\u2013533 (2015)","DOI":"10.1038\/nature14236"},{"key":"18_CR15","first-page":"15650","volume":"34","author":"HA Nam","year":"2021","unstructured":"Nam, H.A., Fleming, S., Brunskill, E.: Reinforcement learning with state observation costs in action-contingent noiselessly observable markov decision processes. Adv. Neural. Inf. Process. Syst. 34, 15650\u201315666 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"8","key":"18_CR16","doi-asserted-by":"publisher","first-page":"1053","DOI":"10.1177\/0278364910369861","volume":"29","author":"SC Ong","year":"2010","unstructured":"Ong, S.C., Png, S.W., Hsu, D., Lee, W.S.: Planning under uncertainty for robotic tasks with mixed observability. Int. J. Robot. Res. 29(8), 1053\u20131068 (2010)","journal-title":"Int. J. Robot. Res."},{"issue":"3","key":"18_CR17","doi-asserted-by":"publisher","first-page":"441","DOI":"10.1287\/moor.12.3.441","volume":"12","author":"CH Papadimitriou","year":"1987","unstructured":"Papadimitriou, C.H., Tsitsiklis, J.N.: The complexity of Markov decision processes. Math. Oper. Res. 12(3), 441\u2013450 (1987)","journal-title":"Math. Oper. Res."},{"key":"18_CR18","doi-asserted-by":"crossref","unstructured":"Pathak, D., Agrawal, P., Efros, A.A., Darrell, T.: Curiosity-driven exploration by self-supervised prediction. In: International Conference on Machine Learning, pp. 2778\u20132787. PMLR (2017)","DOI":"10.1109\/CVPRW.2017.70"},{"key":"18_CR19","unstructured":"Schaul, T., Quan, J., Antonoglou, I., Silver, D.: Prioritized experience replay. arXiv preprint arXiv:1511.05952 (2015)"},{"key":"18_CR20","doi-asserted-by":"publisher","unstructured":"Shann, T.Y.A.: Reinforcement learning in the presence of sensing costs. Master\u2019s thesis, University of British Columbia (2022). https:\/\/doi.org\/10.14288\/1.0413129, https:\/\/open.library.ubc.ca\/collections\/ubctheses\/24\/items\/1.0413129","DOI":"10.14288\/1.0413129"},{"key":"18_CR21","unstructured":"Sharma, S., Srinivas, A., Ravindran, B.: Learning to repeat: Fine grained action repetition for deep reinforcement learning. arXiv preprint arXiv:1702.06054 (2017)"},{"key":"18_CR22","doi-asserted-by":"crossref","unstructured":"Simon, H.A.: Bounded rationality. Utility and probability, pp. 15\u201318 (1990)","DOI":"10.1007\/978-1-349-20568-4_5"},{"key":"18_CR23","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement learning: An introduction. MIT press (2018)"},{"issue":"1\u20132","key":"18_CR24","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1016\/S0004-3702(99)00052-1","volume":"112","author":"RS Sutton","year":"1999","unstructured":"Sutton, R.S., Precup, D., Singh, S.: Between mdps and semi-mdps: a framework for temporal abstraction in reinforcement learning. Artif. Intell. 112(1\u20132), 181\u2013211 (1999)","journal-title":"Artif. Intell."},{"key":"18_CR25","doi-asserted-by":"crossref","unstructured":"Van\u00a0Hasselt, H., Guez, A., Silver, D.: Deep reinforcement learning with double q-learning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a030 (2016)","DOI":"10.1609\/aaai.v30i1.10295"}],"container-title":["Communications in Computer and Information Science","Machine Learning and Principles and Practice of Knowledge Discovery in Databases"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-74640-6_18","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,31]],"date-time":"2025-01-31T08:18:16Z","timestamp":1738311496000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-74640-6_18"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031746390","9783031746406"],"references-count":25,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-74640-6_18","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECML PKDD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Turin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2023","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 September 2023","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22 September 2023","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ecml2023","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2023.ecmlpkdd.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}