{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T05:18:37Z","timestamp":1769577517440,"version":"3.49.0"},"reference-count":36,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2012,7,12]],"date-time":"2012-07-12T00:00:00Z","timestamp":1342051200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Theory Biosci."],"published-print":{"date-parts":[[2012,9]]},"DOI":"10.1007\/s12064-011-0142-z","type":"journal-article","created":{"date-parts":[[2012,7,11]],"date-time":"2012-07-11T17:27:46Z","timestamp":1342027666000},"page":"139-148","source":"Crossref","is-referenced-by-count":94,"title":["An information-theoretic approach to curiosity-driven reinforcement learning"],"prefix":"10.1007","volume":"131","author":[{"given":"Susanne","family":"Still","sequence":"first","affiliation":[]},{"given":"Doina","family":"Precup","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2012,7,12]]},"reference":[{"key":"142_CR1","doi-asserted-by":"crossref","first-page":"329","DOI":"10.1140\/epjb\/e2008-00175-0","volume":"63","author":"N Ay","year":"2008","unstructured":"Ay N, Bertschinger N, Der R, Guttler F, Olbrich E (2008) Predictive information and explorative behavior of autonomous robots. Eur Phys J B 63:329\u2013339","journal-title":"European Physical Journal B"},{"key":"142_CR2","first-page":"1","volume":"2027","author":"MG Azar","year":"2010","unstructured":"Azar MG, Kappen HJ (2010) Dynamic policy programming. J Mach Learn Res arXiv:1004.2027:1\u201326","journal-title":"Journal for Machine Learning Research. arXiv:1004"},{"key":"142_CR3","unstructured":"Bagnell JA, Schneider J (2003) Covariant policy search. In: International Joint Conference on Artificial Intelligence (IJCAI), Acapulco, Mexico"},{"key":"142_CR4","doi-asserted-by":"crossref","first-page":"2409","DOI":"10.1162\/089976601753195969","volume":"13","author":"W Bialek","year":"2001","unstructured":"Bialek W, Nemenman I, Tishby N (2001) Predictability, complexity and learning. Neural Comput 13:2409\u20132463","journal-title":"Neural Comput"},{"key":"142_CR5","unstructured":"Brafman RI, Tennenholtz M (2002) R-max\u2014a general polynomial time algorithm for near-optimal reinforcement learning. J Mach Learn Res 3:213\u2013231"},{"key":"142_CR6","first-page":"165","volume":"6","author":"G Chechnik","year":"2005","unstructured":"Chechnik G, Globerson A, Tishby N, Weiss Y (2005) Information bottleneck for gaussian variables. J Mach Learn Res 6:165\u2013188","journal-title":"Journal of Machine Learning Research"},{"key":"142_CR7","doi-asserted-by":"crossref","unstructured":"Chigirev DV, Bialek W (2004) Optimal manifold representation of data: an information theoretic perspective. In: Thrun S, Saul L, Sch\u00f6lkopf B (eds) Advances in neural information processing systems 16. MIT Press, Cambridge, MA","DOI":"10.1162\/0899766042321751"},{"issue":"2","key":"142_CR8","doi-asserted-by":"crossref","first-page":"251","DOI":"10.1142\/S021952590100019X","volume":"4","author":"JP Crutchfield","year":"2001","unstructured":"Crutchfield JP, Feldman DP (2001) Synchronizing to the environment: information theoretic limits on agent learning. Adv Complex Syst 4(2):251\u2013264","journal-title":"Adv in Complex Systems"},{"issue":"1","key":"142_CR9","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1063\/1.1530990","volume":"13","author":"JP Crutchfield","year":"2003","unstructured":"Crutchfield JP, Feldman DP (2003) Regularities unseen, randomness observed: levels of entropy convergence. Chaos 13(1):25\u201354","journal-title":"Chaos"},{"issue":"4","key":"142_CR10","doi-asserted-by":"crossref","first-page":"620","DOI":"10.1103\/PhysRev.106.620","volume":"106","author":"ET Jaynes","year":"1957","unstructured":"Jaynes ET (1957) Information theory and statistical mechanics. Phys Rev 106(4):620\u2013630","journal-title":"Phys Rev"},{"key":"142_CR11","unstructured":"Kearns M, Singh S (Eds) (1998) Near-optimal reinforcement learning in polynomial time. In: Proceedings of the 15th International Conference on Machine Learning, pp 260\u2013268"},{"key":"142_CR36","unstructured":"Little DY, Sommer FT (2011) Learning in embodied action-perception loops through exploration. arXiv:1112.1125v2"},{"issue":"2","key":"142_CR12","doi-asserted-by":"crossref","first-page":"265","DOI":"10.1109\/TEVC.2006.890271","volume":"11","author":"P-Y Oudeyer","year":"2007","unstructured":"Oudeyer P-Y, Kaplan F, Hafner V (2007) Intrinsic motivation systems for autonomous mental development. IEEE Trans Evol Comput 11(2):265\u2013286","journal-title":"IEEE Transactions on Evolutionary Computation"},{"key":"142_CR13","doi-asserted-by":"crossref","unstructured":"Pereira F, Tishby N, Lee L (1993) Distributional clustering of english words. In 30th Annual Meeting of the Association for Computational Linguistics, Association for Computational Linguistics, pp 183\u2013190. http:\/\/xxx.lanl.gov\/pdf\/cmp-lg\/9408011","DOI":"10.3115\/981574.981598"},{"key":"142_CR14","doi-asserted-by":"crossref","unstructured":"Peters J, Muelling K, Altun Y (2010) Relative entropy policy search. In: Proceedings of the Twenty-Fourth National Conference on Artificial Intelligence (AAAI). AAAI Press, Menlo Park","DOI":"10.1609\/aaai.v24i1.7727"},{"issue":"4","key":"142_CR15","doi-asserted-by":"crossref","first-page":"682","DOI":"10.1016\/j.neunet.2008.02.003","volume":"21","author":"J Peters","year":"2008","unstructured":"Peters J, Schaal S (2008) Reinforcement learning of motor skills with policy gradients. Neural Netw 21(4):682\u2013697","journal-title":"Neural Networks"},{"key":"142_CR16","doi-asserted-by":"crossref","unstructured":"Ratitch B, Precup D (2003) Using MDP characteristics to guide exploration in reinforcement learning. In: Proceedings of ECML, pp 313\u2013324","DOI":"10.1007\/978-3-540-39857-8_29"},{"issue":"11","key":"142_CR17","doi-asserted-by":"crossref","first-page":"2210","DOI":"10.1109\/5.726788","volume":"86","author":"K Rose","year":"1998","unstructured":"Rose K (1998) Deterministic annealing for clustering, compression, classification, regression, and related optimization problems. Proc IEEE 86(11):2210\u20132239","journal-title":"Proc. IEEE"},{"issue":"8","key":"142_CR18","doi-asserted-by":"crossref","first-page":"945","DOI":"10.1103\/PhysRevLett.65.945","volume":"65","author":"K Rose","year":"1990","unstructured":"Rose K, Gurewitz E, Fox GC (1990) Statistical mechanics and phase transitions in clustering. Phys Rev Lett 65(8):945\u2013948","journal-title":"Phys. Rev. Lett"},{"key":"142_CR19","doi-asserted-by":"crossref","unstructured":"Schmidhuber J (1991) Curious model-building control systems. In Proceedings of IJCNN, pp 1458\u20131463","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"142_CR20","unstructured":"Schmidhuber J (2009) Art and science as by-products of the search for novel patterns, or data compressible in unknown yet learnable ways. In: Multiple ways to design research. Research cases that reshape the design discipline. Swiss Design Network\u2014et\u00a0al. Edizioni, 2009, pp 98\u2013112"},{"key":"142_CR21","doi-asserted-by":"crossref","unstructured":"Shannon CE (1948) A mathematical theory of communication. Bell Syst Tech J 27:379\u2013423, 623\u2013656","DOI":"10.1002\/j.1538-7305.1948.tb01338.x"},{"key":"142_CR22","volume-title":"The dripping faucet as a model chaotic system","author":"R Shaw","year":"1984","unstructured":"Shaw R (1984) The dripping faucet as a model chaotic system. Aerial Press, Santa Cruz, California"},{"key":"142_CR23","doi-asserted-by":"crossref","unstructured":"Singh S, Barto AG, Chentanez N (2005) Intrinsically motivated reinforcement learning. In Proceedings of NIPS, pp 1281\u20131288","DOI":"10.21236\/ADA440280"},{"key":"142_CR24","doi-asserted-by":"crossref","unstructured":"Still S (2009) Information-theoretic approach to interactive learning. EPL 85 28005. doi: 10.1209\/0295-5075\/85\/28005","DOI":"10.1209\/0295-5075\/85\/28005"},{"issue":"12","key":"142_CR25","doi-asserted-by":"crossref","first-page":"2483","DOI":"10.1162\/0899766042321751","volume":"16","author":"S Still","year":"2004","unstructured":"Still S, Bialek W (2004) How many clusters? An information theoretic perspective. Neural Computation 16(12):2483\u20132506","journal-title":"Neural Computation"},{"key":"142_CR26","doi-asserted-by":"crossref","unstructured":"Still S, Bialek W, Bottou L (2004) Geometric clustering using the information bottleneck method. In: Thrun S, Saul LK, Sch\u00f6lkopf B (eds) Advances in neural information processing systems 16. MIT Press, Cambridge, MA","DOI":"10.1162\/0899766042321751"},{"key":"142_CR27","unstructured":"Strehl AL, Li L, Littman ML (2006) Incremental model-based learners with formal learning-time guarantees. In: Proceedings of the 22nd Conference on Uncertainty in Artificial Intelligence, Cambridge, MA"},{"key":"142_CR28","doi-asserted-by":"crossref","unstructured":"Sutton RS, Barto AG (1998) Reinforcement learning: an introduction. MIT Press, Cambridge","DOI":"10.1109\/TNN.1998.712192"},{"issue":"1","key":"142_CR29","first-page":"1633","volume":"10","author":"ME Taylor","year":"2009","unstructured":"Taylor ME, Stone P (2009) Transfer learning for reinforcement learning domains: a survey. J Mach Learn Res 10(1):1633\u20131685","journal-title":"Journal of Machine Learning Research"},{"key":"142_CR30","unstructured":"Thrun S, Moeller K (1992) Active exploration in dynamic environments. In: Advances in Neural Information Processing Systems (NIPS) 4, San Mateo, CA, pp 531\u2013538"},{"key":"142_CR31","unstructured":"Tishby N, Pereira F, Bialek W (1999) The information bottleneck method. In: Proceedings of the 37th Annual Allerton Conference, pp 363\u2013377"},{"key":"142_CR32","doi-asserted-by":"crossref","unstructured":"Tishby N, Polani D (2010) Information theory of decisions and actions. In: Perception-reason-action cycle: models, algorithms and systems. Springer, New York","DOI":"10.1007\/978-1-4419-1452-1_19"},{"issue":"28","key":"142_CR33","doi-asserted-by":"crossref","first-page":"11478","DOI":"10.1073\/pnas.0710743106","volume":"106","author":"E Todorov","year":"2009","unstructured":"Todorov E (2009) Efficient computation of optimal actions. Proc Nat Acad Sci USA 106(28):11478\u201311483","journal-title":"PNAS"},{"key":"142_CR34","unstructured":"Watkins CJCH (1989) Learning from delayed rewards. PhD thesis, Cambridge University"},{"key":"142_CR35","doi-asserted-by":"crossref","unstructured":"Wingate D, Singh S (2007) On discovery and learning of models with predictive representations of state for agents with continuous actions and observations. In Proceedings of International Conference on Autonomous Agents and Multiagent Systems (AAMAS), pp 1128\u20131135","DOI":"10.1145\/1329125.1329352"}],"container-title":["Theory in Biosciences"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12064-011-0142-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s12064-011-0142-z\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s12064-011-0142-z","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,6,24]],"date-time":"2023-06-24T06:54:38Z","timestamp":1687589678000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s12064-011-0142-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2012,7,12]]},"references-count":36,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2012,9]]}},"alternative-id":["142"],"URL":"https:\/\/doi.org\/10.1007\/s12064-011-0142-z","relation":{},"ISSN":["1431-7613","1611-7530"],"issn-type":[{"value":"1431-7613","type":"print"},{"value":"1611-7530","type":"electronic"}],"subject":[],"published":{"date-parts":[[2012,7,12]]}}}