{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,18]],"date-time":"2026-04-18T16:28:08Z","timestamp":1776529688793,"version":"3.51.2"},"reference-count":83,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["DGE-2125362"],"award-info":[{"award-number":["DGE-2125362"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11127713","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"6510-6518","source":"Crossref","is-referenced-by-count":3,"title":["SR-AIF: Solving Sparse-Reward Robotic Tasks From Pixels with Active Inference and World Models"],"prefix":"10.1109","author":[{"given":"Viet Dung","family":"Nguyen","sequence":"first","affiliation":[{"name":"Rochester Institute of Technology,Department of Computer Science,Rochester,NY,USA,14623"}]},{"given":"Zhizhuo","family":"Yang","sequence":"additional","affiliation":[{"name":"Rochester Institute of Technology,Department of Computer Science,Rochester,NY,USA,14623"}]},{"given":"Christopher L.","family":"Buckley","sequence":"additional","affiliation":[{"name":"University of Sussex,Department of Informatics,Brighton,UK,BN1 9RH"}]},{"given":"Alexander","family":"Ororbia","sequence":"additional","affiliation":[{"name":"Rochester Institute of Technology,Department of Computer Science,Rochester,NY,USA,14623"}]}],"member":"263","reference":[{"key":"ref1","volume-title":"Artificial Intelligence: A Modern Approach","author":"Russell","year":"2010"},{"issue":"7","key":"ref2","doi-asserted-by":"crossref","DOI":"10.3390\/s23073762","article-title":"A survey on deep reinforcement learning algorithms for robotic manipulation","volume":"23","author":"Han","year":"2023","journal-title":"Sensors"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/s11370-021-00398-z"},{"key":"ref4","first-page":"1856","article-title":"Soft actor-critic: Offpolicy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ICML 2018","volume":"80","author":"Haarnoja","year":"2018"},{"key":"ref5","volume-title":"Soft actor-critic algorithms and applications","author":"Haarnoja","year":"2018"},{"key":"ref6","article-title":"Playing atari with deep reinforcement learning","volume":"abs\/1312.5602","author":"Mnih","year":"2013","journal-title":"CoRR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/122344.122377"},{"key":"ref8","volume-title":"Pattern Recognition and Machine Learning (Information Science and Statistics)","author":"Bishop","year":"2006"},{"key":"ref9","article-title":"Imagination-augmented agents for deep reinforcement learning","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Racani\u00e8re","year":"2017"},{"key":"ref10","article-title":"Model based reinforcement learning for atari","volume-title":"International Conference on Learning Representations","author":"Kaiser","year":"2020"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.7551\/mitpress\/12441.001.0001"},{"issue":"9","key":"ref12","doi-asserted-by":"crossref","first-page":"1325","DOI":"10.1016\/j.neunet.2003.06.005","article-title":"Learning and inference in the brain","volume":"16","author":"Friston","year":"2003","journal-title":"Neural Networks"},{"key":"ref13","article-title":"Reinforcement learning through active inference","volume":"abs\/2002.12636","author":"Tschantz","year":"2020","journal-title":"CoRR"},{"key":"ref14","article-title":"Online reinforcement learning with sparse rewards through an active inference capsule","volume":"abs\/2106.02390","author":"Noel","year":"2021","journal-title":"ArXiv"},{"key":"ref15","article-title":"Mastering diverse domains through world models","volume":"abs\/2301.04104","author":"Hafner","year":"2023","journal-title":"ArXiv"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-64919-7_8"},{"key":"ref17","first-page":"11662","article-title":"Deep active inference agents using monte-carlo methods","volume":"33","author":"Fountas","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"102348","DOI":"10.1016\/j.jmp.2020.102348","article-title":"Deep active inference as variational policy gradients","volume":"96","author":"Millidge","year":"2020","journal-title":"Journal of Mathematical Psychology"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-93736-2_60"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.3389\/fncom.2020.574372"},{"key":"ref21","article-title":"Contrastive active inference","volume-title":"Advances in Neural Information Processing Systems","author":"Mazzaglia","year":"2021"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref23","article-title":"TD-MPC2: Scalable, robust world models for continuous control","volume-title":"The Twelfth International Conference on Learning Representations","author":"Hansen","year":"2024"},{"key":"ref24","article-title":"Learning and querying fast generative models for reinforcement learning","volume":"abs\/1802.03006","author":"Buesing","year":"2018","journal-title":"CoRR"},{"key":"ref25","article-title":"Embed to control: A locally linear latent dynamics model for control from raw images","volume-title":"Advances in Neural Information Processing Systems","volume":"28","author":"Watter","year":"2015"},{"key":"ref26","first-page":"8583","article-title":"Planning to explore via self-supervised world models","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"119","author":"Sekar","year":"2020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9560734"},{"key":"ref28","first-page":"2555","article-title":"Learning latent dynamics for planning from pixels","volume-title":"Proceedings of the 36th International Conference on Machine Learning, ICML 2019","volume":"97","author":"Hafner","year":"2019"},{"key":"ref29","article-title":"Dream to control: Learning behaviors by latent imagination","volume-title":"International Conference on Learning Representations","author":"Hafner","year":"2020"},{"key":"ref30","article-title":"Action and perception as divergence minimization","volume":"abs\/2009.01791","author":"Hafner","year":"2020","journal-title":"ArXiv"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref32","article-title":"Model-based active exploration","volume-title":"International Conference on Machine Learning","author":"Shyam","year":"2018"},{"issue":"1","key":"ref33","doi-asserted-by":"crossref","first-page":"45","DOI":"10.1111\/nyas.15118","article-title":"Neural representation in active inference: Using generative models to interact with-and understand-the lived world","volume":"1534","author":"Pezzulo","year":"2024","journal-title":"Annals of the New York Academy of Sciences"},{"issue":"2","key":"ref34","doi-asserted-by":"crossref","DOI":"10.3390\/e24020301","article-title":"The free energy principle for perception and action: A deep learning perspective","volume":"24","author":"Mazzaglia","year":"2022","journal-title":"Entropy"},{"key":"ref35","article-title":"World models","volume":"abs\/1803.10122","author":"Ha","year":"2018","journal-title":"ArXiv"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1162\/NECO_a_00912"},{"key":"ref37","volume-title":"Reinforcement Learning: An Introduction","author":"Sutton","year":"2018"},{"key":"ref38","doi-asserted-by":"crossref","first-page":"127","DOI":"10.1038\/nrn2787","article-title":"The free-energy principle: a unified brain theory?","volume":"11","author":"Friston","year":"2010","journal-title":"Nature Reviews Neuroscience"},{"key":"ref39","doi-asserted-by":"crossref","first-page":"102632","DOI":"10.1016\/j.jmp.2021.102632","article-title":"A step-by-step tutorial on active inference and its application to empirical data","volume":"107","author":"Smith","year":"2022","journal-title":"Journal of Mathematical Psychology"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1080\/17588928.2015.1020053"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.3389\/fncom.2023.1099593"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1098\/rsif.2013.0475"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01354"},{"key":"ref44","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","volume":"abs\/1412.3555","author":"Chung","year":"2014","journal-title":"ArXiv"},{"key":"ref45","article-title":"Mastering atari with discrete world models","volume-title":"International Conference on Learning Representations","author":"Hafner","year":"2021"},{"key":"ref46","article-title":"Deep variational bayes filters: Unsupervised learning of state space models from raw data","volume-title":"International Conference on Learning Representations","author":"Karl","year":"2017"},{"key":"ref47","first-page":"1280","article-title":"Probabilistic recurrent state-space models","volume-title":"Proceedings of the 35th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"80","author":"Doerr","year":"2018"},{"key":"ref48","article-title":"Auto-encoding variational bayes","volume-title":"2nd International Conference on Learning Representations, ICLR 2014","author":"Kingma","year":"2014"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref50","volume-title":"Deep Learning","author":"Goodfellow","year":"2016"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-018-38246-3"},{"issue":"40","key":"ref52","first-page":"1303","article-title":"Stochastic variational inference","volume":"14","author":"Hoffman","year":"2013","journal-title":"Journal of Machine Learning Research"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01357"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_00999"},{"key":"ref55","first-page":"5639","article-title":"CURL: Contrastive unsupervised representations for reinforcement learning","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ser. Proceedings of Machine Learning Research","volume":"119","author":"Laskin","year":"2020"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref57","article-title":"Representation learning with contrastive predictive coding","volume":"abs\/1807.03748","author":"van den Oord","year":"2018","journal-title":"CoRR"},{"key":"ref58","article-title":"Hindsight experience replay","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Andrychowicz","year":"2017"},{"key":"ref59","first-page":"21271","article-title":"Bootstrap your own latent - a new approach to self-supervised learning","volume-title":"Advances in Neural Information Processing Systems","volume":"33","author":"Grill","year":"2020"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1162\/NETN_a_00018"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1016\/j.neubiorev.2016.06.022"},{"key":"ref62","article-title":"Mastering the unsupervised reinforcement learning benchmark from pixels","volume-title":"40th International Conference on Machine Learning","author":"Rajeswar","year":"2023"},{"key":"ref63","doi-asserted-by":"crossref","first-page":"e41703","DOI":"10.7554\/eLife.41703","article-title":"Computational mechanisms of curiosity and goal-directed exploration","volume":"8","author":"Schwartenbeck","year":"2019","journal-title":"eLife"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1214\/aoms\/1177728069"},{"key":"ref65","volume-title":"Information Theory, Inference, and Learning Algorithms","author":"MacKay","year":"2003"},{"issue":"1","key":"ref66","doi-asserted-by":"crossref","first-page":"70","DOI":"10.1016\/j.jphysparis.2006.10.001","article-title":"A free energy principle for the brain","volume":"100","author":"Friston","year":"2006","journal-title":"Journal of Physiology-Paris"},{"key":"ref67","article-title":"Mortal computation: A foundation for biomimetic intelligence","author":"Ororbia","year":"2023","journal-title":"arXiv preprint"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00907"},{"key":"ref69","doi-asserted-by":"crossref","DOI":"10.3389\/neuro.12.006.2007","article-title":"What is intrinsic motivation? a typology of computational approaches","volume":"1","author":"Oudeyer","year":"2007","journal-title":"Frontiers in Neurorobotics"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-2271-7"},{"key":"ref71","article-title":"High-dimensional continuous control using generalized advantage estimation","volume":"abs\/1506.02438","author":"Schulman","year":"2015","journal-title":"CoRR"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1162\/neco_a_01351"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2024.124315"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10160530"},{"key":"ref75","article-title":"Estimating or propagating gradients through stochastic neurons for conditional computation","volume":"abs\/1308.3432","author":"Bengio","year":"2013","journal-title":"ArXiv"},{"key":"ref76","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Proceedings of the Conference on Robot Learning, ser. Proceedings of Machine Learning Research","volume":"100","author":"Yu","year":"2020"},{"key":"ref77","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","author":"Zhu","year":"2020","journal-title":"arXiv preprint"},{"key":"ref78","article-title":"Proximal policy optimization algorithms","volume":"abs\/1707.06347","author":"Schulman","year":"2017","journal-title":"CoRR"},{"key":"ref79","article-title":"On uncertainty in deep state space models for model-based reinforcement learning","volume-title":"Transactions on Machine Learning Research","author":"Becker","year":"2022"},{"key":"ref80","first-page":"6309","article-title":"Neural discrete representation learning","volume-title":"Proceedings of the 31st International Conference on Neural Information Processing Systems, ser. NIPS\u201917","author":"van den Oord"},{"key":"ref81","volume-title":"Isolating sources of disentanglement in variational autoencoders","author":"Chen","year":"2018"},{"key":"ref82","article-title":"Causally aligned curriculum learning","volume-title":"Submitted to The Twelfth International Conference on Learning Representations","year":"2023"},{"key":"ref83","article-title":"Attention is all you need","volume-title":"Advances in Neural Information Processing Systems","volume":"30","author":"Vaswani","year":"2017"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11127713.pdf?arnumber=11127713","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:46:16Z","timestamp":1756881976000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11127713\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":83,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11127713","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}