{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,27]],"date-time":"2026-02-27T16:01:30Z","timestamp":1772208090739,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T00:00:00Z","timestamp":1658102400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,7,18]]},"DOI":"10.1109\/ijcnn55064.2022.9892973","type":"proceedings-article","created":{"date-parts":[[2022,9,30]],"date-time":"2022-09-30T19:56:04Z","timestamp":1664567764000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Prioritized Sampling with Intrinsic Motivation in Multi-Task Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Carlo","family":"D'Eramo","sequence":"first","affiliation":[{"name":"TU Darmstadt,Computer Science Department,Darmstadt,Germany"}]},{"given":"Georgia","family":"Chalvatzaki","sequence":"additional","affiliation":[{"name":"TU Darmstadt,Computer Science Department,Darmstadt,Germany"}]}],"member":"263","reference":[{"key":"ref39","author":"farahmand","year":"2011","journal-title":"Regularization in Reinforcement Learning"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1090\/S0002-9904-1954-09848-8"},{"key":"ref33","first-page":"4496","article-title":"Distral: Robust multitask reinforcement learning","author":"teh","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","article-title":"Actor-mimic: Deep multitask and transfer reinforcement learning","author":"parisotto","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref31","article-title":"Policy distillation","author":"rusu","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref30","article-title":"Distributed prioritized experience replay","author":"horgan","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref37","author":"puterman","year":"2014","journal-title":"Markov Decision Processes Discrete Stochastic Dynamic Programming"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/461"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33013796"},{"key":"ref34","article-title":"Decoding multitask dqn in the world of minecraft","author":"liu","year":"2016","journal-title":"The 13th European Workshop on Reinforcement Learning (EWRL) 2016"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-021-03819-2"},{"key":"ref40","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref11","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","article-title":"Multitask learning","volume":"28","author":"caruana","year":"1997","journal-title":"Machine Learning"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2014.6907421"},{"key":"ref13","first-page":"4767","article-title":"Multi-task reinforcement learning with soft modularization","volume":"33","author":"yang","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref14","article-title":"Sharing knowledge in multi-task deep reinforcement learning","author":"d'eramo","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref15","first-page":"9767","article-title":"Multi-task reinforcement learning with context-based representations","volume":"139","author":"sodhani","year":"2021","journal-title":"Proceedings of the 38th International Conference on Machine Learning ser Proceedings of Machine Learning Research"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.1991.170605"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TEVC.2006.890271"},{"key":"ref18","article-title":"Learning to multi-task by active sampling","author":"sharma","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref19","article-title":"Curious: Intrinsically motivated modular multi-goal reinforcement learning","author":"colas","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref28","article-title":"Reconciling λ-returns with experience replay","volume":"32","author":"daley","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1038\/nature14539","article-title":"Deep learning","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref27","first-page":"1037","article-title":"Smart exploration in reinforcement learning using absolute temporal difference errors","author":"gehring","year":"2013","journal-title":"Proceedings of the 2013 International Conference on Autonomous Agents and Multi-agent Systems"},{"key":"ref3","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref6","article-title":"Trust region policy optimization","author":"schulman","year":"2015","journal-title":"International Conference on Machine Learning"},{"key":"ref29","article-title":"Prioritized experience replay","author":"schaul","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref5","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref8","article-title":"Mastering chess and shogi by self-play with a general reinforcement learning algorithm","author":"silver","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10295"},{"key":"ref2","doi-asserted-by":"crossref","DOI":"10.1038\/nature14236","article-title":"Human-level control through deep reinforcement learning","author":"mnih","year":"2015","journal-title":"Nature"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref1","author":"sutton","year":"2018","journal-title":"Reinforcement Learning An Introduction"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1023\/A:1013689704352"},{"key":"ref20","first-page":"1471","article-title":"Unifying count-based exploration and intrinsic motivation","author":"bellemare","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref45","first-page":"1107","article-title":"Least-squares policy iteration","volume":"4","author":"lagoudakis","year":"2003","journal-title":"Journal of Machine Learning Research"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref21","first-page":"222","article-title":"A possibility for implementing curiosity and boredom in model-building neural controllers","author":"schmidhuber","year":"1991","journal-title":"Proc of the international conference on simulation of adaptive behavior From animals to animats"},{"key":"ref42","first-page":"1","article-title":"Mushroomrl: Simplifying reinforcement learning research","volume":"22","author":"d'eramo","year":"2021","journal-title":"J Machine Learning Research"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2012.05.008"},{"key":"ref41","author":"belousov","year":"0","journal-title":"Simulators and real robot control interfaces for quanser platforms"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TCDS.2016.2538961"},{"key":"ref44","first-page":"503","article-title":"Tree-based batch mode reinforcement learning","volume":"6","author":"ernst","year":"2005","journal-title":"Journal of Machine Learning Research"},{"key":"ref26","first-page":"761","article-title":"Horde: A scalable real-time architecture for learning knowledge from unsupervised sensorimotor interaction","author":"sutton","year":"2011","journal-title":"The 10th International Conference on Autonomous Agents and Multiagent Systems-Volume 2"},{"key":"ref43","article-title":"Openai gym","author":"brockman","year":"2016","journal-title":"ArXiv"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2016.7759584"}],"event":{"name":"2022 International Joint Conference on Neural Networks (IJCNN)","location":"Padua, Italy","start":{"date-parts":[[2022,7,18]]},"end":{"date-parts":[[2022,7,23]]}},"container-title":["2022 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9891857\/9889787\/09892973.pdf?arnumber=9892973","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,3]],"date-time":"2022-11-03T23:00:20Z","timestamp":1667516420000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9892973\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,7,18]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/ijcnn55064.2022.9892973","relation":{},"subject":[],"published":{"date-parts":[[2022,7,18]]}}}