{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,17]],"date-time":"2026-04-17T16:59:51Z","timestamp":1776445191627,"version":"3.51.2"},"reference-count":20,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,10,11]],"date-time":"2023-10-11T00:00:00Z","timestamp":1696982400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,10,11]],"date-time":"2023-10-11T00:00:00Z","timestamp":1696982400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,10,11]]},"DOI":"10.1109\/icstcc59206.2023.10308431","type":"proceedings-article","created":{"date-parts":[[2023,11,10]],"date-time":"2023-11-10T18:49:18Z","timestamp":1699642158000},"page":"428-433","source":"Crossref","is-referenced-by-count":3,"title":["Reproducibility in Deep Reinforcement Learning with Maximum Entropy"],"prefix":"10.1109","author":[{"given":"Tudor-Andrei","family":"Paleu","sequence":"first","affiliation":[{"name":"&#x201C;Gheorghe Asachi&#x201D; Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania"}]},{"given":"Carlos","family":"Pascal","sequence":"additional","affiliation":[{"name":"&#x201C;Gheorghe Asachi&#x201D; Technical University of Iasi,Department of Automatic Control and Applied Informatics,Iasi,Romania"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593722"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1812.05905"},{"key":"ref3","article-title":"Continuous control with deep reinforcement learning","author":"Lillicrap","year":"2015","journal-title":"preprint arXiv:1509.02971"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CASE48305.2020.9216986"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11694"},{"key":"ref7","article-title":"Openai gym","author":"Brockman","year":"2016","journal-title":"arXiv preprint arXiv:1606.01540"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593894"},{"key":"ref9","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume-title":"International conference on machine learning","author":"Haarnoja"},{"key":"ref10","first-page":"2829","article-title":"Continuous deep q-learning with model-based acceleration","volume-title":"International conference on machine learning","author":"Gu"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2017.2743240"},{"key":"ref12","article-title":"Sqil: Imitation learning via reinforcement learning with sparse rewards","author":"Reddy","year":"2019","journal-title":"preprint arXiv:1905.11108"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2022.03.037"},{"key":"ref15","first-page":"1352","article-title":"Reinforcement learning with deep energy-based policies","volume-title":"International conference on machine learning","author":"Haarnoja"},{"key":"ref16","first-page":"182","article-title":"Relative entropy inverse reinforcement learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics","author":"Boularias"},{"key":"ref17","article-title":"Maximum entropy RL (provably) solves some robust RL problems","author":"Eysenbach","year":"2021","journal-title":"arXiv preprint arXiv:2103.06257"},{"key":"ref18","article-title":"Artificial intelligence a modern approach","author":"Russell","year":"2020"},{"key":"ref19","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"preprint arXiv:1412.6980"},{"key":"ref20","article-title":"Tensorflow: Large-scale machine learning on heterogeneous distributed systems","author":"Abadi","year":"2016","journal-title":"preprint arXiv:1603.04467"}],"event":{"name":"2023 27th International Conference on System Theory, Control and Computing (ICSTCC)","location":"Timisoara, Romania","start":{"date-parts":[[2023,10,11]]},"end":{"date-parts":[[2023,10,13]]}},"container-title":["2023 27th International Conference on System Theory, Control and Computing (ICSTCC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10305866\/10308427\/10308431.pdf?arnumber=10308431","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,2]],"date-time":"2024-03-02T13:25:49Z","timestamp":1709385949000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10308431\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,10,11]]},"references-count":20,"URL":"https:\/\/doi.org\/10.1109\/icstcc59206.2023.10308431","relation":{},"subject":[],"published":{"date-parts":[[2023,10,11]]}}}