{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T06:56:41Z","timestamp":1764053801166,"version":"3.28.0"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,8]],"date-time":"2022-06-08T00:00:00Z","timestamp":1654646400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,6,8]]},"DOI":"10.23919\/acc53348.2022.9867395","type":"proceedings-article","created":{"date-parts":[[2022,9,5]],"date-time":"2022-09-05T16:24:10Z","timestamp":1662395050000},"page":"2611-2616","source":"Crossref","is-referenced-by-count":8,"title":["Soft Actor-Critic With Integer Actions"],"prefix":"10.23919","author":[{"given":"Ting-Han","family":"Fan","sequence":"first","affiliation":[{"name":"Princeton University,Department of Electrical and Computer Engineering,Princeton,NJ,08544"}]},{"given":"Yubo","family":"Wang","sequence":"additional","affiliation":[{"name":"Siemens Technology,Princeton,NJ,08540"}]}],"member":"263","reference":[{"article-title":"Smooth exploration for robotic reinforcement learning","year":"2021","author":"raffin","key":"ref38"},{"article-title":"Soft actor-critic for discrete action settings","year":"2019","author":"christodoulou","key":"ref33"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2009.5178745"},{"article-title":"Discrete and continuous action representation for practical rl in video games","year":"2019","author":"delalleau","key":"ref31"},{"article-title":"Boosting trust region policy optimization by normalizing flows policy","year":"2018","author":"tang","key":"ref30"},{"article-title":"Behavior-guided actor-critic: Improving exploration via learning policy behavior representation for deep reinforcement learning","year":"2021","author":"fayad","key":"ref37"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.010"},{"article-title":"Openai gym","year":"2016","author":"brockman","key":"ref35"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00800"},{"key":"ref10","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"0"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/84537.84552"},{"key":"ref12","first-page":"1889","article-title":"Trust region policy optimization","author":"schulman","year":"0"},{"article-title":"Proximal policy optimization algorithms","year":"2017","author":"schulman","key":"ref13"},{"key":"ref14","article-title":"High-dimensional continuous control using generalized advantage estimation","author":"schulman","year":"2016","journal-title":"4th International Conference on Learning Representations ICLR 2016 San Juan Puerto Rico May 2 - 4 2016 Conference Track Proceedings"},{"key":"ref15","article-title":"Variance reduction for policy gradient with action-dependent factorized baselines","author":"wu","year":"2018","journal-title":"International Conference on Learning Representations"},{"key":"ref16","article-title":"Categorical reparameterization with gumbel-softmax","author":"jang","year":"2017","journal-title":"5th International Conference on Learning Representations ICLR 2017 Toulon France April 24 - 26 2017 Conference Track Proceedings"},{"key":"ref17","article-title":"The concrete distribution: A continuous relaxation of discrete random variables","author":"maddison","year":"2017","journal-title":"5th International Conference on Learning Representations ICLR 2017 Toulon France April 24 - 26 2017 Conference Track Proceedings"},{"key":"ref18","article-title":"A* sampling","volume":"27","author":"maddison","year":"2014","journal-title":"Advances in neural information processing systems"},{"article-title":"Estimating or propagating gradients through stochastic neurons for conditional computation","year":"2013","author":"bengio","key":"ref19"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11798"},{"article-title":"An environment for autonomous driving decision-making","year":"2018","author":"leurent","key":"ref4"},{"article-title":"Discrete sequential prediction of continuous actions for deep rl","year":"2017","author":"metz","key":"ref27"},{"key":"ref3","first-page":"1334","article-title":"End-to-end training of deep visuomotor policies","volume":"17","author":"levine","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1287\/mnsc.26.3.242"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.6059"},{"article-title":"Learning to run a power network challenge: a retrospective analysis","year":"2021","author":"marot","key":"ref5"},{"key":"ref8","first-page":"387","article-title":"Deterministic policy gradient algorithms","author":"silver","year":"2014","journal-title":"International Conference on Machine Learning"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"73","DOI":"10.1016\/j.engappai.2004.08.018","article-title":"Application of reinforcement learning for agent-based production scheduling","volume":"18","author":"wang","year":"2005","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref9","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning"},{"article-title":"Playing atari with deep reinforcement learning","year":"2013","author":"mnih","key":"ref1"},{"key":"ref20","article-title":"Hierarchical multiscale recurrent neural networks","author":"chung","year":"2017","journal-title":"5th International Conference on Learning Representations ICLR 2017 Toulon France April 24 - 26 2017 Conference Track Proceedings"},{"article-title":"Powergym: A reinforcement learning environment for volt-var control in power distribution systems","year":"2021","author":"fan","key":"ref22"},{"article-title":"Pybullet, a python module for physics simulation for games, robotics and machine learning","year":"2016","author":"coumans","key":"ref21"},{"key":"ref24","doi-asserted-by":"crossref","first-page":"229","DOI":"10.1007\/BF00992696","article-title":"Simple statistical gradient-following algorithms for connectionist reinforcement learning","volume":"8","author":"williams","year":"1992","journal-title":"Machine Learning"},{"key":"ref23","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref26","article-title":"Neural networks for machine learning","author":"hinton","year":"2012","journal-title":"Coursera video lectures"},{"key":"ref25","article-title":"Rao-blackwellizing the straight-through gumbel-softmax gradient estimator","author":"paulus","year":"2021","journal-title":"International Conference on Learning Representations"}],"event":{"name":"2022 American Control Conference (ACC)","start":{"date-parts":[[2022,6,8]]},"location":"Atlanta, GA, USA","end":{"date-parts":[[2022,6,10]]}},"container-title":["2022 American Control Conference (ACC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9866948\/9867142\/09867395.pdf?arnumber=9867395","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,3]],"date-time":"2022-10-03T16:37:36Z","timestamp":1664815056000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9867395\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,8]]},"references-count":38,"URL":"https:\/\/doi.org\/10.23919\/acc53348.2022.9867395","relation":{},"subject":[],"published":{"date-parts":[[2022,6,8]]}}}