{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T15:45:29Z","timestamp":1774367129957,"version":"3.50.1"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9561138","type":"proceedings-article","created":{"date-parts":[[2021,10,20]],"date-time":"2021-10-20T00:28:35Z","timestamp":1634689715000},"page":"1810-1816","source":"Crossref","is-referenced-by-count":72,"title":["Regularizing Action Policies for Smooth Control with Reinforcement Learning"],"prefix":"10.1109","author":[{"given":"Siddharth","family":"Mysore","sequence":"first","affiliation":[{"name":"Boston University,Department of Computer Science,Boston,MA,02215"}]},{"given":"Bassel","family":"Mabsout","sequence":"additional","affiliation":[{"name":"Boston University,Department of Computer Science,Boston,MA,02215"}]},{"given":"Renato","family":"Mancuso","sequence":"additional","affiliation":[{"name":"Boston University,Department of Computer Science,Boston,MA,02215"}]},{"given":"Kate","family":"Saenko","sequence":"additional","affiliation":[{"name":"Boston University,Department of Computer Science,Boston,MA,02215"}]}],"member":"263","reference":[{"key":"ref33","article-title":"Spinning Up in Deep Reinforcement Learning","author":"achiam","year":"2018"},{"key":"ref32","article-title":"Stable baselines","author":"hill","year":"2018"},{"key":"ref31","article-title":"Betaflight","year":"0"},{"key":"ref30","article-title":"Openai gym","author":"brockman","year":"2016","journal-title":"CoRR"},{"key":"ref37","article-title":"Digital Signal Processing","author":"proakis","year":"1996","journal-title":"Principles Algorithms and Applications"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1115\/1.2899060"},{"key":"ref35","article-title":"Reinforcement learning for UAV attitude control","author":"koch","year":"2018","journal-title":"ACM Transactions on Cyber-Physical Systems"},{"key":"ref34","article-title":"Openai baselines","author":"dhariwal","year":"2017"},{"key":"ref10","article-title":"Neuroflight: Next generation flight control firmware","author":"koch","year":"2019","journal-title":"CoRR"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2017.2720851"},{"key":"ref12","article-title":"A closer look at deep policy gradients","author":"ilyas","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref13","article-title":"A study on overfitting in deep reinforcement learning","author":"zhang","year":"2018","journal-title":"CoRR"},{"key":"ref14","article-title":"A dissection of overfitting and generalization in continuous reinforcement learning","author":"zhang","year":"2018","journal-title":"CoRR"},{"key":"ref15","article-title":"Implementation matters in deep rl: A case study on ppo and trpo","author":"engstrom","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref16","article-title":"Improving 
reproducibility in machine learning research (a report from the neurips 2019 reproducibility program)","author":"pineau","year":"2020"},{"key":"ref17","article-title":"Regularization matters in policy optimization","author":"liu","year":"2019"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2014.X.019"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/s10514-015-9459-7"},{"key":"ref28","article-title":"Spectral normalization for generative adversarial networks","author":"miyato","year":"2018"},{"key":"ref4","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"2016","journal-title":"International Conference on Learning Representations"},{"key":"ref27","article-title":"Lipschitz regularity of deep neural networks: analysis and efficient estimation","author":"scaman","year":"2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref6","article-title":"Sim2Real view invariant visual servoing by recurrent control","author":"sadeghi","year":"2017","journal-title":"CoRR"},{"key":"ref29","first-page":"854","article-title":"Parseval networks: Improving robustness to adversarial examples","author":"cisse","year":"2017","journal-title":"International Conference on Machine Learning"},{"key":"ref5","article-title":"Benchmarking reinforcement learning algorithms on real-world robots","author":"mahmood","year":"0"},{"key":"ref8","first-page":"1329","article-title":"Benchmarking deep reinforcement learning for continuous control","author":"duan","year":"2016","journal-title":"Proceedings of the 33rd International Conference on International Conference on Machine Learning - Volume 48 ser ICML&#x2019;16"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967695"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2020.XVI.064"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1177\/0278364919887447"},{"key":"ref9","article-title":"Flight controller synthesis via deep reinforcement learning","author":"koch","year":"2019"},{"key":"ref20","article-title":"Deep reinforcement learning with smooth policy","author":"shen","year":"2020"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/BF00992698"},{"key":"ref21","first-page":"1057","article-title":"Policy gradient methods for reinforcement learning with function approximation","author":"sutton","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref24","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","author":"haarnoja","year":"2018","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref23","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","author":"fujimoto","year":"2018","journal-title":"International Conference on Machine Learning"},{"key":"ref26","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"CoRR"},{"key":"ref25","first-page":"1889","article-title":"Trust region policy optimization","volume":"37","author":"schulman","year":"2015","journal-title":"Proceedings of the 32nd International Conference on Machine Learning ser Proceedings of Machine Learning Research"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","location":"Xi'an, 
China","start":{"date-parts":[[2021,5,30]]},"end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09561138.pdf?arnumber=9561138","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T23:22:54Z","timestamp":1659482574000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9561138\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9561138","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}