{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:30:18Z","timestamp":1775579418093,"version":"3.50.1"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T00:00:00Z","timestamp":1622332800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,5,30]]},"DOI":"10.1109\/icra48506.2021.9560764","type":"proceedings-article","created":{"date-parts":[[2021,10,19]],"date-time":"2021-10-19T20:28:35Z","timestamp":1634675315000},"page":"4171-4178","source":"Crossref","is-referenced-by-count":27,"title":["Sample-efficient Reinforcement Learning in Robotic Table Tennis"],"prefix":"10.1109","author":[{"given":"Jonas","family":"Tebbe","sequence":"first","affiliation":[]},{"given":"Lukas","family":"Krauch","sequence":"additional","affiliation":[]},{"given":"Yapeng","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Andreas","family":"Zell","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-40994-3_42"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1177\/0278364912472380"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2018.03.012"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196536"},{"key":"ref31","article-title":"Hierarchical policy design for sample-efficient learning of robot table tennis through self-play","author":"mahjourian","year":"2018","journal-title":"CoRR"},{"key":"ref30","first-page":"1","article-title":"Model-free trajectory-based policy optimization with monotonic improvement","volume":"19","author":"akrour","year":"2018","journal-title":"Journal of Machine Learning Research"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TIM.2016.2555179"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ACC.2010.5530520"},{"key":"ref35","author":"asai","year":"2019"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ROBIO.2013.6739578"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/RCAR.2018.8621776"},{"key":"ref27","doi-asserted-by":"crossref","first-page":"1607","DOI":"10.1609\/aaai.v24i1.7727","article-title":"Relative entropy policy search","author":"peters","year":"2010","journal-title":"Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence"},{"key":"ref29","doi-asserted-by":"crossref","DOI":"10.1109\/IROS45743.2020.9341191","article-title":"Robotic table tennis with model-free reinforcement learning","author":"gao","year":"2020"},{"key":"ref2","article-title":"Agent57: Outperforming the atari human benchmark","author":"badia","year":"2020"},{"key":"ref1","article-title":"Rainbow: Combining improvements in deep reinforcement learning","author":"hessel","year":"2017"},{"key":"ref20","first-page":"2067","article-title":"Trial without error: Towards safe reinforcement learning via human intervention","author":"saunders","year":"2018","journal-title":"Proc of International Conference on Autonomous Agents and Multiagent Systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.049"},{"key":"ref21","first-page":"4299","article-title":"Deep reinforcement learning from human preferences","author":"christiano","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"ref23","article-title":"Leveraging demonstrations for deep reinforcement learning on robotics problems with sparse rewards","author":"vecerik","year":"2018"},{"key":"ref26","article-title":"Residual policy learning","author":"silver","year":"2018"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794127"},{"key":"ref50","first-page":"1889","article-title":"Trust region policy optimization","volume":"37","author":"schulman","year":"0"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1631\/jzus.C0910528"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1038\/nature14236"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1038\/nature16961"},{"key":"ref40","article-title":"Learning to play table tennis from scratch using muscular robots","author":"b\u00fcchler","year":"2020"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1177\/0278364913495721"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460528"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8793789"},{"key":"ref15","first-page":"262","article-title":"Sim-to-real robot learning from pixels with progressive nets","volume":"78","author":"rusu","year":"0"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917710318"},{"key":"ref18","first-page":"278","article-title":"Policy invariance under reward transformations: Theory and application to reward shaping","author":"ng","year":"1999","journal-title":"Proceedings of the Sixteenth International Conference on Machine Learning"},{"key":"ref19","first-page":"937","article-title":"A strategy-aware technique for learning behaviors from discrete human feedback","author":"loftin","year":"2014","journal-title":"Proceedings of the Twenty-Eighth AAAI Conference on Artificial Intelligence"},{"key":"ref4","article-title":"Openai gym","author":"brockman","year":"2016"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1126\/science.aar6404"},{"key":"ref6","article-title":"Continuous control with deep reinforcement learning","author":"lillicrap","year":"0","journal-title":"4th International Conference on Learning Representations ICLR 2016 San Juan Puerto Rico May 2-4 2016 Conference Track Proceedings"},{"key":"ref5","article-title":"Multi-goal reinforcement learning: Challenging robotics environments and request for research","author":"plappert","year":"2018"},{"key":"ref8","first-page":"1861","article-title":"Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor","volume":"80","author":"haarnoja","year":"0"},{"key":"ref7","first-page":"5048","article-title":"Hindsight experience replay","volume":"30","author":"andrychowicz","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref49","first-page":"1587","article-title":"Addressing function approximation error in actor-critic methods","volume":"80","author":"fujimoto","year":"0"},{"key":"ref9","article-title":"Deep reinforcement learning doesn’t work yet","author":"irpan","year":"2018"},{"key":"ref46","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume":"48","author":"mnih","year":"0"},{"key":"ref45","article-title":"Stable baselines","author":"hill","year":"2018"},{"key":"ref48","article-title":"Proximal policy optimization algorithms","author":"schulman","year":"2017","journal-title":"CoRR"},{"key":"ref47","first-page":"5279","article-title":"Scalable trust-region method for deep reinforcement learning using kronecker-factored approximation","volume":"30","author":"wu","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-12939-2_3"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6094892"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330701"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2011.5980578"}],"event":{"name":"2021 IEEE International Conference on Robotics and Automation (ICRA)","location":"Xi'an, China","start":{"date-parts":[[2021,5,30]]},"end":{"date-parts":[[2021,6,5]]}},"container-title":["2021 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9560720\/9560666\/09560764.pdf?arnumber=9560764","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,12]],"date-time":"2023-01-12T17:44:49Z","timestamp":1673545489000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9560764\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,5,30]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/icra48506.2021.9560764","relation":{},"subject":[],"published":{"date-parts":[[2021,5,30]]}}}