{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,7]],"date-time":"2026-04-07T16:30:22Z","timestamp":1775579422407,"version":"3.50.1"},"reference-count":38,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:00:00Z","timestamp":1665187200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T00:00:00Z","timestamp":1665187200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2023,6]]},"DOI":"10.1007\/s10489-022-04131-w","type":"journal-article","created":{"date-parts":[[2022,10,8]],"date-time":"2022-10-08T06:03:50Z","timestamp":1665209030000},"page":"13309-13322","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":16,"title":["Optimal stroke learning with policy gradient approach for robotic table tennis"],"prefix":"10.1007","volume":"53","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9258-9304","authenticated-orcid":false,"given":"Yapeng","family":"Gao","sequence":"first","affiliation":[]},{"given":"Jonas","family":"Tebbe","sequence":"additional","affiliation":[]},{"given":"Andreas","family":"Zell","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,8]]},"reference":[{"issue":"1","key":"4131_CR1","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1023\/A:1022140919877","volume":"13","author":"AG Barto","year":"2003","unstructured":"Barto AG, Mahadevan S (2003) Recent advances in hierarchical reinforcement learning. 
Discrete Event Dynamic Syst 13(1):41\u201377. https:\/\/doi.org\/10.1023\/A:1022140919877","journal-title":"Discrete Event Dynamic Syst"},{"key":"4131_CR2","doi-asserted-by":"publisher","unstructured":"Kendall A, Hawke J, Janz D, Mazur P, Reda D, Allen J-M, Lam V-D, Bewley A, Shah A (2019) Learning to drive in a day. In: 2019 international conference on robotics and automation (ICRA), pp 8248\u20138254. https:\/\/doi.org\/10.1109\/ICRA.2019.8793742","DOI":"10.1109\/ICRA.2019.8793742"},{"key":"4131_CR3","doi-asserted-by":"publisher","unstructured":"Osi\u0144ski B, Jakubowski A, Zi\u0119cina P, Mi\u0142o\u015b P, Galias C, Homoceanu S, Michalewski H (2020) Simulation-based reinforcement learning for real-world autonomous driving. In: 2020 IEEE international conference on robotics and automation (ICRA), pp 6411\u20136418, https:\/\/doi.org\/10.1109\/ICRA40945.2020.9196730","DOI":"10.1109\/ICRA40945.2020.9196730"},{"issue":"7676","key":"4131_CR4","doi-asserted-by":"publisher","first-page":"354","DOI":"10.1038\/nature24270","volume":"550","author":"D Silver","year":"2017","unstructured":"Silver D, Schrittwieser J, Simonyan K, Antonoglou I, Huang A, Guez A, Hubert T, Baker L, Lai M, Bolton A et al (2017) Mastering the game of go without human knowledge. Nature 550 (7676):354\u2013359. https:\/\/doi.org\/10.1038\/nature24270","journal-title":"Nature"},{"key":"4131_CR5","unstructured":"Berner C, Brockman G, Chan B, Cheung V, Debiak P, Dennison C, Farhi D, Fischer Q, Hashme S, Hesse C et al (2019) Dota 2 with large scale deep reinforcement learning. arXiv:1912.06680"},{"key":"4131_CR6","doi-asserted-by":"publisher","unstructured":"Gu S, Holly E, Lillicrap T, Levine S (2017) Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE international conference on robotics and automation (ICRA). IEEE, pp 3389\u20133396. 
https:\/\/doi.org\/10.1109\/ICRA.2017.7989385","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"4131_CR7","unstructured":"Kalashnikov D, Irpan A, Pastor P, Ibarz J, Herzog A, Jang E, Quillen D, Holly E, Kalakrishnan M, Vanhoucke V et al (2018) Scalable deep reinforcement learning for visionbased robotic manipulation. In: 2018 Conference on robot learning. PMLR, vol 87, pp 651\u2013673. https:\/\/proceedings.mlr.press\/v87\/kalashnikov18a.html"},{"key":"4131_CR8","doi-asserted-by":"publisher","unstructured":"Koos S, Mouret J-B, Doncieux S (2010) Crossing the reality gap in evolutionary robotics by promoting transferable controllers. In: Proceedings of the 12th annual conference on genetic and evolutionary computation, pp 119\u2013126. https:\/\/doi.org\/10.1145\/1830483.1830505","DOI":"10.1145\/1830483.1830505"},{"key":"4131_CR9","doi-asserted-by":"publisher","unstructured":"Cutler M, How JP (2015) Efficient reinforcement learning for robots using informative simulated priors. In: 2015 IEEE international conference on robotics and automation (ICRA), pp 2605\u20132612. https:\/\/doi.org\/10.1109\/ICRA.2015.7139550","DOI":"10.1109\/ICRA.2015.7139550"},{"key":"4131_CR10","doi-asserted-by":"publisher","unstructured":"Gao W, Graesser L, Choromanski K, Song X, Lazic N, Sanketi P, Sindhwani V, Jaitly N (2020) Robotic table tennis with model-free reinforcement learning. In: 2020 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 5556\u20135563. https:\/\/doi.org\/10.1109\/IROS45743.2020.9341191","DOI":"10.1109\/IROS45743.2020.9341191"},{"key":"4131_CR11","unstructured":"Mahjourian R, Miikkulainen R, Lazic N, Levine S, Jaitly N (2018) Hierarchical policy design for sample-efficient learning of robot table tennis through self-play. 
arXiv:1811.12927"},{"key":"4131_CR12","doi-asserted-by":"publisher","unstructured":"Zhu Y, Zhao Y, Jin L, Wu J, Xiong R (2018) Towards high level skill learning: Learn to return table tennis ball using monte-carlo based policy gradient method. In: 2018 IEEE international conference on real-time computing and robotics (RCAR), pp 34\u201341. https:\/\/doi.org\/10.1109\/RCAR.2018.8621776","DOI":"10.1109\/RCAR.2018.8621776"},{"key":"4131_CR13","doi-asserted-by":"publisher","unstructured":"Hanna JP, Desai S, Karnan H, Warnell G, Stone P (2021) Grounded action transformation for sim-to-real reinforcement learning. Mach Learn:1\u201331. https:\/\/doi.org\/10.1007\/s10994-021-05982-z","DOI":"10.1007\/s10994-021-05982-z"},{"key":"4131_CR14","doi-asserted-by":"publisher","unstructured":"B\u00fcchler D., Guist S, Calandra R, Berenz V, Sch\u00f6lkopf B, Peters J (2022) Learning to play table tennis from scratch using muscular robots. In: 2022 IEEE Transactions on robotics. IEEE, pp 1\u201311. https:\/\/doi.org\/10.1109\/TRO.2022.3176207","DOI":"10.1109\/TRO.2022.3176207"},{"key":"4131_CR15","unstructured":"Schulman J, Levine S, Abbeel P, Jordan M, Moritz P (2015) Trust region policy optimization. In: Bach F, Blei D (eds) Proceedings of the 32nd international conference on machine learning. PMLR, vol 37, pp. 1889\u20131897. https:\/\/proceedings.mlr.press\/v37\/schulman15.html"},{"key":"4131_CR16","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv:1707.06347"},{"key":"4131_CR17","unstructured":"Lillicrap TP, Hunt JJ, Pritzel A, Heess N, Erez T, Tassa Y, Silver D, Wierstra D (2015) Continuous control with deep reinforcement learning. arXiv:1509.02971"},{"key":"4131_CR18","unstructured":"Fujimoto S, Hoof H, Meger D (2018) Addressing function approximation error in actor-critic methods. In: 2018 International conference on machine learning. PMLR, pp 1587\u20131596. 
https:\/\/proceedings.mlr.press\/v80\/fujimoto18a.html"},{"key":"4131_CR19","unstructured":"Haarnoja T, Zhou A, Abbeel P, Levine S (2018) Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: 2018 International conference on machine learning. PMLR, pp 1861\u20131870, https:\/\/proceedings.mlr.press\/v80\/haarnoja18b.html"},{"key":"4131_CR20","unstructured":"ASAI K, Nakayama M, YASE S (2019) The ping pong robot to return a ball precisely. https:\/\/www.omron.com\/global\/en\/technology\/omrontechnics\/vol51\/016.html. Accessed 2019"},{"key":"4131_CR21","doi-asserted-by":"publisher","first-page":"92","DOI":"10.1016\/j.neucom.2019.01.087","volume":"345","author":"F Li","year":"2019","unstructured":"Li F, Jiang Q, Zhang S, Wei M, Song R (2019) Robot skill acquisition in assembly process using deep reinforcement learning. Neurocomputing 345:92\u2013102. https:\/\/doi.org\/10.1016\/j.neucom.2019.01.087","journal-title":"Neurocomputing"},{"key":"4131_CR22","doi-asserted-by":"publisher","unstructured":"Abreu M, Reis LP, Lau N (2019) Learning to run faster in a humanoid robot soccer environment through reinforcement learning. In: Chalup S, Niemueller T, Suthakorn J, Williams M-A (eds) RoboCup 2019: robot world cup XXIII. Springer, pp 3\u201315. https:\/\/doi.org\/10.1007\/978-3-030-35699-6_1","DOI":"10.1007\/978-3-030-35699-6_1"},{"key":"4131_CR23","doi-asserted-by":"publisher","unstructured":"Gao Y, Tebbe J, Zell A (2021) Robust stroke recognition via vision and imu in robotic table tennis. In: Farka\u0161 I, Masulli P, Otte S, Wermter S (eds) Artificial neural networks and machine learning \u2013 ICANN 2021. Springer, pp 379\u2013390. 
https:\/\/doi.org\/10.1007\/978-3-030-86362-3_31","DOI":"10.1007\/978-3-030-86362-3_31"},{"key":"4131_CR24","unstructured":"Coumans E, Bai Y (2017) Pybullet, a python module for physics simulation in robotics games and machine learning"},{"key":"4131_CR25","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1016\/j.robot.2018.03.012","volume":"105","author":"O Ko\u00e7","year":"2018","unstructured":"Ko\u00e7 O, Maeda G, Peters J (2018) Online optimal trajectory generation for robot table tennis. Rob Auton Syst 105:121\u2013137. https:\/\/doi.org\/10.1016\/j.robot.2018.03.012","journal-title":"Rob Auton Syst"},{"key":"4131_CR26","doi-asserted-by":"publisher","unstructured":"Silva R, Melo FS, Veloso M (2015) Towards table tennis with a quadrotor autonomous learning robot and onboard vision. In: 2015 IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 649\u2013655. https:\/\/doi.org\/10.1109\/IROS.2015.7353441","DOI":"10.1109\/IROS.2015.7353441"},{"key":"4131_CR27","doi-asserted-by":"publisher","unstructured":"Blank P, Groh BH, Eskofier BM (2017) Ball speed and spin estimation in table tennis using a racket-mounted inertial sensor. In: Proceedings of the 2017 ACM international symposium on wearable computers. ISWC \u201917, Association for computing machinery pp 2\u20139. https:\/\/doi.org\/10.1145\/3123021.3123040","DOI":"10.1145\/3123021.3123040"},{"key":"4131_CR28","doi-asserted-by":"publisher","unstructured":"Tebbe J, Klamt L, Gao Y, Zell A (2020) Spin detection in robotic table tennis. In: 2020 IEEE international conference on robotics and automation (ICRA), pp 9694\u20139700. 
https:\/\/doi.org\/10.1109\/ICRA40945.2020.9196536","DOI":"10.1109\/ICRA40945.2020.9196536"},{"issue":"3","key":"4131_CR29","doi-asserted-by":"publisher","first-page":"385","DOI":"10.1007\/s10994-012-5322-7","volume":"90","author":"T Hester","year":"2013","unstructured":"Hester T, Stone P (2013) Texplore: real-time sample-efficient reinforcement learning for robots. Mach Learn 90(3):385\u2013429. https:\/\/doi.org\/10.1007\/s10994-012-5322-7","journal-title":"Mach Learn"},{"key":"4131_CR30","doi-asserted-by":"publisher","unstructured":"Gu S, Holly E, Lillicrap T, Levine S (2017) Deep reinforcement learning for robotic manipulation with asynchronous off-policy updates. In: 2017 IEEE international conference on robotics and automation (ICRA), pp 3389\u20133396. https:\/\/doi.org\/10.1109\/ICRA.2017.7989385","DOI":"10.1109\/ICRA.2017.7989385"},{"key":"4131_CR31","doi-asserted-by":"publisher","unstructured":"Huang Y, B\u00fcchler D, Ko\u00e7 O, Sch\u00f6lkopf B, Peters J (2016) Jointly learning trajectory generation and hitting point prediction in robot table tennis. In: 2016 IEEE-RAS 16th international conference on humanoid robots (Humanoids), pp 650\u2013655. https:\/\/doi.org\/10.1109\/HUMANOIDS.2016.7803343","DOI":"10.1109\/HUMANOIDS.2016.7803343"},{"key":"4131_CR32","doi-asserted-by":"publisher","first-page":"99816","DOI":"10.1109\/ACCESS.2021.3093340","volume":"9","author":"L Yang","year":"2021","unstructured":"Yang L, Zhang H, Zhu X, Sheng X (2021) Ball motion control in the table tennis robot system using time-series deep reinforcement learning. IEEE Access 9:99816\u201399827. https:\/\/doi.org\/10.1109\/ACCESS.2021.3093340","journal-title":"IEEE Access"},{"key":"4131_CR33","doi-asserted-by":"publisher","unstructured":"Tebbe J, Krauch L, Gao Y, Zell A (2021) Sample-efficient reinforcement learning in robotic table tennis. In: 2021 IEEE international conference on robotics and automation (ICRA), pp 4171\u20134178. 
https:\/\/doi.org\/10.1109\/ICRA48506.2021.9560764","DOI":"10.1109\/ICRA48506.2021.9560764"},{"key":"4131_CR34","doi-asserted-by":"publisher","first-page":"99816","DOI":"10.1109\/ACCESS.2021.3093340","volume":"9","author":"L Yang","year":"2021","unstructured":"Yang L, Zhang H, Zhu X, Sheng X (2021) Ball motion control in the table tennis robot system using time-series deep reinforcement learning. IEEE Access 9:99816\u201399827","journal-title":"IEEE Access"},{"key":"4131_CR35","doi-asserted-by":"publisher","unstructured":"Tebbe J, Gao Y, Sastre-Rienietz M, Zell A (2018) A table tennis robot system using an industrial kuka robot arm. In: German conference on pattern recognition, pp 33\u201345. https:\/\/doi.org\/10.1007\/978-3-030-12939-2_3","DOI":"10.1007\/978-3-030-12939-2_3"},{"key":"4131_CR36","doi-asserted-by":"publisher","unstructured":"Zhang Y, Zhao Y, Xiong R, Wang Y, Wang J, Chu J (2014) Spin observation and trajectory prediction of a ping-pong ball. In: 2014 IEEE international conference on robotics and automation (ICRA), pp 4108\u20134114. https:\/\/doi.org\/10.1109\/ICRA.2014.6907456","DOI":"10.1109\/ICRA.2014.6907456"},{"issue":"1","key":"4131_CR37","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1109\/TRO.2009.2035744","volume":"26","author":"T Kr\u00f6ger","year":"2010","unstructured":"Kr\u00f6ger T, Wahl FM (2010) Online trajectory generation: basic concepts for instantaneous reactions to unforeseen events. IEEE Trans Rob 26(1):94\u2013111. https:\/\/doi.org\/10.1109\/TRO.2009.2035744","journal-title":"IEEE Trans Rob"},{"issue":"3","key":"4131_CR38","doi-asserted-by":"publisher","first-page":"263","DOI":"10.1177\/0278364912472380","volume":"32","author":"K M\u00fclling","year":"2013","unstructured":"M\u00fclling K., Kober J, Kroemer O, Peters J (2013) Learning to select and generalize striking movements in robot table tennis. Int J Rob Res 32(3):263\u2013279. 
https:\/\/doi.org\/10.1177\/0278364912472380","journal-title":"Int J Rob Res"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04131-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-022-04131-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-022-04131-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,31]],"date-time":"2023-05-31T10:20:29Z","timestamp":1685528429000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-022-04131-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,8]]},"references-count":38,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2023,6]]}},"alternative-id":["4131"],"URL":"https:\/\/doi.org\/10.1007\/s10489-022-04131-w","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"value":"0924-669X","type":"print"},{"value":"1573-7497","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,10,8]]},"assertion":[{"value":"29 August 2022","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 October 2022","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}