{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T00:12:51Z","timestamp":1759363971307,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":21,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032001399","type":"print"},{"value":"9783032001405","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-00140-5_15","type":"book-chapter","created":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T23:56:42Z","timestamp":1759276602000},"page":"220-230","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Comparing RL Policies for\u00a0Robotic Pusher"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0005-7781-2036","authenticated-orcid":false,"given":"Pedro","family":"Bonjour","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9170-5078","authenticated-orcid":false,"given":"Rui Pedro","family":"Lopes","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,1]]},"reference":[{"key":"15_CR1","doi-asserted-by":"publisher","unstructured":"Beltran-Hernandez, C.C., et al.: Learning force control for contact-rich manipulation tasks with rigid position-controlled robots. IEEE Robot. Autom. Lett. 5(4), 5709\u20135716 (2020). ISSN: 2377-3766, 2377-3774. https:\/\/doi.org\/10.1109\/LRA.2020.3010739. https:\/\/ieeexplore.ieee.org\/document\/9145608\/. Accessed 01 Feb 2025","DOI":"10.1109\/LRA.2020.3010739"},{"key":"15_CR2","doi-asserted-by":"publisher","unstructured":"Bergmann, L., et al.: Precision-Focused Reinforcement Learning Model for Robotic Object Pushing (2024). https:\/\/doi.org\/10.48550\/arXiv.2411.08622. arXiv:2411.08622. http:\/\/arxiv.org\/abs\/2411.08622. Accessed 05 Mar 2025","DOI":"10.48550\/arXiv.2411.08622"},{"key":"15_CR3","unstructured":"Duma, D.: Deep reinforcement learning in physics based simulations. Accepted (2025). Thesis 13 July 2023. http:\/\/dspace.epoka.edu.al\/handle\/1\/2345. Accessed 05 Mar 2025"},{"key":"15_CR4","doi-asserted-by":"publisher","unstructured":"Fujimoto, S., van Hoof, H., Meger, D.: Addressing Function Approximation Error in Actor-Critic Methods. arXiv:1802.09477 (2018). https:\/\/doi.org\/10.48550\/arXiv.1802.09477. http:\/\/arxiv.org\/abs\/1802.09477. Accessed 05 Feb 2025","DOI":"10.48550\/arXiv.1802.09477"},{"key":"15_CR5","doi-asserted-by":"publisher","unstructured":"Haarnoja, T., et al.: Soft Actor-Critic Algorithms and Applications. arXiv:1812.05905 (2019). https:\/\/doi.org\/10.48550\/arXiv.1812.05905. http:\/\/arxiv.org\/abs\/1812.05905. Accessed 07 Feb 2025","DOI":"10.48550\/arXiv.1812.05905"},{"key":"15_CR6","doi-asserted-by":"publisher","unstructured":"Haarnoja, T., et al.: Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor. arXiv:1801.01290 (2018). https:\/\/doi.org\/10.48550\/arXiv.1801.01290. http:\/\/arxiv.org\/abs\/1801.01290. Accessed 05 Feb 2025","DOI":"10.48550\/arXiv.1801.01290"},{"key":"15_CR7","unstructured":"Kiran, M., Ozyildirim, M.: Hyperparameter Tuning for Deep Reinforcement Learning Applications. arXiv abs\/2201.11182 (2022)"},{"key":"15_CR8","doi-asserted-by":"publisher","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. Version Number: 6 (2015). https:\/\/doi.org\/10.48550\/ARXIV.1509.02971. https:\/\/arxiv.org\/abs\/1509.02971. Accessed 05 Feb 2025","DOI":"10.48550\/ARXIV.1509.02971"},{"key":"15_CR9","doi-asserted-by":"publisher","unstructured":"Liu, X., et al.: Optimized interaction control for robot manipulator interacting with flexible environment. IEEE\/ASME Trans. Mechatron. 26(6), 2888\u20132898 (2021). ISSN: 1083-4435, 1941-014X. https:\/\/doi.org\/10.1109\/TMECH.2020.3047919. https:\/\/ieeexplore.ieee.org\/document\/9310291\/. Accessed 01 Feb 2025","DOI":"10.1109\/TMECH.2020.3047919"},{"key":"15_CR10","doi-asserted-by":"publisher","unstructured":"Lou, G., et al.: Controlling soft robotic arms using hybrid modelling and reinforcement learning. IEEE Robot. Autom. Lett. 9(8), 7070\u20137077 (2024). ISSN: 2377-3766, 2377-3774. https:\/\/doi.org\/10.1109\/LRA.2024.3418312. https:\/\/ieeexplore.ieee.org\/document\/10569050\/. Accessed 01 Feb 2025","DOI":"10.1109\/LRA.2024.3418312"},{"key":"15_CR11","unstructured":"Mnih, V., et al.: Asynchronous methods for deep reinforcement learning. In: Proceedings of the 33rd International Conference on International Conference on Machine Learning - Volume 48. ICML 2016, pp. 1928\u20131937. JMLR.org, New York (2016)"},{"key":"15_CR12","doi-asserted-by":"publisher","unstructured":"Mnih, V., et al.: Playing Atari with Deep Reinforcement Learning. arXiv:1312.5602 (2013). https:\/\/doi.org\/10.48550\/arXiv.1312.5602. http:\/\/arxiv.org\/abs\/1312.5602. Accessed 05 Feb 2025","DOI":"10.48550\/arXiv.1312.5602"},{"key":"15_CR13","doi-asserted-by":"publisher","unstructured":"Patterson, A., et al.: Empirical Design in Reinforcement Learning. arXiv:2304.01315 (2024). https:\/\/doi.org\/10.48550\/arXiv.2304.01315. http:\/\/arxiv.org\/abs\/2304.01315. Accessed 05 Feb 2025","DOI":"10.48550\/arXiv.2304.01315"},{"key":"15_CR14","unstructured":"Pusher - Gymnasium Documentation. https:\/\/gymnasium.farama.org\/environments\/mujoco\/pusher\/. Accessed 30 Jan 2025"},{"key":"15_CR15","unstructured":"Puterman, M.L.: Markov Decision Processes: Discrete Stochastic Dynamic Programming. Wiley Series in Probability and Statistics v.414. Wiley, Hoboken (2009). ISBN: 978-0-471-72782-8"},{"key":"15_CR16","doi-asserted-by":"publisher","unstructured":"Sanghi, N.: Deep Q-learning (DQN). In: Deep Reinforcement Learning with Python: RLHF for Chatbots and Large Language Models, pp. 225\u2013271. Apress, Berkeley (2024). ISBN: 979-8-8688-0273-7. https:\/\/doi.org\/10.1007\/979-8-8688-0273-7_6","DOI":"10.1007\/979-8-8688-0273-7_6"},{"key":"15_CR17","doi-asserted-by":"publisher","unstructured":"Schulman, J., et al.: Proximal Policy Optimization Algorithms. arXiv:1707.06347 (2017). https:\/\/doi.org\/10.48550\/arXiv.1707.06347. http:\/\/arxiv.org\/abs\/1707.06347. Accessed 05 Feb 2025","DOI":"10.48550\/arXiv.1707.06347"},{"key":"15_CR18","doi-asserted-by":"publisher","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. Nature 529(7587), 484\u2013489 (2016). ISSN: 0028- 0836, 1476-4687. https:\/\/doi.org\/10.1038\/nature16961. https:\/\/www.nature.com\/articles\/nature16961. Accessed 23 Jan 2025","DOI":"10.1038\/nature16961"},{"key":"15_CR19","unstructured":"Sutton, R.S., Barto, A.: Reinforcement learning: an introduction. Second edition. Adaptive computation and machine learning. The MIT Press, Cambridge, Massachusetts, London (2020). ISBN: 978-0-262-03924-6"},{"key":"15_CR20","doi-asserted-by":"publisher","unstructured":"Xu, Z., et al.: Open-source reinforcement learning environments implemented in MuJoCo with Franka manipulator. In: 2024 IEEE International Conference on Advanced Intelligent Mechatronics (AIM), Boston, MA, USA, pp. 709\u2013714. IEEE (2024). ISBN: 979-8-3503-5536-9. https:\/\/doi.org\/10.1109\/AIM55361.2024.10636979. https:\/\/ieeexplore.ieee.org\/document\/10636979\/. Accessed 01 Feb 2025","DOI":"10.1109\/AIM55361.2024.10636979"},{"key":"15_CR21","doi-asserted-by":"publisher","first-page":"4936","DOI":"10.1080\/00207543.2021.1943037","volume":"60","author":"S Yang","year":"2021","unstructured":"Yang, S., Zhigang, X.: Intelligent scheduling and reconfiguration via deep reinforcement learning in smart manufacturing. Int. J. Prod. Res. 60, 4936\u20134953 (2021). https:\/\/doi.org\/10.1080\/00207543.2021.1943037","journal-title":"Int. J. Prod. Res."}],"container-title":["Communications in Computer and Information Science","Optimization, Learning Algorithms and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-00140-5_15","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T23:56:46Z","timestamp":1759276606000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-00140-5_15"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,1]]},"ISBN":["9783032001399","9783032001405"],"references-count":21,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-00140-5_15","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,1]]},"assertion":[{"value":"1 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"OL2A","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Optimization, Learning Algorithms and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Sesti Levante","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 April 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 April 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ol2a2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/ol2a.ipb.pt","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}