{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,16]],"date-time":"2025-10-16T20:46:57Z","timestamp":1760647617686,"version":"3.40.3"},"publisher-location":"Cham","reference-count":27,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031024436"},{"type":"electronic","value":"9783031024443"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-02444-3_6","type":"book-chapter","created":{"date-parts":[[2022,5,9]],"date-time":"2022-05-09T12:02:50Z","timestamp":1652097770000},"page":"73-86","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Fast Proximal Policy Optimization"],"prefix":"10.1007","author":[{"given":"Weiqi","family":"Zhao","sequence":"first","affiliation":[]},{"given":"Haobo","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Jin","family":"Xie","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,10]]},"reference":[{"key":"6_CR1","unstructured":"Brockman, G., et al.: OpenAI Gym (2016)"},{"key":"6_CR2","unstructured":"Duan, Y., Chen, X., Houthooft, R., Schulman, J., Abbeel, P.: Benchmarking deep reinforcement learning for continuous control. In: International Conference on Machine Learning, pp. 1329\u20131338. PMLR (2016)"},{"key":"6_CR3","unstructured":"Engstrom, L., et al.: Implementation matters in deep policy gradients: a case study on PPO and TRPO. arXiv preprint arXiv:2005.12729 (2020)"},{"key":"6_CR4","unstructured":"Fujimoto, S., Hoof, H., Meger, D.: Addressing function approximation error in actor-critic methods. In: International Conference on Machine Learning, pp. 1587\u20131596. PMLR (2018)"},{"key":"6_CR5","unstructured":"Haarnoja, T., Zhou, A., Abbeel, P., Levine, S.: Soft actor-critic: off-policy maximum entropy deep reinforcement learning with a stochastic actor. In: International Conference on Machine Learning, pp. 1861\u20131870. PMLR (2018)"},{"issue":"9","key":"6_CR6","doi-asserted-by":"publisher","first-page":"3570","DOI":"10.1109\/TNNLS.2019.2945019","volume":"31","author":"Y Hu","year":"2019","unstructured":"Hu, Y., Wang, W., Liu, H., Liu, L.: Reinforcement learning tracking control for robotic manipulator with kernel-based dynamic model. IEEE Trans. Neural Netw. Learn. Syst. 31(9), 3570\u20133578 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"6_CR7","unstructured":"Ilyas, A., et al.: Are deep policy gradient algorithms truly policy gradient algorithms? (2018)"},{"key":"6_CR8","doi-asserted-by":"crossref","unstructured":"Jiang, H., Qian, J., Xie, J., Yang, J.: Planning with learned dynamic model for unsupervised point cloud registration (2021)","DOI":"10.24963\/ijcai.2021\/107"},{"key":"6_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"562","DOI":"10.1007\/978-3-030-03398-9_48","volume-title":"Pattern Recognition and Computer Vision","author":"H Jiang","year":"2018","unstructured":"Jiang, H., Qian, J., Xie, J., Yang, J.: Episode-experience replay based tree-backup method for off-policy actor-critic algorithm. In: Lai, J.-H., et al. (eds.) PRCV 2018. LNCS, vol. 11256, pp. 562\u2013573. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-03398-9_48"},{"key":"6_CR10","doi-asserted-by":"crossref","unstructured":"Jiang, H., Shen, Y., Xie, J., Li, J., Qian, J., Yang, J.: Sampling network guided cross-entropy method for unsupervised point cloud registration. arXiv preprint arXiv:2109.06619 (2021)","DOI":"10.1109\/ICCV48922.2021.00607"},{"key":"6_CR11","doi-asserted-by":"crossref","unstructured":"Jiang, H., Xie, J., Yang, J.: Action candidate based clipped double q-learning for discrete and continuous action tasks. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 35, pp. 7979\u20137986 (2021)","DOI":"10.1609\/aaai.v35i9.16973"},{"key":"6_CR12","unstructured":"Kakade, S.M.: A natural policy gradient. In: Advances in Neural Information Processing Systems, vol. 14 (2001)"},{"key":"6_CR13","doi-asserted-by":"crossref","unstructured":"Kempka, M., Wydmuch, M., Runc, G., Toczek, J., Ja\u015bkowski, W.: ViZDoom: a doom-based AI research platform for visual reinforcement learning. In: 2016 IEEE Conference on Computational Intelligence and Games (CIG), pp. 1\u20138. IEEE (2016)","DOI":"10.1109\/CIG.2016.7860433"},{"issue":"1","key":"6_CR14","first-page":"1334","volume":"17","author":"S Levine","year":"2016","unstructured":"Levine, S., Finn, C., Darrell, T., Abbeel, P.: End-to-end training of deep visuomotor policies. J. Mach. Learn. Res. 17(1), 1334\u20131373 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"6_CR15","doi-asserted-by":"crossref","unstructured":"Li, T., Geyer, H., Atkeson, C.G., Rai, A.: Using deep reinforcement learning to learn high-level policies on the ATRIAS biped. In: 2019 International Conference on Robotics and Automation (ICRA), pp. 263\u2013269. IEEE (2019)","DOI":"10.1109\/ICRA.2019.8793864"},{"key":"6_CR16","unstructured":"Lillicrap, T.P., et al.: Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)"},{"key":"6_CR17","unstructured":"Mnih, V., et al.: Playing Atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)"},{"issue":"7540","key":"6_CR18","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/nature14236","volume":"518","author":"V Mnih","year":"2015","unstructured":"Mnih, V., et al.: Human-level control through deep reinforcement learning. nature 518(7540), 529\u2013533 (2015)","journal-title":"nature"},{"key":"6_CR19","unstructured":"Schulman, J., Levine, S., Abbeel, P., Jordan, M., Moritz, P.: Trust region policy optimization. In: International Conference on Machine Learning, pp. 1889\u20131897. PMLR (2015)"},{"key":"6_CR20","unstructured":"Schulman, J., Wolski, F., Dhariwal, P., Radford, A., Klimov, O.: Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)"},{"issue":"7587","key":"6_CR21","doi-asserted-by":"publisher","first-page":"484","DOI":"10.1038\/nature16961","volume":"529","author":"D Silver","year":"2016","unstructured":"Silver, D., et al.: Mastering the game of go with deep neural networks and tree search. nature 529(7587), 484\u2013489 (2016)","journal-title":"nature"},{"key":"6_CR22","unstructured":"Sutton, R.S., McAllester, D.A., Singh, S.P., Mansour, Y., et al.: Policy gradient methods for reinforcement learning with function approximation. In: NIPS, vol. 99, pp. 1057\u20131063. Citeseer (1999)"},{"key":"6_CR23","doi-asserted-by":"crossref","unstructured":"Todorov, E., Erez, T., Tassa, Y.: MuJoCo: a physics engine for model-based control. In: 2012 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 5026\u20135033. IEEE (2012)","DOI":"10.1109\/IROS.2012.6386109"},{"key":"6_CR24","unstructured":"Wang, Y., He, H., Tan, X.: Truly proximal policy optimization. In: Uncertainty in Artificial Intelligence, pp. 113\u2013122. PMLR (2020)"},{"key":"6_CR25","unstructured":"Wang, Y., He, H., Tan, X., Gan, Y.: Trust region-guided proximal policy optimization. arXiv preprint arXiv:1901.10314 (2019)"},{"key":"6_CR26","unstructured":"Wu, Y., Mansimov, E., Liao, S., Grosse, R., Ba, J.: Scalable trust-region method for deep reinforcement learning using Kronecker-factored approximation. arXiv preprint arXiv:1708.05144 (2017)"},{"key":"6_CR27","unstructured":"Zhu, W., Rosendo, A.: Proximal policy optimization smoothed algorithm. arXiv preprint arXiv:2012.02439 (2020)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-02444-3_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,4]],"date-time":"2023-02-04T18:32:58Z","timestamp":1675535578000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-02444-3_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031024436","9783031024443"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-02444-3_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"10 May 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACPR","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Pattern Recognition","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Jeju Island","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Korea (Republic of)","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2021","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 November 2021","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 November 2021","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"acpr2021","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.acpr2021.org","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"154","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"85","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"55% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"4","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}