{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,18]],"date-time":"2025-09-18T10:29:17Z","timestamp":1758191357764,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":25,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,18]]},"DOI":"10.1145\/3719545.3721109","type":"proceedings-article","created":{"date-parts":[[2025,9,16]],"date-time":"2025-09-16T09:38:41Z","timestamp":1758015521000},"page":"58-70","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["InPTR: Integration Prioritized Trajectory Replay"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-4652-9594","authenticated-orcid":false,"given":"Chendie","family":"Yao","sequence":"first","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3594-2167","authenticated-orcid":false,"given":"Xingxing","family":"Liang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8259-5148","authenticated-orcid":false,"given":"Longfei","family":"Zhang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2937-3065","authenticated-orcid":false,"given":"Jincai","family":"Huang","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2354-7618","authenticated-orcid":false,"given":"Jun","family":"Lei","sequence":"additional","affiliation":[{"name":"National University of Defense Technology, Changsha, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2353-1416","authenticated-orcid":false,"given":"Yulong","family":"Zhang","sequence":"additional","affiliation":[{"name":"31002 Unit, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,9,16]]},"reference":[{"key":"e_1_3_3_2_2_2","unstructured":"Vladimir Braverman Avinatan Hassidim Yossi Matias Mariano Schain Sandeep Silwal and Samson Zhou. 2021. Adversarial robustness of streaming algorithms through importance sampling. CoRR abs\/2106.14952 4 (2021)."},{"key":"e_1_3_3_2_3_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCE-Asia49877.2020.9276975"},{"key":"e_1_3_3_2_4_2","unstructured":"Brett Daley and Christopher Amato. 2019. Reconciling \u03bb -returns with experience replay. Advances in Neural Information Processing Systems 32 (2019) 1133\u20131142."},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"crossref","unstructured":"Jingliang Duan Shengbo\u00a0Eben Li Yang Guan Qi Sun and Bo Cheng. 2020. Hierarchical reinforcement learning for self-driving decision-making without reliance on labelled driving data. IET Intelligent Transport Systems 14 5 (2020) 297\u2013305.","DOI":"10.1049\/iet-its.2019.0317"},{"key":"e_1_3_3_2_6_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/376"},{"key":"e_1_3_3_2_7_2","first-page":"1587","volume-title":"International Conference on Machine Learning","author":"Fujimoto Scott","year":"2018","unstructured":"Scott Fujimoto, Herke Hoof, and David Meger. 2018. Addressing function approximation error in actor-critic methods. In International Conference on Machine Learning. PMLR, 1587\u20131596."},{"key":"e_1_3_3_2_8_2","first-page":"2829","volume-title":"International conference on machine learning","author":"Gu Shixiang","year":"2016","unstructured":"Shixiang Gu, Timothy Lillicrap, Ilya Sutskever, and Sergey Levine. 2016. Continuous deep q-learning with model-based acceleration. In International conference on machine learning. PMLR, 2829\u20132838."},{"key":"e_1_3_3_2_9_2","unstructured":"Dan Horgan John Quan David Budden Gabriel Barth-Maron Matteo Hessel Hado Van\u00a0Hasselt and David Silver. 2018. Distributed prioritized experience replay. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1803.00933 (2018)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/SMC.2017.8122622"},{"key":"e_1_3_3_2_11_2","volume-title":"In Proc. 19th International Conference on Machine Learning","author":"Kakade Sham","year":"2002","unstructured":"Sham Kakade and John Langford. 2002. Approximately optimal approximate reinforcement learning. In In Proc. 19th International Conference on Machine Learning. Citeseer."},{"key":"e_1_3_3_2_12_2","unstructured":"Xingxing Liang He Feng Yang MA et\u00a0al. 2020. Deep Multi-Agent Reinforcement Learning: A Survey. ACTA AUTOMATICA SINICA (in Chinese) 46 12 (2020) 2537\u20132557."},{"key":"e_1_3_3_2_13_2","unstructured":"XX Liang YH Feng JC Huang et\u00a0al. 2020. A Novel Deep Reinforcement Learning Algorithm based on Attention-based Value Function and Autoregressive Environment Model. Ruan Jian Xue Bao. Journal of Software 4 (2020) 948\u2013966."},{"key":"e_1_3_3_2_14_2","unstructured":"Xingxing Liang Yang Ma Yanghe Feng and Zhong Liu. 2021. PTR-PPO: Proximal Policy Optimization with Prioritized Trajectory Replay. arxiv:https:\/\/arXiv.org\/abs\/2112.03798\u00a0[cs.LG] https:\/\/arxiv.org\/abs\/2112.03798"},{"key":"e_1_3_3_2_15_2","first-page":"1928","volume-title":"International conference on machine learning","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria\u00a0Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928\u20131937."},{"key":"e_1_3_3_2_16_2","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei\u00a0A Rusu Joel Veness Marc\u00a0G Bellemare Alex Graves Martin Riedmiller Andreas\u00a0K Fidjeland Georg Ostrovski et\u00a0al. 2015. Human-level control through deep reinforcement learning. nature 518 7540 (2015) 529\u2013533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_3_2_17_2","unstructured":"Tom Schaul John Quan Ioannis Antonoglou and David Silver. 2015. Prioritized experience replay. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1511.05952 (2015)."},{"key":"e_1_3_3_2_18_2","doi-asserted-by":"crossref","unstructured":"Julian Schrittwieser Ioannis Antonoglou Thomas Hubert Karen Simonyan Laurent Sifre Simon Schmitt Arthur Guez Edward Lockhart Demis Hassabis Thore Graepel et\u00a0al. 2020. Mastering atari go chess and shogi by planning with a learned model. Nature 588 7839 (2020) 604\u2013609.","DOI":"10.1038\/s41586-020-03051-4"},{"key":"e_1_3_3_2_19_2","first-page":"1889","volume-title":"International conference on machine learning","author":"Schulman John","year":"2015","unstructured":"John Schulman, Sergey Levine, Pieter Abbeel, Michael Jordan, and Philipp Moritz. 2015. Trust region policy optimization. In International conference on machine learning. PMLR, 1889\u20131897."},{"key":"e_1_3_3_2_20_2","unstructured":"John Schulman Philipp Moritz Sergey Levine Michael Jordan and Pieter Abbeel. 2015. High-dimensional continuous control using generalized advantage estimation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1506.02438 (2015)."},{"key":"e_1_3_3_2_21_2","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1707.06347 (2017)."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"crossref","unstructured":"David Silver Julian Schrittwieser Karen Simonyan Ioannis Antonoglou Aja Huang Arthur Guez Thomas Hubert Lucas Baker Matthew Lai Adrian Bolton et\u00a0al. 2017. Mastering the game of go without human knowledge. nature 550 7676 (2017) 354\u2013359.","DOI":"10.1038\/nature24270"},{"key":"e_1_3_3_2_23_2","volume-title":"Reinforcement learning: An introduction","author":"Sutton Richard\u00a0S","year":"2018","unstructured":"Richard\u00a0S Sutton and Andrew\u00a0G Barto. 2018. Reinforcement learning: An introduction. MIT press."},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"crossref","unstructured":"Oriol Vinyals Igor Babuschkin Wojciech\u00a0M Czarnecki Micha\u00ebl Mathieu Andrew Dudzik Junyoung Chung David\u00a0H Choi Richard Powell Timo Ewalds Petko Georgiev et\u00a0al. 2019. Grandmaster level in StarCraft II using multi-agent reinforcement learning. Nature 575 7782 (2019) 350\u2013354.","DOI":"10.1038\/s41586-019-1724-z"},{"key":"e_1_3_3_2_25_2","unstructured":"Ziyu Wang Victor Bapst Nicolas Heess Volodymyr Mnih Remi Munos Koray Kavukcuoglu and Nando de Freitas. 2016. Sample efficient actor-critic with experience replay. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1611.01224 (2016)."},{"key":"e_1_3_3_2_26_2","first-page":"113","volume-title":"Conference on Robot Learning","author":"Zhao Rui","year":"2018","unstructured":"Rui Zhao and Volker Tresp. 2018. Energy-based hindsight experience prioritization. In Conference on Robot Learning. PMLR, 113\u2013122."}],"event":{"name":"DAI '24: 6th International Conference on Distributed Artificial Intelligences","acronym":"DAI '24","location":"Singapore Singapore"},"container-title":["Proceedings of the 2024 Sixth International Conference on Distributed Artificial Intelligences"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3719545.3721109","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T13:12:26Z","timestamp":1758114746000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3719545.3721109"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,18]]},"references-count":25,"alternative-id":["10.1145\/3719545.3721109","10.1145\/3719545"],"URL":"https:\/\/doi.org\/10.1145\/3719545.3721109","relation":{},"subject":[],"published":{"date-parts":[[2024,12,18]]},"assertion":[{"value":"2025-09-16","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}