{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,29]],"date-time":"2026-04-29T21:14:40Z","timestamp":1777497280816,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":28,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,8,14]],"date-time":"2022-08-14T00:00:00Z","timestamp":1660435200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"A*STAR AI3 HTPO seed grant C211118016 on Upside-Down Multi-Objective Bayesian Optimization for Few-Shot Design","award":["C211118016"],"award-info":[{"award-number":["C211118016"]}]},{"name":"NTU Data Science and Artificial Intelligence Center"},{"name":"A*STAR Centre for Frontier AI Research"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,8,14]]},"DOI":"10.1145\/3534678.3539266","type":"proceedings-article","created":{"date-parts":[[2022,8,12]],"date-time":"2022-08-12T19:06:12Z","timestamp":1660331172000},"page":"1420-1429","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":8,"title":["Importance Prioritized Policy Distillation"],"prefix":"10.1145","author":[{"given":"Xinghua","family":"Qu","sequence":"first","affiliation":[{"name":"Bytedance AI Lab, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yew Soon","family":"Ong","sequence":"additional","affiliation":[{"name":"Nanyang Technological University &amp; A*STAR Centre for Frontier AI, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Abhishek","family":"Gupta","sequence":"additional","affiliation":[{"name":"Singapore Institute of Manufacturing Technology, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pengfei","family":"Wei","sequence":"additional","affiliation":[{"name":"Bytedance AI Lab, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhu","family":"Sun","sequence":"additional","affiliation":[{"name":"Institute of High Performance Computing and Centre for Frontier AI Research &amp; A*STAR, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zejun","family":"Ma","sequence":"additional","affiliation":[{"name":"Bytedance AI Lab, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2022,8,14]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.5555\/3305381.3305428"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1613\/jair.3912"},{"key":"e_1_3_2_2_4_1","volume-title":"Openai gym. arXiv preprint arXiv:1606.01540","author":"Brockman Greg","year":"2016","unstructured":"Greg Brockman, Vicki Cheung, Ludwig Pettersson, Jonas Schneider, John Schulman, Jie Tang, and Wojciech Zaremba. 2016. Openai gym. arXiv preprint arXiv:1606.01540 (2016)."},{"key":"e_1_3_2_2_5_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 872--881","author":"Byrd Jonathon","year":"2019","unstructured":"Jonathon Byrd and Zachary Lipton. 2019. What is the effect of importance weighting in deep learning?. In International Conference on Machine Learning (ICML). PMLR, 872--881."},{"key":"e_1_3_2_2_6_1","volume-title":"Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS). 1331--1340","author":"Czarnecki Wojciech M","year":"2019","unstructured":"Wojciech M Czarnecki, Razvan Pascanu, Simon Osindero, Siddhant Jayakumar, Grzegorz Swirszcz, and Max Jaderberg. 2019. Distilling Policy Distillation. In Proceedings of the 22nd International Conference on Artificial Intelligence and Statistics (AISTATS). 1331--1340."},{"key":"e_1_3_2_2_7_1","volume-title":"Online robustness training for deep reinforcement learning. arXiv preprint arXiv:1911.00887","author":"Fischer Marc","year":"2019","unstructured":"Marc Fischer, Matthew Mirman, Steven Stalder, and Martin Vechev. 2019. Online robustness training for deep reinforcement learning. arXiv preprint arXiv:1911.00887 (2019)."},{"key":"e_1_3_2_2_8_1","volume-title":"Noisy Networks For Exploration. In International Conference on Learning Representations (ICLR).","author":"Fortunato Meire","year":"2018","unstructured":"Meire Fortunato, Mohammad Gheshlaghi Azar, Bilal Piot, Jacob Menick, Matteo Hessel, Ian Osband, Alex Graves, Volodymyr Mnih, Remi Munos, Demis Hassabis, et al. 2018. Noisy Networks For Exploration. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_9_1","volume-title":"Proceedings of 35th International Conference on Machine Learning (ICML)","volume":"80","author":"Fruit Ronan","year":"2018","unstructured":"Ronan Fruit, Matteo Pirotta, Alessandro Lazaric, and Ronald Ortner. 2018. Efficient Bias-Span-Constrained Exploration-Exploitation in Reinforcement Learning. In Proceedings of 35th International Conference on Machine Learning (ICML), Vol. 80. 1578--1586."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11796"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRev.106.620"},{"key":"e_1_3_2_2_13_1","volume-title":"Dual Policy Distillation. In International Joint Conference on Artificial Intelligence (IJCAI). 3146--3152","author":"Lai Kwei-Herng","year":"2020","unstructured":"Kwei-Herng Lai, Daochen Zha, Yuening Li, and Xia Hu. 2020. Dual Policy Distillation. In International Joint Conference on Artificial Intelligence (IJCAI). 3146--3152."},{"key":"e_1_3_2_2_14_1","volume-title":"The International Conference on Learning Representations (ICLR).","author":"Lillicrap Timothy P","year":"2016","unstructured":"Timothy P Lillicrap, Jonathan J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2016. Continuous control with deep reinforcement learning.. In The International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"e_1_3_2_2_16_1","volume-title":"Offline Meta-Reinforcement Learning with Advantage Weighting. In International Conference on Machine Learning (ICML). PMLR, 7780--7791","author":"Mitchell Eric","year":"2021","unstructured":"Eric Mitchell, Rafael Rafailov, Xue Bin Peng, Sergey Levine, and Chelsea Finn. 2021. Offline Meta-Reinforcement Learning with Advantage Weighting. In International Conference on Machine Learning (ICML). PMLR, 7780--7791."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"Volodymyr Mnih Koray Kavukcuoglu David Silver Andrei A Rusu Joel Veness Marc G Bellemare Alex Graves Martin Riedmiller Andreas K Fidjeland Georg Ostrovski et al. 2015. Human-level control through deep reinforcement learning. Nature 518 7540 (2015) 529--533.","DOI":"10.1038\/nature14236"},{"key":"e_1_3_2_2_18_1","volume-title":"Advantageweighted regression: Simple and scalable off-policy reinforcement learning. arXiv preprint arXiv:1910.00177","author":"Peng Xue Bin","year":"2019","unstructured":"Xue Bin Peng, Aviral Kumar, Grace Zhang, and Sergey Levine. 2019. Advantageweighted regression: Simple and scalable off-policy reinforcement learning. arXiv preprint arXiv:1910.00177 (2019)."},{"key":"e_1_3_2_2_19_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 5142-- 5151","author":"Phuong Mary","year":"2019","unstructured":"Mary Phuong and Christoph Lampert. 2019. Towards understanding knowledge distillation. In International Conference on Machine Learning (ICML). PMLR, 5142-- 5151."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2016.2543000"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"e_1_3_2_2_22_1","volume-title":"Proceedings of the 14th International Conference on Artificial Intelligence and Statistics (AISTATS). 627--635","author":"Ross St\u00e9phane","year":"2011","unstructured":"St\u00e9phane Ross, Geoffrey Gordon, and Drew Bagnell. 2011. A reduction of imitation learning and structured prediction to no-regret online learning. In Proceedings of the 14th International Conference on Artificial Intelligence and Statistics (AISTATS). 627--635."},{"key":"e_1_3_2_2_23_1","volume-title":"Caglar Gulcehre, Guillaume Desjardins, James Kirkpatrick, Razvan Pascanu, Volodymyr Mnih, Koray Kavukcuoglu, and Raia Hadsell.","author":"Rusu Andrei A","year":"2015","unstructured":"Andrei A Rusu, Sergio Gomez Colmenarejo, Caglar Gulcehre, Guillaume Desjardins, James Kirkpatrick, Razvan Pascanu, Volodymyr Mnih, Koray Kavukcuoglu, and Raia Hadsell. 2015. Policy distillation. arXiv preprint arXiv:1511.06295 (2015)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"crossref","unstructured":"David Silver Julian Schrittwieser Karen Simonyan Ioannis Antonoglou Aja Huang Arthur Guez Thomas Hubert Lucas Baker Matthew Lai Adrian Bolton et al. 2017. Mastering the game of go without human knowledge. Nature 550 7676 (2017) 354--359.","DOI":"10.1038\/nature24270"},{"key":"e_1_3_2_2_25_1","first-page":"312","article-title":"Statistical learning theory. Hoboken. Wiley. Wang, K., Tsung, F.(2007). Run-to-run Process Adjust. using Categ","volume":"39","author":"Vapnik Vladimir","year":"1998","unstructured":"Vladimir Vapnik. 1998. Statistical learning theory. Hoboken. Wiley. Wang, K., Tsung, F.(2007). Run-to-run Process Adjust. using Categ. Obs. J. Qual. Technol. 39, 4 (1998), 312.","journal-title":"Obs. J. Qual. Technol."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/72.788640"},{"key":"e_1_3_2_2_27_1","volume-title":"Proceedings of the 33rd International Conference on Machine Learning (ICML). 1995--2003","author":"Wang Ziyu","year":"2016","unstructured":"Ziyu Wang, Tom Schaul, Matteo Hessel, Hado Hasselt, Marc Lanctot, and Nando Freitas. 2016. Dueling network architectures for deep reinforcement learning. In Proceedings of the 33rd International Conference on Machine Learning (ICML). 1995--2003."},{"key":"e_1_3_2_2_28_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Xu Da","year":"2021","unstructured":"Da Xu, Yuting Ye, and Chuanwei Ruan. 2021. Understanding the role of importance weighting for deep learning. In International Conference on Learning Representations (ICLR)."}],"event":{"name":"KDD '22: The 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Washington DC USA","acronym":"KDD '22","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539266","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3534678.3539266","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:59:59Z","timestamp":1750186799000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3534678.3539266"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,14]]},"references-count":28,"alternative-id":["10.1145\/3534678.3539266","10.1145\/3534678"],"URL":"https:\/\/doi.org\/10.1145\/3534678.3539266","relation":{},"subject":[],"published":{"date-parts":[[2022,8,14]]},"assertion":[{"value":"2022-08-14","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}