{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:18:36Z","timestamp":1750220316377,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,1,8]],"date-time":"2022-01-08T00:00:00Z","timestamp":1641600000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,1,8]]},"DOI":"10.1145\/3493700.3493709","type":"proceedings-article","created":{"date-parts":[[2022,1,7]],"date-time":"2022-01-07T23:54:21Z","timestamp":1641599661000},"page":"81-89","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Identifying efficient curricula for reinforcement learning in complex environments with a fixed computational budget"],"prefix":"10.1145","author":[{"given":"Omkar","family":"Shelke","sequence":"first","affiliation":[{"name":"TCS Research, IN"}]},{"given":"Hardik","family":"Meisheri","sequence":"additional","affiliation":[{"name":"TCS Research, IN"}]},{"given":"Harshad","family":"Khadilkar","sequence":"additional","affiliation":[{"name":"TCS Research, IN"}]}],"member":"320","published-online":{"date-parts":[[2022,1,8]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"Yusuf Aytar Tobias Pfaff David Budden Thomas Paine Ziyu Wang and Nando de Freitas. 2018. Playing hard exploration games by watching youtube. In Advances in Neural Information Processing Systems. 2930\u20132941."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Michael Bain and Claude Sammut. 1995. A Framework for Behavioural Cloning.. In Machine Intelligence 15. 103\u2013129.","DOI":"10.1093\/oso\/9780198538677.003.0006"},{"key":"e_1_3_2_1_3_1","volume-title":"The complexity of decentralized control of Markov decision processes. Mathematics of operations research 27, 4","author":"Bernstein S","year":"2002","unstructured":"Daniel\u00a0S Bernstein, Robert Givan, Neil Immerman, and Shlomo Zilberstein. 2002. The complexity of decentralized control of Markov decision processes. Mathematics of operations research 27, 4 (2002), 819\u2013840."},{"key":"e_1_3_2_1_4_1","unstructured":"Jack Clark and Dario Amodei. 2016. Faulty Reward Functions in the Wild. https:\/\/openai.com\/blog\/faulty-reward-functions\/."},{"key":"e_1_3_2_1_5_1","volume-title":"Search-based structured prediction. Machine learning 75, 3","author":"Daum\u00e9 Hal","year":"2009","unstructured":"Hal Daum\u00e9, John Langford, and Daniel Marcu. 2009. Search-based structured prediction. Machine learning 75, 3 (2009), 297\u2013325."},{"key":"e_1_3_2_1_6_1","volume-title":"Robot shaping: Developing autonomous agents through learning. Artificial intelligence 71, 2","author":"Dorigo Marco","year":"1994","unstructured":"Marco Dorigo and Marco Colombetti. 1994. Robot shaping: Developing autonomous agents through learning. Artificial intelligence 71, 2 (1994), 321\u2013370."},{"key":"e_1_3_2_1_7_1","volume-title":"Skynet: A top deep RL agent in the inaugural pommerman team competition. arXiv preprint arXiv:1905.01360(2019).","author":"Gao Chao","year":"2019","unstructured":"Chao Gao, Pablo Hernandez-Leal, Bilal Kartal, and Matthew\u00a0E Taylor. 2019. Skynet: A top deep RL agent in the inaugural pommerman team competition. arXiv preprint arXiv:1905.01360(2019)."},{"key":"e_1_3_2_1_8_1","unstructured":"Dylan Hadfield-Menell. 2017. Cooperatively Learning Human Values. https:\/\/bair.berkeley.edu\/blog\/2017\/08\/17\/cooperatively-learning-human-values\/."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"Todd Hester Matej Vecerik Olivier Pietquin Marc Lanctot Tom Schaul Bilal Piot Dan Horgan John Quan Andrew Sendonaris Gabriel Dulac-Arnold 2017. Deep q-learning from demonstrations. arXiv preprint arXiv:1704.03732(2017).","DOI":"10.1609\/aaai.v32i1.11757"},{"key":"e_1_3_2_1_10_1","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative adversarial imitation learning. In Advances in neural information processing systems. 4565\u20134573."},{"key":"e_1_3_2_1_11_1","volume-title":"Planning and acting in partially observable stochastic domains. Artificial intelligence 101, 1-2","author":"Kaelbling Leslie\u00a0Pack","year":"1998","unstructured":"Leslie\u00a0Pack Kaelbling, Michael\u00a0L Littman, and Anthony\u00a0R Cassandra. 1998. Planning and acting in partially observable stochastic domains. Artificial intelligence 101, 1-2 (1998), 99\u2013134."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.10827"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487167"},{"key":"e_1_3_2_1_14_1","unstructured":"Adam Lerer and Alexander Peysakhovich. 2018. Learning social conventions in markov games. arXiv preprint arXiv:1806.10071(2018)."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.5555\/3091574.3091594"},{"key":"e_1_3_2_1_16_1","unstructured":"Hardik Meisheri and Harshad Khadilkar. 2020. Sample Efficient Training in Multi-Agent Adversarial Games with Limited Teammate Communication. arXiv preprint arXiv:2011.00424(2020)."},{"key":"e_1_3_2_1_17_1","unstructured":"Hardik Meisheri Omkar Shelke Richa Verma and Harshad Khadilkar. 2019. Accelerating Training in Pommerman with Imitation and Reinforcement Learning. arXiv preprint arXiv:1911.04947(2019)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8463162"},{"key":"e_1_3_2_1_19_1","unstructured":"Andrew\u00a0Y Ng Daishi Harada and Stuart Russell. 1999. Policy invariance under reward transformations: Theory and application to reward shaping. In ICML Vol.\u00a099. 278\u2013287."},{"key":"e_1_3_2_1_20_1","unstructured":"OpenAI. 2018. OpenAI Five. https:\/\/blog.openai.com\/openai-five\/."},{"key":"e_1_3_2_1_21_1","unstructured":"Takayuki Osogami and Toshihiro Takahashi. 2019. Real-time tree search with pessimistic scenarios. arXiv preprint arXiv:1902.10870(2019)."},{"key":"e_1_3_2_1_22_1","volume-title":"Cooperative multi-agent learning: The state of the art. Autonomous agents and multi-agent systems 11, 3","author":"Panait Liviu","year":"2005","unstructured":"Liviu Panait and Sean Luke. 2005. Cooperative multi-agent learning: The state of the art. Autonomous agents and multi-agent systems 11, 3 (2005), 387\u2013434."},{"key":"e_1_3_2_1_23_1","unstructured":"Peng Peng Liang Pang Yufeng Yuan and Chao Gao. 2018. Continual match based training in Pommerman: Technical report. arXiv preprint arXiv:1812.07297(2018)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201311"},{"volume-title":"The NeurIPS \u201918 Competition","author":"Resnick Cinjon","key":"e_1_3_2_1_25_1","unstructured":"Cinjon Resnick, Chao Gao, G\u00f6r\u00f6g M\u00e1rton, Takayuki Osogami, Liang Pang, and Toshihiro Takahashi. 2020. Pommerman & NeurIPS 2018. In The NeurIPS \u201918 Competition, Sergio Escalera and Ralf Herbrich (Eds.). Springer International Publishing, Cham, 11\u201336."},{"key":"e_1_3_2_1_26_1","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. 627\u2013635","author":"Ross St\u00e9phane","year":"2011","unstructured":"St\u00e9phane Ross, Geoffrey Gordon, and Drew Bagnell. 2011. A reduction of imitation learning and structured prediction to no-regret online learning. In Proceedings of the fourteenth international conference on artificial intelligence and statistics. 627\u2013635."},{"key":"e_1_3_2_1_27_1","unstructured":"John Schulman Filip Wolski Prafulla Dhariwal Alec Radford and Oleg Klimov. 2017. Proximal Policy Optimization Algorithms. arxiv:1707.06347\u00a0[cs.LG]"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Shihong Song Jiayi Weng Hang Su Dong Yan Haosheng Zou and Jun Zhu. 2019. Playing FPS Games With Environment-Aware Hierarchical Reinforcement Learning.. In IJCAI. 3475\u20133482.","DOI":"10.24963\/ijcai.2019\/482"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"crossref","unstructured":"Jiakai Zhang and Kyunghyun Cho. 2016. Query-efficient imitation learning for end-to-end autonomous driving. arXiv preprint arXiv:1605.06450(2016).","DOI":"10.1609\/aaai.v31i1.10857"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3235765.3235812"}],"event":{"name":"CODS-COMAD 2022: 5th Joint International Conference on Data Science & Management of Data (9th ACM IKDD CODS and 27th COMAD)","sponsor":["SIGGRAPH ACM Special Interest Group on Computer Graphics and Interactive Techniques"],"location":"Bangalore India","acronym":"CODS-COMAD 2022"},"container-title":["Proceedings of the 5th Joint International Conference on Data Science &amp; Management of Data (9th ACM IKDD CODS and 27th COMAD)"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493700.3493709","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3493700.3493709","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T20:11:51Z","timestamp":1750191111000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3493700.3493709"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,1,8]]},"references-count":30,"alternative-id":["10.1145\/3493700.3493709","10.1145\/3493700"],"URL":"https:\/\/doi.org\/10.1145\/3493700.3493709","relation":{},"subject":[],"published":{"date-parts":[[2022,1,8]]},"assertion":[{"value":"2022-01-08","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}