{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T14:57:40Z","timestamp":1776783460438,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":27,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,26]],"date-time":"2024-06-26T00:00:00Z","timestamp":1719360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,26]]},"DOI":"10.1145\/3652037.3663948","type":"proceedings-article","created":{"date-parts":[[2024,6,26]],"date-time":"2024-06-26T12:30:38Z","timestamp":1719405038000},"page":"466-473","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Exploring Performance in Complex Search-and-Retrieve Tasks: A Comparative Analysis of PPO and GAIL Robots"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1277-294X","authenticated-orcid":false,"given":"Shashank","family":"Kapoor","sequence":"first","affiliation":[{"name":"Applied Cognitive Science Laboratory, Indian Institute of Technology Mandi, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2601-2125","authenticated-orcid":false,"given":"Shashank","family":"Uttrani","sequence":"additional","affiliation":[{"name":"Applied Cognitive Science Laboratory, Indian Institute of Technology Mandi, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3433-1716","authenticated-orcid":false,"given":"Gunjan","family":"Paul","sequence":"additional","affiliation":[{"name":"Applied Cognitive Science Laboratory, Indian Institute of Technology Mandi, India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4218-4907","authenticated-orcid":false,"given":"Varun","family":"Dutt","sequence":"additional","affiliation":[{"name":"Applied Cognitive Science Laboratory, Indian Institute of Technology Mandi, India"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,6,26]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"A brief survey of deep reinforcement learning. arXiv preprint arXiv:1708.05866","author":"Arulkumaran Kai","year":"2017","unstructured":"Kai Arulkumaran, Marc\u00a0Peter Deisenroth, Miles Brundage, and Anil\u00a0Anthony Bharath. 2017. A brief survey of deep reinforcement learning. arXiv preprint arXiv:1708.05866 (2017)."},{"key":"e_1_3_2_1_2_1","volume-title":"Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680","author":"Berner Christopher","year":"2019","unstructured":"Christopher Berner, Greg Brockman, Brooke Chan, Vicki Cheung, Przemys\u0142aw D\u0119biak, Christy Dennison, David Farhi, Quirin Fischer, Shariq Hashme, Chris Hesse, 2019. Dota 2 with large scale deep reinforcement learning. arXiv preprint arXiv:1912.06680 (2019)."},{"key":"e_1_3_2_1_3_1","volume-title":"International conference on machine learning. PMLR","author":"Haarnoja Tuomas","year":"2018","unstructured":"Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, and Sergey Levine. 2018. Soft actor-critic: Off-policy maximum entropy deep reinforcement learning with a stochastic actor. In International conference on machine learning. PMLR, 1861\u20131870."},{"key":"e_1_3_2_1_4_1","volume-title":"Generative adversarial imitation learning. Advances in neural information processing systems 29","author":"Ho Jonathan","year":"2016","unstructured":"Jonathan Ho and Stefano Ermon. 2016. Generative adversarial imitation learning. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3390\/systems11050217"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.3390\/app12031379"},{"key":"e_1_3_2_1_7_1","volume-title":"Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971","author":"Lillicrap P","year":"2015","unstructured":"Timothy\u00a0P Lillicrap, Jonathan\u00a0J Hunt, Alexander Pritzel, Nicolas Heess, Tom Erez, Yuval Tassa, David Silver, and Daan Wierstra. 2015. Continuous control with deep reinforcement learning. arXiv preprint arXiv:1509.02971 (2015)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.5555\/1865756.1865798"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.3390\/app12147006"},{"key":"e_1_3_2_1_10_1","volume-title":"Unity Ml-agents Toolkit. Retrieved","year":"2023","unstructured":"Ml-agents 2021. Unity Ml-agents Toolkit. Retrieved Dec 2, 2023 from https:\/\/github.com\/Unity-Technologies\/ml-agents\/blob\/main\/docs\/LearningEnvironment-Examples.md"},{"key":"e_1_3_2_1_11_1","volume-title":"International conference on machine learning. PMLR","author":"Mnih Volodymyr","year":"2016","unstructured":"Volodymyr Mnih, Adria\u00a0Puigdomenech Badia, Mehdi Mirza, Alex Graves, Timothy Lillicrap, Tim Harley, David Silver, and Koray Kavukcuoglu. 2016. Asynchronous methods for deep reinforcement learning. In International conference on machine learning. PMLR, 1928\u20131937."},{"key":"e_1_3_2_1_12_1","volume-title":"Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602","author":"Mnih Volodymyr","year":"2013","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Alex Graves, Ioannis Antonoglou, Daan Wierstra, and Martin Riedmiller. 2013. Playing atari with deep reinforcement learning. arXiv preprint arXiv:1312.5602 (2013)."},{"key":"e_1_3_2_1_13_1","volume-title":"Human-level control through deep reinforcement learning. nature 518, 7540","author":"Mnih Volodymyr","year":"2015","unstructured":"Volodymyr Mnih, Koray Kavukcuoglu, David Silver, Andrei\u00a0A Rusu, Joel Veness, Marc\u00a0G Bellemare, Alex Graves, Martin Riedmiller, Andreas\u00a0K Fidjeland, Georg Ostrovski, 2015. Human-level control through deep reinforcement learning. nature 518, 7540 (2015), 529\u2013533."},{"key":"e_1_3_2_1_14_1","unstructured":"Andrew\u00a0Y Ng Stuart Russell 2000. Algorithms for inverse reinforcement learning.. In Icml Vol.\u00a01. 2."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201311"},{"key":"e_1_3_2_1_16_1","volume-title":"Variational discriminator bottleneck: Improving imitation learning, inverse rl, and gans by constraining information flow. arXiv preprint arXiv:1810.00821","author":"Peng Xue\u00a0Bin","year":"2018","unstructured":"Xue\u00a0Bin Peng, Angjoo Kanazawa, Sam Toyer, Pieter Abbeel, and Sergey Levine. 2018. Variational discriminator bottleneck: Improving imitation learning, inverse rl, and gans by constraining information flow. arXiv preprint arXiv:1810.00821 (2018)."},{"key":"e_1_3_2_1_17_1","volume-title":"Experimental evidence of effective human\u2013AI collaboration in medical decision-making. Scientific reports 12, 1","author":"Reverberi Carlo","year":"2022","unstructured":"Carlo Reverberi, Tommaso Rigon, Aldo Solari, Cesare Hassan, Paolo Cherubini, and Andrea Cherubini. 2022. Experimental evidence of effective human\u2013AI collaboration in medical decision-making. Scientific reports 12, 1 (2022), 14952."},{"key":"e_1_3_2_1_18_1","volume-title":"Mastering atari, go, chess and shogi by planning with a learned model. Nature 588, 7839","author":"Schrittwieser Julian","year":"2020","unstructured":"Julian Schrittwieser, Ioannis Antonoglou, Thomas Hubert, Karen Simonyan, Laurent Sifre, Simon Schmitt, Arthur Guez, Edward Lockhart, Demis Hassabis, Thore Graepel, 2020. Mastering atari, go, chess and shogi by planning with a learned model. Nature 588, 7839 (2020), 604\u2013609."},{"key":"e_1_3_2_1_19_1","volume-title":"Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347","author":"Schulman John","year":"2017","unstructured":"John Schulman, Filip Wolski, Prafulla Dhariwal, Alec Radford, and Oleg Klimov. 2017. Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347 (2017)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2980380"},{"key":"e_1_3_2_1_21_1","volume-title":"Julian Schrittwieser, Ioannis Antonoglou","author":"Silver David","year":"2016","unstructured":"David Silver, Aja Huang, Chris\u00a0J Maddison, Arthur Guez, Laurent Sifre, George Van Den\u00a0Driessche, Julian Schrittwieser, Ioannis Antonoglou, Veda Panneershelvam, Marc Lanctot, 2016. Mastering the game of Go with deep neural networks and tree search. nature 529, 7587 (2016), 484\u2013489."},{"key":"e_1_3_2_1_22_1","volume-title":"Mastering the game of go without human knowledge. nature 550, 7676","author":"Silver David","year":"2017","unstructured":"David Silver, Julian Schrittwieser, Karen Simonyan, Ioannis Antonoglou, Aja Huang, Arthur Guez, Thomas Hubert, Lucas Baker, Matthew Lai, Adrian Bolton, 2017. Mastering the game of go without human knowledge. nature 550, 7676 (2017), 354\u2013359."},{"key":"e_1_3_2_1_23_1","volume-title":"Training your agents 7 times faster with ML-Agents. Unity Blog","author":"Teng Ervin","year":"2019","unstructured":"Ervin Teng. 2019. Training your agents 7 times faster with ML-Agents. Unity Blog (2019)."},{"key":"e_1_3_2_1_24_1","volume-title":"International Advanced Computing Conference. Springer, 348\u2013361","author":"Vohra Ishita","year":"2021","unstructured":"Ishita Vohra, Shashank Uttrani, Akash\u00a0K Rao, and Varun Dutt. 2021. Evaluating the efficacy of different neural network deep reinforcement algorithms in complex search-and-retrieve virtual simulations. In International Advanced Computing Conference. Springer, 348\u2013361."},{"key":"e_1_3_2_1_25_1","volume-title":"Supporting artificial social intelligence with theory of mind. Frontiers in artificial intelligence 5","author":"Williams Jessica","year":"2022","unstructured":"Jessica Williams, Stephen\u00a0M Fiore, and Florian Jentsch. 2022. Supporting artificial social intelligence with theory of mind. Frontiers in artificial intelligence 5 (2022), 750763."},{"key":"e_1_3_2_1_26_1","volume-title":"Learn what not to learn: Action elimination with deep reinforcement learning. Advances in neural information processing systems 31","author":"Zahavy Tom","year":"2018","unstructured":"Tom Zahavy, Matan Haroush, Nadav Merlis, Daniel\u00a0J Mankowitz, and Shie Mannor. 2018. Learn what not to learn: Action elimination with deep reinforcement learning. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/SSCI47803.2020.9308468"}],"event":{"name":"PETRA '24: The PErvasive Technologies Related to Assistive Environments Conference","location":"Crete Greece","acronym":"PETRA '24"},"container-title":["Proceedings of the 17th International Conference on PErvasive Technologies Related to Assistive Environments"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652037.3663948","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652037.3663948","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T15:43:30Z","timestamp":1755877410000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652037.3663948"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,26]]},"references-count":27,"alternative-id":["10.1145\/3652037.3663948","10.1145\/3652037"],"URL":"https:\/\/doi.org\/10.1145\/3652037.3663948","relation":{},"subject":[],"published":{"date-parts":[[2024,6,26]]},"assertion":[{"value":"2024-06-26","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}