{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T20:46:08Z","timestamp":1777322768318,"version":"3.51.4"},"reference-count":54,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"DOI":"10.13039\/501100004329","name":"The Slovenian Research and Innovation Agency","doi-asserted-by":"publisher","award":["P2-0270"],"award-info":[{"award-number":["P2-0270"]}],"id":[{"id":"10.13039\/501100004329","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004329","name":"The Slovenian Research and Innovation Agency","doi-asserted-by":"publisher","award":["L2-60153"],"award-info":[{"award-number":["L2-60153"]}],"id":[{"id":"10.13039\/501100004329","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. 
Robot."],"published-print":{"date-parts":[[2026]]},"DOI":"10.1109\/tro.2026.3677944","type":"journal-article","created":{"date-parts":[[2026,3,26]],"date-time":"2026-03-26T19:53:19Z","timestamp":1774554799000},"page":"1729-1748","source":"Crossref","is-referenced-by-count":0,"title":["Interagent Beliefs for Learning to Communicate in Large-Scale Multirobot Visual Object Search"],"prefix":"10.1109","volume":"42","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3746-9980","authenticated-orcid":false,"given":"Jernej","family":"Puc","sequence":"first","affiliation":[{"name":"Faculty of Mechanical Engineering, University of Ljubljana, Ljubljana, Slovenia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4054-7871","authenticated-orcid":false,"given":"Ga\u0161per","family":"\u0160kulj","sequence":"additional","affiliation":[{"name":"Faculty of Mechanical Engineering, University of Ljubljana, Ljubljana, Slovenia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8998-1364","authenticated-orcid":false,"given":"Jan","family":"Pleterski","sequence":"additional","affiliation":[{"name":"Faculty of Mechanical Engineering, University of Ljubljana, Ljubljana, Slovenia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9932-9521","authenticated-orcid":false,"given":"Primo\u017e","family":"Podr\u017eaj","sequence":"additional","affiliation":[{"name":"Faculty of Mechanical Engineering, University of Ljubljana, Ljubljana, Slovenia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1720-9145","authenticated-orcid":false,"given":"Rok","family":"Vrabi\u010d","sequence":"additional","affiliation":[{"name":"Faculty of Mechanical Engineering, University of Ljubljana, Ljubljana, Slovenia"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2024.3410318"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1146\/annurev-control-030323-022510"},{"key":"ref3","first-page":"1538","article-title":"TarMAC: Targeted multi-agent communication","volume-title":"Proc. 
36th Int. Conf. Mach. Learn.","author":"Das","year":"2019"},{"key":"ref4","first-page":"7265","article-title":"Learning attentional communication for multi-agent cooperation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jiang","year":"2018"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01581"},{"key":"ref6","first-page":"2698","article-title":"VAIN: Attentional multi-agent predictive modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hoshen","year":"2017"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2023.3285300"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2017.2776305"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2023.3263459"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.024"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3211873"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2022.3164044"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3062557"},{"key":"ref14","article-title":"On evaluation of embodied navigation agents","author":"Anderson","year":"2018"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASE.2024.3378010"},{"key":"ref16","first-page":"3442","article-title":"Visual semantic navigation using scene priors","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Yang","year":"2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3068906"},{"key":"ref18","first-page":"32340","article-title":"ZSON: Zero-shot object-goal navigation using multimodal goal embeddings","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","author":"Majumdar","year":"2022"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.26599\/TST.2021.9010012"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref23","article-title":"Collaborative visual navigation","author":"Wang","year":"2021"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3145964"},{"key":"ref25","first-page":"1059","article-title":"Learning to navigate in complex environments","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Mirowski","year":"2017"},{"key":"ref26","first-page":"7374","article-title":"Evaluating long-term memory in 3D mazes","volume-title":"Proc. 11th Int. Conf. Learn. Representations","author":"Pasukonis","year":"2023"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2024.3412638"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2022.3224667"},{"key":"ref29","first-page":"2145","article-title":"Learning to communicate with deep multi-agent reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Foerster","year":"2016"},{"key":"ref30","first-page":"2252","article-title":"Learning multiagent communication with backpropagation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Sukhbaatar","year":"2016"},{"key":"ref31","first-page":"3911","article-title":"Learning when to communicate at scale in multiagent cooperative and competitive tasks","volume-title":"Proc. 7th Int. Conf. Learn. 
Representations","author":"Singh","year":"2019"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.65109\/edcb3795"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6221"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11492"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6217"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3036597"},{"key":"ref37","article-title":"Isaac gym: High performance GPU based physics simulation for robot learning","volume-title":"Proc. 35th Conf. Neural Inf. Process. Syst., Track Datasets Benchmarks","author":"Makoviychuk","year":"2021"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2994002"},{"key":"ref39","volume-title":"Multi-Agent Reinforcement Learning: Foundations and Modern Approaches","author":"Albrecht","year":"2024"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-28929-8"},{"key":"ref41","article-title":"Hyperbolic discounting and learning over multiple horizons","author":"Fedus","year":"2019"},{"key":"ref42","first-page":"24611","article-title":"The surprising effectiveness of PPO in cooperative multi-agent games","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Yu","year":"2022"},{"key":"ref43","first-page":"13458","article-title":"Settling the variance of multi-agent policy gradients","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kuba","year":"2021"},{"key":"ref44","article-title":"QPLEX: Duplex dueling multi-agent Q-learning","volume-title":"Proc. 9th Int. Conf. Learn. Representations","author":"Wang","year":"2021"},{"key":"ref45","article-title":"High-dimensional continuous control using generalized advantage estimation","volume-title":"Proc. 4th Int. Conf. Learn. 
Representations","author":"Schulman","year":"2016"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN54540.2023.10191652"},{"key":"ref47","first-page":"2020","article-title":"Phasic policy gradient","volume-title":"Proc. 38th Int. Conf. Mach. Learn.","author":"Cobbe","year":"2021"},{"key":"ref48","first-page":"2961","article-title":"Actor-attention-critic for multi-agent reinforcement learning","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Iqbal","year":"2019"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17276"},{"key":"ref50","first-page":"1098","article-title":"What matters for on-policy deep actor-critic methods? A large-scale study","volume-title":"Proc. 9th Int. Conf. Learn. Representations","author":"Andrychowicz","year":"2021"},{"key":"ref51","article-title":"Semi-on-policy training for sample efficient multi-agent policy gradients","volume-title":"Proc. 20th Int. Conf. Auton. Agents MultiAgent Syst., Adaptive Learn. Agents Workshop","author":"Vasilev","year":"2021"},{"key":"ref52","article-title":"Recurrent experience replay in distributed reinforcement learning","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Kapturowski","year":"2019"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1990.2.4.490"},{"key":"ref54","first-page":"9367","article-title":"Descending through a crowded valley\u2014Benchmarking deep learning optimizers","volume-title":"Proc. 38th Int. Conf. Mach. 
Learn.","author":"Schmidt","year":"2021"}],"container-title":["IEEE Transactions on Robotics"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/8860\/11297026\/11456614.pdf?arnumber=11456614","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T19:52:47Z","timestamp":1777319567000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11456614\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/tro.2026.3677944","relation":{},"ISSN":["1552-3098","1941-0468"],"issn-type":[{"value":"1552-3098","type":"print"},{"value":"1941-0468","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]}}}