{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,7]],"date-time":"2026-03-07T19:14:30Z","timestamp":1772910870360,"version":"3.50.1"},"reference-count":45,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Robot. Autom. Lett."],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1109\/lra.2023.3234766","type":"journal-article","created":{"date-parts":[[2023,1,6]],"date-time":"2023-01-06T18:41:36Z","timestamp":1673030496000},"page":"928-935","source":"Crossref","is-referenced-by-count":29,"title":["Catch Me if You Hear Me: Audio-Visual Navigation in Complex Unmapped Environments With Moving Sounds"],"prefix":"10.1109","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2786-8371","authenticated-orcid":false,"given":"Abdelrahman","family":"Younes","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Freiburg, Freiburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1421-773X","authenticated-orcid":false,"given":"Daniel","family":"Honerkamp","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Freiburg, Freiburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1163-4992","authenticated-orcid":false,"given":"Tim","family":"Welschehold","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Freiburg, Freiburg, Germany"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4710-3114","authenticated-orcid":false,"given":"Abhinav","family":"Valada","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Freiburg, Freiburg, Germany"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.769"},{"key":"ref2","article-title":"DD-PPO: Learning near-perfect pointgoal navigators from 2.5 billion frames","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Wijmans","year":"2019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1177\/0278364917727062"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1093\/brain\/awn250"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58539-6_2"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197008"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01526"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00034"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/icassp49357.2023.10095818"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01144"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00122"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IROS40897.2019.8967847"},{"key":"ref13","article-title":"Learning to set waypoints for audio-visual navigation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen","year":"2020"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref15","article-title":"The replica dataset: A digital replica of indoor spaces","author":"Straub","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/3DV.2017.00081"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2019.XV.010"},{"key":"ref18","article-title":"Visual semantic navigation using scene priors","author":"Yang","year":"2018"},{"key":"ref19","article-title":"VTNet: Visual transformer network for object goal navigation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Du","year":"2020"},{"key":"ref20","article-title":"Speaker-follower models for vision-and-language navigation","volume-title":"Proc. Adv. Neural Inform. Process. Syst","author":"Fried","year":"2018"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01112"},{"key":"ref22","first-page":"12782","article-title":"Towards distraction-robust active visual tracking","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhong","year":"2021"},{"key":"ref23","article-title":"MOPT: Multi-object panoptic tracking","author":"Hurtado","year":"2020"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.70"},{"key":"ref25","article-title":"Learning to explore using active neural slam","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chaplot","year":"2019"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1017\/S0263574706002992"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2012.08.013"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989037"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2018.8593825"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2021.650325"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1002\/047134608x.w8266"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.robot.2017.07.011"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307286"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICDSP.2015.7252071"},{"key":"ref35","article-title":"Audio vision: Using audio-visual synchrony to locate sounds","volume-title":"Proc. Adv. Neural Inform. Process. Syst.","volume":"12","author":"Hershey","year":"2000"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.274"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_16"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_27"},{"key":"ref39","article-title":"Sound adversarial audio-visual navigation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Yu","year":"2022"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1186\/s40537-019-0197-0"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2017.XIII.034"},{"key":"ref42","first-page":"334","article-title":"Transferring end-to-end visuomotor control from simulation to real world for a multi-stage task","volume-title":"Proc. Conf. Robot Learn.","author":"James","year":"2017"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref44","article-title":"On evaluation of embodied navigation agents","author":"Anderson","year":"2018"},{"key":"ref45","article-title":"Proximal policy optimization algorithms","author":"Schulman","year":"2017"}],"container-title":["IEEE Robotics and Automation Letters"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7083369\/9990971\/10008039.pdf?arnumber=10008039","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,2,9]],"date-time":"2024-02-09T02:33:50Z","timestamp":1707446030000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10008039\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2]]},"references-count":45,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/lra.2023.3234766","relation":{},"ISSN":["2377-3766","2377-3774"],"issn-type":[{"value":"2377-3766","type":"electronic"},{"value":"2377-3774","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2]]}}}