{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T16:17:54Z","timestamp":1781194674022,"version":"3.54.1"},"reference-count":72,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"Institute of Information & Communications Technology Planning & Evaluation","award":["RS-2021-II212068-AIHub\/5%"],"award-info":[{"award-number":["RS-2021-II212068-AIHub\/5%"]}]},{"name":"Institute of Information & Communications Technology Planning & Evaluation","award":["RS-2021-II211343-GSAI\/5%"],"award-info":[{"award-number":["RS-2021-II211343-GSAI\/5%"]}]},{"name":"Institute of Information & Communications Technology Planning & Evaluation","award":["2022-0-00951-LBA\/20%"],"award-info":[{"award-number":["2022-0-00951-LBA\/20%"]}]},{"name":"Institute of Information & Communications Technology Planning & Evaluation","award":["2022-0-00953-PICA\/20%"],"award-info":[{"award-number":["2022-0-00953-PICA\/20%"]}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["RS-2024-00353991-SPARC\/20%"],"award-info":[{"award-number":["RS-2024-00353991-SPARC\/20%"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["RS-2024-00358416\/10%"],"award-info":[{"award-number":["RS-2024-00358416\/10%"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"crossref","award":["RS-2023-00274280\/10%"],"award-info":[{"award-number":["RS-2023-00274280\/10%"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"crossref"}]},{"name":"Korea Planning & Evaluation Institute of Industrial Technology","award":["RS-2024-00423940\/10%"],"award-info":[{"award-number":["RS-2024-00423940\/10%"]}]},{"name":"Korean Government"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Access"],"published-print":{"date-parts":[[2024]]},"DOI":"10.1109\/access.2024.3413864","type":"journal-article","created":{"date-parts":[[2024,6,13]],"date-time":"2024-06-13T18:16:45Z","timestamp":1718302605000},"page":"83796-83809","source":"Crossref","is-referenced-by-count":4,"title":["Visual Hindsight Self-Imitation Learning for Interactive Navigation"],"prefix":"10.1109","volume":"12","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-2475-036X","authenticated-orcid":false,"given":"Kibeom","family":"Kim","sequence":"first","affiliation":[{"name":"Interdisciplinary Program in Neuroscience, Seoul National University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9408-0085","authenticated-orcid":false,"given":"Moonhoen","family":"Lee","sequence":"additional","affiliation":[{"name":"Interdisciplinary Program in Cognitive Science, Seoul National University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6681-1031","authenticated-orcid":false,"given":"Min","family":"Whoo Lee","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Seoul National University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3259-4353","authenticated-orcid":false,"given":"Kisung","family":"Shin","sequence":"additional","affiliation":[{"name":"Interdisciplinary Program in Artificial Intelligence, Seoul National University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9601-3863","authenticated-orcid":false,"given":"Minsu","family":"Lee","sequence":"additional","affiliation":[{"name":"AI Institute, Seoul National University (AIIS), Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9890-0389","authenticated-orcid":false,"given":"Byoung-Tak","family":"Zhang","sequence":"additional","affiliation":[{"name":"Interdisciplinary Program in Neuroscience, Seoul National University, Seoul, South Korea"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2022.3141105"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00943"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1080\/019697297126029"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00945"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00279"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/URAI.2019.8768782"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3390\/s20123543"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/UR49135.2020.9144932"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00974"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00586"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00691"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01652"},{"key":"ref13","first-page":"2783","article-title":"Goal-aware cross-entropy for multi-target reinforcement learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Kim"},{"key":"ref14","article-title":"Building generalizable agents with a realistic and rich 3D environment","author":"Wu","year":"2018","journal-title":"arXiv:1801.02209"},{"key":"ref15","first-page":"1","article-title":"DD-PPO: Learning near-perfect pointgoal navigators from 2.5 billion frames","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Wijmans"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01509"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981090"},{"key":"ref18","article-title":"Truncated horizon policy search: Combining reinforcement learning imitation learning","author":"Sun","year":"2018","journal-title":"arXiv:1805.11240"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48891.2023.10161566"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3054912"},{"key":"ref21","first-page":"1","article-title":"Generative adversarial imitation learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"29","author":"Ho"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/S1364-6613(99)01327-3"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1049\/cit2.12043"},{"key":"ref24","article-title":"Imitation learning with human eye gaze via multi-objective prediction","author":"Thakur","year":"2023","journal-title":"arXiv:2102.13008"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9812316"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01716"},{"key":"ref27","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Ouyang"},{"key":"ref28","first-page":"1","article-title":"Hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"30","author":"Andrychowicz"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-89370-5_3"},{"key":"ref30","first-page":"1","article-title":"Curriculum-guided hindsight experience replay","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Fang"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.02.090"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.3390\/electronics9101742"},{"key":"ref33","first-page":"1","article-title":"Dher: Hindsight experience replay for dynamic goals","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Fang"},{"key":"ref34","first-page":"2863","article-title":"Hindsight expectation maximization for goal-conditioned reinforcement learning","volume-title":"Proc. Int. Conf. Artif. Intell. Statist.","author":"Tang"},{"key":"ref35","first-page":"1","article-title":"Addressing sample complexity in visual tasks using her and hallucinatory gans","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Sahni"},{"key":"ref36","article-title":"L-SA: Learning under-explored targets in multi-target reinforcement learning","author":"Kim","year":"2023","journal-title":"arXiv:2305.13741"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2012.6386109"},{"key":"ref38","article-title":"Minigrid miniworld: Modular customizable reinforcement learning environments for goal-oriented tasks","author":"Chevalier-Boisvert","year":"2023","journal-title":"arXiv:2306.13831"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989381"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2020.3036597"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s11063-022-10796-8"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2020.2994002"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1038\/s41598-022-07264-7"},{"key":"ref44","first-page":"4247","article-title":"Object goal navigation using goal-oriented semantic exploration","volume-title":"Proc. Annu. Conf. Neural Inf. Process. Syst.","author":"Chaplot"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00252"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01111"},{"key":"ref47","article-title":"AI2-THOR: An interactive 3D environment for visual AI","author":"Kolve","year":"2017","journal-title":"arXiv:1712.05474"},{"key":"ref48","first-page":"3878","article-title":"Self-imitation learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Oh"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109845"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA48506.2021.9561411"},{"key":"ref51","first-page":"13964","article-title":"Self-imitation learning via generalized lower bound q-learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Tang"},{"key":"ref52","article-title":"Self-imitation learning from demonstrations","author":"Pshikhachev","year":"2022","journal-title":"arXiv:2203.10905"},{"key":"ref53","first-page":"1","article-title":"Learning to reach goals via iterated supervised learning","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Ghosh"},{"key":"ref54","first-page":"1928","article-title":"Asynchronous methods for deep reinforcement learning","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Mnih"},{"key":"ref55","first-page":"18661","article-title":"Supervised contrastive learning","volume-title":"Proc. NIPS","author":"Khosla"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/ICEIEC49280.2020.9152261"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.patrec.2020.07.015"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00269"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/IROS47612.2022.9981838"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1312.6114"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1561\/2200000056"},{"issue":"11","key":"ref62","first-page":"1","article-title":"Visualizing data using t-SNE","volume":"9","author":"Van der Maaten","year":"2008","journal-title":"J. Mach. Learn. Res."},{"key":"ref63","article-title":"Llama 2: Open foundation and fine-tuned chat models","author":"Touvron","year":"2023","journal-title":"arXiv:2307.09288"},{"key":"ref64","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018","journal-title":"arXiv:1810.04805"},{"key":"ref65","article-title":"A survey of large language models","author":"Xin Zhao","year":"2023","journal-title":"arXiv:2303.18223"},{"key":"ref66","volume-title":"Video Generation Models as World Simulators","author":"Brooks","year":"2024"},{"key":"ref67","article-title":"Gemini: A family of highly capable multimodal models","author":"Team","year":"2023","journal-title":"arXiv:2312.11805"},{"key":"ref68","first-page":"1","article-title":"Visual instruction tuning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Liu"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11832"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19842-7_29"},{"key":"ref71","article-title":"Collaborative visual navigation","author":"Wang","year":"2021","journal-title":"arXiv:2107.01151"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.31449\/inf.v46i2.3603"}],"container-title":["IEEE Access"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6287639\/10380310\/10556540.pdf?arnumber=10556540","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,4]],"date-time":"2024-09-04T08:38:40Z","timestamp":1725439120000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10556540\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"references-count":72,"URL":"https:\/\/doi.org\/10.1109\/access.2024.3413864","relation":{},"ISSN":["2169-3536"],"issn-type":[{"value":"2169-3536","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]}}}