{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T11:18:37Z","timestamp":1773141517091,"version":"3.50.1"},"publisher-location":"Cham","reference-count":45,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198410","type":"print"},{"value":"9783031198427","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19842-7_28","type":"book-chapter","created":{"date-parts":[[2022,10,22]],"date-time":"2022-10-22T12:12:59Z","timestamp":1666440779000},"page":"480-496","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":29,"title":["TIDEE: Tidying Up Novel Rooms Using Visuo-Semantic Commonsense Priors"],"prefix":"10.1007","author":[{"given":"Gabriel","family":"Sarch","sequence":"first","affiliation":[]},{"given":"Zhaoyuan","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Adam W.","family":"Harley","sequence":"additional","affiliation":[]},{"given":"Paul","family":"Schydlo","sequence":"additional","affiliation":[]},{"given":"Michael J.","family":"Tarr","sequence":"additional","affiliation":[]},{"given":"Saurabh","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"Katerina","family":"Fragkiadaki","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,10,23]]},"reference":[{"key":"28_CR1","unstructured":"Anderson, P., et al.: On evaluation of embodied navigation agents. arXiv preprint arXiv:1807.06757 (2018)"},{"key":"28_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Vision-and-language navigation: interpreting visually-grounded navigation instructions in real environments. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3674\u20133683 (2018)","DOI":"10.1109\/CVPR.2018.00387"},{"key":"28_CR3","unstructured":"Batra, D., et al.: Rearrangement: a challenge for embodied AI. arXiv abs\/2011.01975 (2020)"},{"key":"28_CR4","unstructured":"Batra, D., et al.: Objectnav revisited: on evaluation of embodied agents navigating to objects. arXiv preprint arXiv:2006.13171 (2020)"},{"key":"28_CR5","unstructured":"Blukis, V., Paxton, C., Fox, D., Garg, A., Artzi, Y.: A persistent spatial semantic representation for high-level natural language instruction execution. In: Conference on Robot Learning, pp. 706\u2013717. PMLR (2022)"},{"key":"28_CR6","first-page":"4283","volume":"33","author":"M Chang","year":"2020","unstructured":"Chang, M., Gupta, A., Gupta, S.: Semantic visual navigation by watching Youtube videos. Adv. Neural. Inf. Process. Syst. 33, 4283\u20134294 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"28_CR7","unstructured":"Chaplot, D.S., Gandhi, D., Gupta, S., Gupta, A., Salakhutdinov, R.: Learning to explore using active neural slam. In: International Conference on Learning Representations (ICLR) (2020)"},{"key":"28_CR8","unstructured":"Chaplot, D.S., Gandhi, D.P., Gupta, A., Salakhutdinov, R.R.: Object goal navigation using goal-oriented semantic exploration. In: Advances in Neural Information Processing Systems, vol. 33 (2020)"},{"key":"28_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/978-3-030-58539-6_19","volume-title":"Computer Vision \u2013 ECCV 2020","author":"DS Chaplot","year":"2020","unstructured":"Chaplot, D.S., Jiang, H., Gupta, S., Gupta, A.: Semantic curiosity for active visual learning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12351, pp. 309\u2013326. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58539-6_19"},{"key":"28_CR10","unstructured":"Chen, T., Gupta, S., Gupta, A.: Learning exploration policies for navigation. In: International Conference on Learning Representations (2019). https:\/\/openreview.net\/pdf?id=SyMWn05F7"},{"key":"28_CR11","doi-asserted-by":"crossref","unstructured":"Chen, X., Li, L.J., Fei-Fei, L., Gupta, A.: Iterative visual reasoning beyond convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7239\u20137248 (2018)","DOI":"10.1109\/CVPR.2018.00756"},{"key":"28_CR12","doi-asserted-by":"crossref","unstructured":"Das, A., Datta, S., Gkioxari, G., Lee, S., Parikh, D., Batra, D.: Embodied question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1\u201310 (2018)","DOI":"10.1109\/CVPR.2018.00008"},{"key":"28_CR13","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), Minneapolis, Minnesota, pp. 4171\u20134186. Association for Computational Linguistics, June 2019. https:\/\/doi.org\/10.18653\/v1\/N19-1423. https:\/\/aclanthology.org\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"28_CR14","unstructured":"Fan, L., et al.: Surreal: open-source reinforcement learning framework and robot manipulation benchmark. In: Conference on Robot Learning, pp. 767\u2013782. PMLR (2018)"},{"key":"28_CR15","unstructured":"Fang, Z., Jain, A., Sarch, G., Harley, A.W., Fragkiadaki, K.: Move to see better: self-improving embodied object detection. In: The British Machine Vision Conference (2021)"},{"key":"28_CR16","unstructured":"Gan, C., et al.: Threedworld: a platform for interactive multi-modal physical simulation. arXiv preprint arXiv:2007.04954 (2020)"},{"key":"28_CR17","doi-asserted-by":"publisher","unstructured":"Gan, C., et al.: The threedworld transport challenge: a visually guided task-and-motion planning benchmark towards physically realistic embodied AI. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 8847\u20138854 (2022). https:\/\/doi.org\/10.1109\/ICRA46639.2022.9812329","DOI":"10.1109\/ICRA46639.2022.9812329"},{"key":"28_CR18","doi-asserted-by":"crossref","unstructured":"Gordon, D., Kembhavi, A., Rastegari, M., Redmon, J., Fox, D., Farhadi, A.: IQA: visual question answering in interactive environments. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4089\u20134098 (2018)","DOI":"10.1109\/CVPR.2018.00430"},{"key":"28_CR19","doi-asserted-by":"crossref","unstructured":"Gupta, S., Davidson, J., Levine, S., Sukthankar, R., Malik, J.: Cognitive mapping and planning for visual navigation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.769"},{"key":"28_CR20","unstructured":"Haber, N., Mrowca, D., Fei-Fei, L., Yamins, D.L.: Learning to play with intrinsically-motivated self-aware agents. In: 32nd Conference on Neural Information Processing Systems (2018)"},{"key":"28_CR21","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1016\/0010-0277(94)00643-Y","volume":"55","author":"WG Hayward","year":"1995","unstructured":"Hayward, W.G., Tarr, M.J.: Spatial language and spatial representation. Cognition 55, 39\u201384 (1995)","journal-title":"Cognition"},{"key":"28_CR22","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"28_CR23","unstructured":"Kolve, E., et al.: AI2-THOR: An Interactive 3D Environment for Visual AI. arXiv (2017)"},{"key":"28_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"28_CR25","unstructured":"Malisiewicz, T., Efros, A.A.: Beyond categories: the visual memex model for reasoning about object relationships. In: NIPS, December 2009"},{"key":"28_CR26","doi-asserted-by":"publisher","unstructured":"Marino, K., Salakhutdinov, R., Gupta, A.: The more you know: using knowledge graphs for image classification. In: 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 20\u201328 (2017). https:\/\/doi.org\/10.1109\/CVPR.2017.10","DOI":"10.1109\/CVPR.2017.10"},{"key":"28_CR27","unstructured":"Min, S.Y., Chaplot, D.S., Ravikumar, P., Bisk, Y., Salakhutdinov, R.: Film: following instructions in language with modular methods (2021)"},{"key":"28_CR28","unstructured":"Murali, A., et al.: Pyrobot: an open-source robotics framework for research and benchmarking. arXiv preprint arXiv:1906.08236 (2019)"},{"key":"28_CR29","unstructured":"Padmakumar, A., et al.: Teach: task-driven embodied agents that chat (2021)"},{"key":"28_CR30","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"400","DOI":"10.1007\/978-3-030-58558-7_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"SK Ramakrishnan","year":"2020","unstructured":"Ramakrishnan, S.K., Al-Halah, Z., Grauman, K.: Occupancy anticipation for efficient exploration and navigation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12350, pp. 400\u2013418. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58558-7_24"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Savva, M., et al.: Habitat: a platform for embodied AI research. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9339\u20139347 (2019)","DOI":"10.1109\/ICCV.2019.00943"},{"key":"28_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"593","DOI":"10.1007\/978-3-319-93417-4_38","volume-title":"The Semantic Web","author":"M Schlichtkrull","year":"2018","unstructured":"Schlichtkrull, M., Kipf, T.N., Bloem, P., van\u00a0den Berg, R., Titov, I., Welling, M.: Modeling relational data with graph convolutional networks. In: Gangemi, A., et al. (eds.) ESWC 2018. LNCS, vol. 10843, pp. 593\u2013607. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-319-93417-4_38"},{"key":"28_CR33","doi-asserted-by":"crossref","unstructured":"Shen, B., et al.: iGibson 1.0: a simulation environment for interactive tasks in large realistic scenes. In: 2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems. IEEE (2021)","DOI":"10.1109\/IROS51168.2021.9636667"},{"key":"28_CR34","doi-asserted-by":"crossref","unstructured":"Shridhar, M., et al.: Alfred: a benchmark for interpreting grounded instructions for everyday tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10740\u201310749 (2020)","DOI":"10.1109\/CVPR42600.2020.01075"},{"key":"28_CR35","unstructured":"Suglia, A., Gao, Q., Thomason, J., Thattai, G., Sukhatme, G.S.: Embodied bert: a transformer model for embodied, language-guided visual task completion. In: EMNLP 2021 Workshop on Novel Ideas in Learning-to-Learn through Interaction (2021). https:\/\/www.amazon.science\/publications\/embodied-bert-a-transformer-model-for-embodied-language-guided-visual-task-completion"},{"key":"28_CR36","doi-asserted-by":"crossref","unstructured":"Wang, X., Ye, Y., Gupta, A.: Zero-shot recognition via semantic embeddings and knowledge graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6857\u20136866 (2018)","DOI":"10.1109\/CVPR.2018.00717"},{"key":"28_CR37","doi-asserted-by":"crossref","unstructured":"Weihs, L., Deitke, M., Kembhavi, A., Mottaghi, R.: Visual room rearrangement. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), June 2021","DOI":"10.1109\/CVPR46437.2021.00586"},{"key":"28_CR38","unstructured":"Weihs, L., et al.: Learning generalizable visual representations via interactive gameplay. In: International Conference on Learning Representations (2021)"},{"key":"28_CR39","unstructured":"Wijmans, E., et al.: DD-PPO: learning near-perfect pointgoal navigators from 2.5 billion frames. In: ICLR (2020)"},{"key":"28_CR40","doi-asserted-by":"crossref","unstructured":"Wortsman, M., Ehsani, K., Rastegari, M., Farhadi, A., Mottaghi, R.: Learning to learn how to learn: self-adaptive visual navigation using meta-learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6750\u20136759 (2019)","DOI":"10.1109\/CVPR.2019.00691"},{"key":"28_CR41","unstructured":"Yamauchi, B.: A frontier-based approach for autonomous exploration. In: Proceedings 1997 IEEE International Symposium on Computational Intelligence in Robotics and Automation CIRA 1997. Towards New Computational Principles for Robotics and Automation, pp. 146\u2013151. IEEE (1997)"},{"key":"28_CR42","unstructured":"Yang, W., Wang, X., Farhadi, A., Gupta, A., Mottaghi, R.: Visual semantic navigation using scene priors. In: Proceedings of (ICLR) International Conference on Learning Representations, May 2019"},{"key":"28_CR43","unstructured":"Yu, T., et al.: Meta-world: a benchmark and evaluation for multi-task and meta reinforcement learning. In: Conference on Robot Learning, pp. 1094\u20131100. PMLR (2020)"},{"key":"28_CR44","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"606","DOI":"10.1007\/978-3-030-58592-1_36","volume-title":"Computer Vision \u2013 ECCV 2020","author":"A Zareian","year":"2020","unstructured":"Zareian, A., Karaman, S., Chang, S.-F.: Bridging knowledge graphs to generate scene graphs. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12368, pp. 606\u2013623. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58592-1_36"},{"key":"28_CR45","unstructured":"Zhu, X., Su, W., Lu, L., Li, B., Wang, X., Dai, J.: Deformable DETR: deformable transformers for end-to-end object detection. In: International Conference on Learning Representations (2021). https:\/\/openreview.net\/forum?id=gZ9hCDWe6ke"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19842-7_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T19:09:56Z","timestamp":1710356996000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19842-7_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198410","9783031198427"],"references-count":45,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19842-7_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"23 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}