{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T06:49:59Z","timestamp":1742971799448,"version":"3.40.3"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783031056420"},{"type":"electronic","value":"9783031056437"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-05643-7_26","type":"book-chapter","created":{"date-parts":[[2022,5,14]],"date-time":"2022-05-14T08:03:05Z","timestamp":1652515385000},"page":"405-419","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Scene Change Captioning in\u00a0Real Scenarios"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2181-9475","authenticated-orcid":false,"given":"Yue","family":"Qiu","sequence":"first","affiliation":[]},{"given":"Kodai","family":"Nakashima","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0638-0855","authenticated-orcid":false,"given":"Yutaka","family":"Satoh","sequence":"additional","affiliation":[]},{"given":"Ryota","family":"Suzuki","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6609-8221","authenticated-orcid":false,"given":"Kenji","family":"Iwata","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8844-165X","authenticated-orcid":false,"given":"Hirokatsu","family":"Kataoka","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,5,15]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Herbst, E., Henry, P., Ren, X., Fox, D.: Toward object discovery and modeling via 3-D scene comparison. In: 2011 IEEE International Conference on Robotics and Automation, pp. 2623\u20132629 (2011)","DOI":"10.1109\/ICRA.2011.5980542"},{"key":"26_CR2","doi-asserted-by":"crossref","unstructured":"Ambru\u015f, R., Bore, N., Folkesson, J., Jensfelt, P.: Meta-rooms: building and maintaining long term spatial models in a dynamic world. In: Proceedings of the 2014 IEEE\/RSJ International Conference on Intelligent Robots and Systems, pp. 1854\u20131861 (2014)","DOI":"10.1109\/IROS.2014.6942806"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Langer, E., Ridder, B., Cashmore, M., Magazzeni, D., Zillich, M., Vincze, M.: On-the-fly detection of novel objects in indoor environments. In: 2017 IEEE International Conference on Robotics and Biomimetics, pp. 900\u2013907 (2017)","DOI":"10.1109\/ROBIO.2017.8324532"},{"key":"26_CR4","unstructured":"Xu, K., Ba, J., Kiros, R., et al.: Show, attend and tell: neural image caption generation with visual attention. In: International Conference on Machine Learning, pp. 2048\u20132057 (2015)"},{"key":"26_CR5","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6077\u20136086 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Biten, A.F., Gomez, L., Rusinol, M., Karatzas, D.: Good news, everyone! context driven entity-aware captioning for news images. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 12466\u201312475 (2019)","DOI":"10.1109\/CVPR.2019.01275"},{"key":"26_CR7","unstructured":"Yoshida, K., Minoguchi, M., Wani, K., Nakamura, A., Kataoka, H.: Neural joking machine: humorous image captioning. arXiv preprint arXiv:1805.11850"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Jhamtani, H., Berg-Kirkpatrick, T.: Learning to describe differences between pairs of similar images. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing, pp. 4024\u20134034 (2018)","DOI":"10.18653\/v1\/D18-1436"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Park, D.H., Darrell, T., Rohrbach, A.: Robust change captioning. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4624\u20134633 (2019)","DOI":"10.1109\/ICCV.2019.00472"},{"key":"26_CR10","doi-asserted-by":"crossref","unstructured":"Qiu, Y., Satoh, Y., Suzuki, R., Iwata, K., Kataoka, H.: Indoor scene change captioning based on multimodality data. Sensors 20(17), 4761 (2020)","DOI":"10.3390\/s20174761"},{"key":"26_CR11","doi-asserted-by":"crossref","unstructured":"Halber, M., Shi, Y., Xu, K., Funkhouser, T.: Rescan: inductive instance segmentation for indoor RGBD Scans. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2541\u20132550 (2019)","DOI":"10.1109\/ICCV.2019.00263"},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Fujita, A., Sakurada, K., Imaizumi, T., Ito, R., Hikosaka, S., Nakamura, R.: Damage detection from aerial images via convolutional neural networks. In: 2017 Fifteenth IAPR International Conference on Machine Vision Applications, pp. 5\u20138 (2017)","DOI":"10.23919\/MVA.2017.7986759"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Sakurada, K., Okatani, T.: Change detection from a street image pair using CNN features and superpixel segmentation. In: British Machine Vision Conference, vol. 61, pp. 1\u201312 (2015)","DOI":"10.5244\/C.29.61"},{"key":"26_CR14","doi-asserted-by":"crossref","unstructured":"Khan, S.H., He, X., Porikli, F., Bennamoun, M.: Forest change detection in incomplete satellite images with deep neural networks. IEEE Trans. Geosci. Remote Sens. 55(9), 5407\u20135423 (2017)","DOI":"10.1109\/TGRS.2017.2707528"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"Saha, S., Bovolo, F., Bruzzone, L.: Unsupervised deep change vector analysis for multiple-change detection in VHR images. IEEE Trans. Geosci. Remote Sens. 57(6), 3677\u20133693 (2019)","DOI":"10.1109\/TGRS.2018.2886643"},{"issue":"7","key":"26_CR16","doi-asserted-by":"publisher","first-page":"1301","DOI":"10.1007\/s10514-018-9734-5","volume":"42","author":"PF Alcantarilla","year":"2018","unstructured":"Alcantarilla, P.F., Stent, S., Ros, G., Arroyo, R., Gherardi, R.: Street-view change detection with deconvolutional networks. Auton. Robot. 42(7), 1301\u20131322 (2018). https:\/\/doi.org\/10.1007\/s10514-018-9734-5","journal-title":"Auton. Robot."},{"key":"26_CR17","unstructured":"Daudt, R.C., Le Saux, B., Boulch, A.: Fully convolutional Siamese networks for change detection. In: 2018 25th IEEE International Conference on Image Processing, pp. 4063\u20134067 (2018)"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Su, H., Maji, S., Kalogerakis, E., Learned-Miller, E.: Multi-view convolutional neural networks for 3D shape recognition. In: Proceedings of the IEEE International Conference on Computer Vision, Santiago, Chile 7\u201313, pp. 945\u2013953 (2015)","DOI":"10.1109\/ICCV.2015.114"},{"key":"26_CR19","doi-asserted-by":"crossref","unstructured":"Kanezaki, A., Matsushita, Y., Nishida, Y.: Rotationnet: joint object categorization and pose estimation using multiviews from unsupervised viewpoints. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5010\u20135019 (2018)","DOI":"10.1109\/CVPR.2018.00526"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Esteves, C., Xu, Y., Allen-Blanchette, C., Daniilidis, K.: Equivariant multi-view networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1568\u20131577 (2019)","DOI":"10.1109\/ICCV.2019.00165"},{"key":"26_CR21","unstructured":"Qi, C.R., Su, H., Mo, K., et al.: Pointnet: deep learning on point sets for 3D classification and segmentation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 652\u2013660 (2017)"},{"key":"26_CR22","unstructured":"Qi, C.R., Yi, L., Su, H., Guibas, L.J.: Pointnet++: deep hierarchical feature learning on point sets in a metric space. In: Advances in Neural Information Processing Systems (2017)"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Johnson, J., Hariharan, B., Van Der Maaten, L., Fei-Fei, L., Lawrence Zitnick, C., Girshick, R.: Clevr: a diagnostic dataset for compositional language and elementary visual reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2901\u20132910 (2017)","DOI":"10.1109\/CVPR.2017.215"},{"key":"26_CR24","doi-asserted-by":"crossref","unstructured":"Chang, A., et al.: Matterport3D: learning from RGB-D data in indoor environments. In: Proceedings of the 2017 International Conference on 3D Vision, pp. 667\u2013676 (2017)","DOI":"10.1109\/3DV.2017.00081"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Xia, F., Zamir, A.R., He, Z., Sax, A., Malik, J., Savarese, S.: Gibson ENV: real-world perception for embodied agents. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 9068\u20139079 (2018)","DOI":"10.1109\/CVPR.2018.00945"},{"key":"26_CR26","unstructured":"Straub, J., et al.: The Replica dataset: a digital replica of indoor spaces. arXiv 2019, arXiv:1906.05797"},{"key":"26_CR27","unstructured":"Xtion Site. https:\/\/www.asus.com\/jp\/3D-Sensor\/Xtion_PRO_LIVE\/. Accessed on 1 Jan 2022"},{"key":"26_CR28","unstructured":"FARO Site. https:\/\/www.faro.com\/ja-jp\/. Accessed on 1 Jan 2022"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Savva, M., et al.: A platform for embodied AI research. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 9339\u20139347 (2019)","DOI":"10.1109\/ICCV.2019.00943"},{"key":"26_CR30","doi-asserted-by":"publisher","first-page":"2451","DOI":"10.1162\/089976600300015015","volume":"12","author":"FA Gers","year":"2000","unstructured":"Gers, F.A., Schmidhuber, J., Cummins, F.: Learning to forget: continual prediction with LSTM. Neural Comput. 12, 2451\u20132471 (2000)","journal-title":"Neural Comput."},{"key":"26_CR31","unstructured":"Lin, C.-Y.: ROUGE: a package for automatic evaluation of summaries. Association for Computational Linguistics, pp. 74\u201381 (2004)"},{"key":"26_CR32","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1007\/978-3-319-46454-1_24","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Anderson","year":"2016","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: SPICE: semantic propositional image caption evaluation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 382\u2013398. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_24"},{"key":"26_CR33","doi-asserted-by":"crossref","unstructured":"Denkowski, M.,, Lavie A.: Meteor universal: language specific translation evaluation for any target language. In: Proceedings of the Ninth Workshop on Statistical Machine Translation, pp. 376\u2013380 (2014)","DOI":"10.3115\/v1\/W14-3348"},{"key":"26_CR34","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., et al.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics. Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"26_CR35","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in HCI"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-05643-7_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,13]],"date-time":"2024-03-13T19:10:56Z","timestamp":1710357056000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-05643-7_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031056420","9783031056437"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-05643-7_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"15 May 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"HCII","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Human-Computer Interaction","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"26 June 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 July 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"24","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"hcii2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/2022.hci.international\/index.html","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}