{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,28]],"date-time":"2025-03-28T08:22:24Z","timestamp":1743150144624,"version":"3.40.3"},"publisher-location":"Cham","reference-count":15,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030110178"},{"type":"electronic","value":"9783030110185"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-11018-5_17","type":"book-chapter","created":{"date-parts":[[2019,1,24]],"date-time":"2019-01-24T05:50:50Z","timestamp":1548309050000},"page":"180-187","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Adding Object Detection Skills to Visual Dialogue Agents"],"prefix":"10.1007","author":[{"given":"Gabriele","family":"Bani","sequence":"first","affiliation":[]},{"given":"Davide","family":"Belli","sequence":"additional","affiliation":[]},{"given":"Gautier","family":"Dagan","sequence":"additional","affiliation":[]},{"given":"Alexander","family":"Geenen","sequence":"additional","affiliation":[]},{"given":"Andrii","family":"Skliar","sequence":"additional","affiliation":[]},{"given":"Aashish","family":"Venkatesh","sequence":"additional","affiliation":[]},{"given":"Tim","family":"Baumg\u00e4rtner","sequence":"additional","affiliation":[]},{"given":"Elia","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"Raquel","family":"Fern\u00e1ndez","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,1,23]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Antol, S., et al.: VQA: visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2425\u20132433 (2015)","DOI":"10.1109\/ICCV.2015.279"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Das, A., et al.: Visual dialog. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, vol. 2 (2017)","DOI":"10.1109\/CVPR.2017.121"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"De Vries, H., Strub, F., Chandar, S., Pietquin, O., Larochelle, H., Courville, A.C.: Guesswhat?! visual object discovery through multi-modal dialogue. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.475"},{"key":"17_CR4","unstructured":"De Vries, H., Strub, F., Mary, J., Larochelle, H., Pietquin, O., Courville, A.C.: Modulating early visual processing by language. In: Advances in Neural Information Processing Systems, pp. 6594\u20136604 (2017)"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Girshick, R.: Fast R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1440\u20131448 (2015)","DOI":"10.1109\/ICCV.2015.169"},{"key":"17_CR7","unstructured":"Girshick, R., Radosavovic, I., Gkioxari, G., Doll\u00e1r, P., He, K.: Detectron (2018). https:\/\/github.com\/facebookresearch\/detectron"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2980\u20132988 (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Hu, R., Xu, H., Rohrbach, M., Feng, J., Saenko, K., Darrell, T.: Natural language object retrieval. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4555\u20134564 (2016)","DOI":"10.1109\/CVPR.2016.493"},{"key":"17_CR10","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., Doll\u00e1r, P., Girshick, R.B., He, K., Hariharan, B., Belongie, S.J.: Feature pyramid networks for object detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2017)","DOI":"10.1109\/CVPR.2017.106"},{"key":"17_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"TY Lin","year":"2014","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"17_CR12","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Shekhar, R., et al.: FOIL it! find one mismatch between image and language caption. In: Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), vol. 1, pp. 255\u2013265 (2017)","DOI":"10.18653\/v1\/P17-1024"},{"key":"17_CR14","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)"},{"key":"17_CR15","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-319-46475-6_5","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Yu","year":"2016","unstructured":"Yu, L., Poirson, P., Yang, S., Berg, A.C., Berg, T.L.: Modeling context in referring expressions. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 69\u201385. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_5"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2018 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-11018-5_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,22]],"date-time":"2023-01-22T01:18:53Z","timestamp":1674350333000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-11018-5_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030110178","9783030110185"],"references-count":15,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-11018-5_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"23 January 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2018","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 September 2018","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"14 September 2018","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2018","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2018.org\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}