{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T08:37:55Z","timestamp":1726043875742},"publisher-location":"Cham","reference-count":26,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030305079"},{"type":"electronic","value":"9783030305086"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-30508-6_10","type":"book-chapter","created":{"date-parts":[[2019,9,8]],"date-time":"2019-09-08T23:02:47Z","timestamp":1567983767000},"page":"119-130","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Referring Expression Comprehension via Co-attention and Visual Context"],"prefix":"10.1007","author":[{"given":"Youming","family":"Gao","sequence":"first","affiliation":[]},{"given":"Yi","family":"Ji","sequence":"additional","affiliation":[]},{"given":"Ting","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Yunlong","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Chunping","family":"Liu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,9,9]]},"reference":[{"key":"10_CR1","doi-asserted-by":"publisher","unstructured":"Ben-Younes, H., Cadene, R., Cord, M., Thome, N.: Mutan: multimodal tucker fusion for visual question answering. In: Proceedings of the IEEE International Conference on Computer Vision, vol. 3 (2017). \n                      https:\/\/doi.org\/10.1109\/iccv.2017.285","DOI":"10.1109\/iccv.2017.285"},{"key":"10_CR2","doi-asserted-by":"publisher","unstructured":"Chen, K., Kovvuri, R., Nevatia, R.: Query-guided regression network with context policy for phrase grounding. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2017). \n                      https:\/\/doi.org\/10.1109\/iccv.2017.95","DOI":"10.1109\/iccv.2017.95"},{"issue":"3","key":"10_CR3","doi-asserted-by":"publisher","first-page":"201","DOI":"10.1038\/nrn755","volume":"3","author":"M Corbetta","year":"2002","unstructured":"Corbetta, M., Shulman, G.L.: Control of goal-directed and stimulus-driven attention in the brain. Nat. Rev. Neurosci. 3(3), 201 (2002). \n                      https:\/\/doi.org\/10.1038\/nrn755","journal-title":"Nat. Rev. Neurosci."},{"key":"10_CR4","doi-asserted-by":"crossref","unstructured":"Das, A., et al.: Visual dialog. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, vol. 2 (2017)","DOI":"10.1109\/CVPR.2017.121"},{"key":"10_CR5","doi-asserted-by":"publisher","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2980\u20132988. IEEE (2017). \n                      https:\/\/doi.org\/10.1109\/iccv.2017.322","DOI":"10.1109\/iccv.2017.322"},{"key":"10_CR6","doi-asserted-by":"publisher","unstructured":"Hu, R., Rohrbach, M., Andreas, J., Darrell, T., Saenko, K.: Modeling relationships in referential expressions with compositional modular networks. In: CVPR, pp. 4418\u20134427 (2017). \n                      https:\/\/doi.org\/10.1109\/cvpr.2017.470","DOI":"10.1109\/cvpr.2017.470"},{"key":"10_CR7","doi-asserted-by":"publisher","unstructured":"Hu, R., Xu, H., Rohrbach, M., Feng, J., Saenko, K., Darrell, T.: Natural language object retrieval. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4555\u20134564 (2016). \n                      https:\/\/doi.org\/10.1109\/cvpr.2016.493","DOI":"10.1109\/cvpr.2016.493"},{"key":"10_CR8","doi-asserted-by":"publisher","unstructured":"Johnson, J., et al.: Inferring and executing programs for visual reasoning. In: ICCV, pp. 3008\u20133017 (2017). \n                      https:\/\/doi.org\/10.1109\/iccv.2017.325","DOI":"10.1109\/iccv.2017.325"},{"key":"10_CR9","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). \n                      https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"10_CR10","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"176","DOI":"10.1007\/978-3-030-01418-6_18","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2018","author":"A Lindh","year":"2018","unstructured":"Lindh, A., Ross, R.J., Mahalunkar, A., Salton, G., Kelleher, J.D.: Generating diverse and meaningful captions. In: K\u016frkov\u00e1, V., Manolopoulos, Y., Hammer, B., Iliadis, L., Maglogiannis, I. (eds.) ICANN 2018. LNCS, vol. 11139, pp. 176\u2013187. Springer, Cham (2018). \n                      https:\/\/doi.org\/10.1007\/978-3-030-01418-6_18"},{"key":"10_CR11","doi-asserted-by":"publisher","unstructured":"Liu, J., Wang, L., Yang, M.H., et al.: Referring expression generation and comprehension via attributes. In: Proceedings of CVPR (2017). \n                      https:\/\/doi.org\/10.1109\/iccv.2017.520","DOI":"10.1109\/iccv.2017.520"},{"key":"10_CR12","doi-asserted-by":"publisher","unstructured":"Lu, J., Xiong, C., Parikh, D., Socher, R.: Knowing when to look: adaptive attention via a visual sentinel for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), vol. 6, p. 2 (2017). \n                      https:\/\/doi.org\/10.1109\/cvpr.2017.345","DOI":"10.1109\/cvpr.2017.345"},{"key":"10_CR13","unstructured":"Lu, J., Yang, J., Batra, D., Parikh, D.: Hierarchical question-image co-attention for visual question answering. In: Advances In Neural Information Processing Systems, pp. 289\u2013297 (2016)"},{"key":"10_CR14","doi-asserted-by":"publisher","unstructured":"Luo, R., Shakhnarovich, G.: Comprehension-guided referring expressions. In: Computer Vision and Pattern Recognition (CVPR), vol. 2 (2017). \n                      https:\/\/doi.org\/10.1109\/cvpr.2017.333","DOI":"10.1109\/cvpr.2017.333"},{"key":"10_CR15","doi-asserted-by":"publisher","unstructured":"Mao, J., Huang, J., Toshev, A., Camburu, O., Yuille, A.L., Murphy, K.: Generation and comprehension of unambiguous object descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11\u201320 (2016). \n                      https:\/\/doi.org\/10.1109\/cvpr.2016.9","DOI":"10.1109\/cvpr.2016.9"},{"key":"10_CR16","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"792","DOI":"10.1007\/978-3-319-46493-0_48","volume-title":"Computer Vision \u2013 ECCV 2016","author":"VK Nagaraja","year":"2016","unstructured":"Nagaraja, V.K., Morariu, V.I., Davis, L.S.: Modeling context between objects for referring expression understanding. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 792\u2013807. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46493-0_48"},{"key":"10_CR17","doi-asserted-by":"publisher","unstructured":"Pennington, J., Socher, R., Manning, C.: GloVe: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014). \n                      https:\/\/doi.org\/10.3115\/v1\/d14-1162","DOI":"10.3115\/v1\/d14-1162"},{"issue":"6","key":"10_CR18","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"Shaoqing Ren","year":"2017","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015). \n                      https:\/\/doi.org\/10.1109\/tpami.2016.2577031","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1007\/978-3-319-46448-0_49","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Rohrbach","year":"2016","unstructured":"Rohrbach, A., Rohrbach, M., Hu, R., Darrell, T., Schiele, B.: Grounding of textual phrases in images by reconstruction. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 817\u2013834. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46448-0_49"},{"key":"10_CR20","doi-asserted-by":"publisher","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D., et al.: Grad-cam: visual explanations from deep networks via gradient-based localization. In: ICCV, pp. 618\u2013626 (2017). \n                      https:\/\/doi.org\/10.1109\/iccv.2017.74","DOI":"10.1109\/iccv.2017.74"},{"key":"10_CR21","doi-asserted-by":"publisher","unstructured":"Wei, W.L., et al: Seethevoice: learning from music to visual storytelling of shots. In: 2018 IEEE International Conference on Multimedia and Expo (ICME), pp. 1\u20136. IEEE (2018). \n                      https:\/\/doi.org\/10.1109\/icme.2018.8486496","DOI":"10.1109\/icme.2018.8486496"},{"key":"10_CR22","unstructured":"Xiong, C., Zhong, V., Socher, R.: Dynamic coattention networks for question answering. arXiv preprint \n                      arXiv:1611.01604\n                      \n                     (2016)"},{"key":"10_CR23","doi-asserted-by":"publisher","unstructured":"Yu, L., et al.: MAttNet: modular attention network for referring expression comprehension. In: CVPR (2018). \n                      https:\/\/doi.org\/10.1109\/CVPR.2018.00142","DOI":"10.1109\/CVPR.2018.00142"},{"key":"10_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"69","DOI":"10.1007\/978-3-319-46475-6_5","volume-title":"Computer Vision \u2013 ECCV 2016","author":"L Yu","year":"2016","unstructured":"Yu, L., Poirson, P., Yang, S., Berg, A.C., Berg, T.L.: Modeling context in referring expressions. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 69\u201385. Springer, Cham (2016). \n                      https:\/\/doi.org\/10.1007\/978-3-319-46475-6_5"},{"key":"10_CR25","doi-asserted-by":"publisher","unstructured":"Yu, L., Tan, H., Bansal, M., Berg, T.L.: A joint speaker-listener-reinforcer model for referring expressions. In: Computer Vision and Pattern Recognition (CVPR), vol. 2 (2017). \n                      https:\/\/doi.org\/10.1109\/cvpr.2017.375","DOI":"10.1109\/cvpr.2017.375"},{"key":"10_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"14","DOI":"10.1007\/978-3-030-01418-6_2","volume-title":"Artificial Neural Networks and Machine Learning \u2013 ICANN 2018","author":"Y Zhang","year":"2018","unstructured":"Zhang, Y., Gu, Y., Gu, X.: Two-stream convolutional neural network for multimodal matching. In: K\u016frkov\u00e1, V., Manolopoulos, Y., Hammer, B., Iliadis, L., Maglogiannis, I. (eds.) ICANN 2018. LNCS, vol. 11139, pp. 14\u201321. Springer, Cham (2018). \n                      https:\/\/doi.org\/10.1007\/978-3-030-01418-6_2"}],"container-title":["Lecture Notes in Computer Science","Artificial Neural Networks and Machine Learning \u2013 ICANN 2019: Image Processing"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-30508-6_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,9,8]],"date-time":"2019-09-08T23:18:42Z","timestamp":1567984722000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-30508-6_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030305079","9783030305086"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-30508-6_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"9 September 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICANN","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Artificial Neural Networks","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Munich","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Germany","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17 September 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icann2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/e-nns.org\/icann2019\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}