{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,27]],"date-time":"2026-03-27T08:37:32Z","timestamp":1774600652828,"version":"3.50.1"},"publisher-location":"Cham","reference-count":85,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585358","type":"print"},{"value":"9783030585365","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58536-5_21","type":"book-chapter","created":{"date-parts":[[2020,11,2]],"date-time":"2020-11-02T23:02:42Z","timestamp":1604358162000},"page":"347-365","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":212,"title":["Mining Cross-Image Semantics for Weakly Supervised Semantic Segmentation"],"prefix":"10.1007","author":[{"given":"Guolei","family":"Sun","sequence":"first","affiliation":[]},{"given":"Wenguan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jifeng","family":"Dai","sequence":"additional","affiliation":[]},{"given":"Luc","family":"Van Gool","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,3]]},"reference":[{"key":"21_CR1","doi-asserted-by":"crossref","unstructured":"Ahn, J., Cho, S., Kwak, S.: Weakly supervised learning of instance segmentation with inter-pixel relations. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00231"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Ahn, J., Kwak, S.: Learning pixel-level semantic affinity with image-level supervision for weakly supervised semantic segmentation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00523"},{"key":"21_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"549","DOI":"10.1007\/978-3-319-46478-7_34","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Bearman","year":"2016","unstructured":"Bearman, A., Russakovsky, O., Ferrari, V., Fei-Fei, L.: What\u2019s the point: semantic segmentation with point supervision. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9911, pp. 549\u2013565. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46478-7_34"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Cao, J., Pang, Y., Li, X.: Triply supervised decoder networks for joint detection and segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00757"},{"key":"21_CR5","doi-asserted-by":"crossref","unstructured":"Chaudhry, A., Dokania, P.K., Torr, P.H.: Discovering class-specific pixels for weakly-supervised semantic segmentation. In: BMVC (2017)","DOI":"10.5244\/C.31.20"},{"issue":"4","key":"21_CR6","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","volume":"40","author":"LC Chen","year":"2017","unstructured":"Chen, L.C., Papandreou, G., Kokkinos, I., Murphy, K., Yuille, A.L.: DeepLab: semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs. TPAMI 40(4), 834\u2013848 (2017)","journal-title":"TPAMI"},{"key":"21_CR7","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"833","DOI":"10.1007\/978-3-030-01234-2_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"L-C Chen","year":"2018","unstructured":"Chen, L.-C., Zhu, Y., Papandreou, G., Schroff, F., Adam, H.: Encoder-decoder with atrous separable convolution for semantic image segmentation. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 833\u2013851. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_49"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, J., Dong, L., Lapata, M.: Long short-term memory-networks for machine reading. In: EMNLP (2016)","DOI":"10.18653\/v1\/D16-1053"},{"key":"21_CR9","doi-asserted-by":"crossref","unstructured":"Chu, X., Yang, W., Ouyang, W., Ma, C., Yuille, A.L., Wang, X.: Multi-context attention for human pose estimation. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.601"},{"key":"21_CR10","doi-asserted-by":"crossref","unstructured":"Dai, J., He, K., Sun, J.: BoxSup: exploiting bounding boxes to supervise convolutional networks for semantic segmentation. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.191"},{"issue":"1","key":"21_CR11","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S.A., Van Gool, L., Williams, C.K., Winn, J., Zisserman, A.: The pascal visual object classes challenge: a retrospective. IJCV 111(1), 98\u2013136 (2015)","journal-title":"IJCV"},{"key":"21_CR12","doi-asserted-by":"crossref","unstructured":"Fan, J., Zhang, Z., Tan, T.: CIAN: cross-image affinity net for weakly supervised semantic segmentation. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6705"},{"key":"21_CR13","doi-asserted-by":"crossref","unstructured":"Fang, H., Lu, G., Fang, X., Xie, J., Tai, Y., Lu, C.: Weakly and semi supervised human body part parsing via pose-guided knowledge transfer. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00015"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Fu, J., et al.: Dual attention network for scene segmentation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00326"},{"key":"21_CR15","doi-asserted-by":"crossref","unstructured":"Ge, W., Yang, S., Yu, Y.: Multi-evidence filtering and fusion for multi-label classification, object detection and semantic segmentation based on weakly supervised learning. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00139"},{"key":"21_CR16","unstructured":"Gidaris, S., Singh, P., Komodakis, N.: Unsupervised representation learning by predicting image rotations. In: ICLR (2018)"},{"key":"21_CR17","unstructured":"Griffin, G., Holub, A., Perona, P.: Caltech-256 object category dataset (2007)"},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Hariharan, B., Arbel\u00e1ez, P., Bourdev, L., Maji, S., Malik, J.: Semantic contours from inverse detectors. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126343"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Hong, S., Yeo, D., Kwak, S., Lee, H., Han, B.: Weakly supervised semantic segmentation using web-crawled videos. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.239"},{"issue":"4","key":"21_CR21","doi-asserted-by":"publisher","first-page":"815","DOI":"10.1109\/TPAMI.2018.2815688","volume":"41","author":"Q Hou","year":"2019","unstructured":"Hou, Q., Cheng, M.M., Hu, X., Borji, A., Tu, Z., Torr, P.: Deeply supervised salient object detection with short connections. TPAMI 41(4), 815\u2013828 (2019)","journal-title":"TPAMI"},{"key":"21_CR22","unstructured":"Hou, Q., Jiang, P., Wei, Y., Cheng, M.M.: Self-erasing network for integral object attention. In: NeurIPS (2018)"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Huang, Z., Wang, X., Wang, J., Liu, W., Wang, J.: Weakly-supervised semantic segmentation network with deep seeded region growing. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00733"},{"key":"21_CR25","doi-asserted-by":"crossref","unstructured":"Jiang, P.T., Hou, Q., Cao, Y., Cheng, M.M., Wei, Y., Xiong, H.K.: Integral object mining via online attention accumulation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00216"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Jin, B., Ortiz Segovia, M.V., Susstrunk, S.: Webly supervised semantic segmentation. In: ICCV (2017)","DOI":"10.1109\/CVPR.2017.185"},{"key":"21_CR27","doi-asserted-by":"crossref","unstructured":"Joulin, A., Bach, F., Ponce, J.: Discriminative clustering for image co-segmentation. In: CVPR (2010)","DOI":"10.1109\/CVPR.2010.5539868"},{"key":"21_CR28","doi-asserted-by":"crossref","unstructured":"Kim, D., Cho, D., Yoo, D., So Kweon, I.: Two-phase learning for weakly supervised object localization. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.382"},{"key":"21_CR29","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"695","DOI":"10.1007\/978-3-319-46493-0_42","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Kolesnikov","year":"2016","unstructured":"Kolesnikov, A., Lampert, C.H.: Seed, expand and constrain: three principles for weakly-supervised image segmentation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 695\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_42"},{"key":"21_CR30","unstructured":"Kr\u00e4henb\u00fchl, P., Koltun, V.: Efficient inference in fully connected CRFs with Gaussian edge potentials. In: NeurIPS (2011)"},{"key":"21_CR31","unstructured":"Krizhevsky, A., Sutskever, I., Hinton, G.E.: ImageNet classification with deep convolutional neural networks. In: NeurIPS (2012)"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Kumar Singh, K., Jae Lee, Y.: Hide-and-seek: forcing a network to be meticulous for weakly-supervised object and action localization. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.381"},{"key":"21_CR33","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., Lee, S., Lee, J., Yoon, S.: FickleNet: weakly and semi-supervised semantic image segmentation using stochastic inference. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00541"},{"key":"21_CR34","doi-asserted-by":"crossref","unstructured":"Lee, J., Kim, E., Lee, S., Lee, J., Yoon, S.: Frame-to-frame aggregation of active regions in web videos for weakly supervised semantic segmentation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00691"},{"key":"21_CR35","unstructured":"Lee, S., Lee, J., Lee, J., Park, C.K., Yoon, S.: Robust tumor localization with pyramid grad-cam. arXiv preprint (2018)"},{"key":"21_CR36","doi-asserted-by":"crossref","unstructured":"Li, K., Wu, Z., Peng, K.C., Ernst, J., Fu, Y.: Tell me where to look: guided attention inference network. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00960"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Li, K., Zhang, Y., Li, K., Li, Y., Fu, Y.: Attention bridging network for knowledge transfer. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00530"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Lin, D., Dai, J., Jia, J., He, K., Sun, J.: ScribbleSup: scribble-supervised convolutional networks for semantic segmentation. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.344"},{"key":"21_CR39","unstructured":"Lin, Z., et al.: A structured self-attentive sentence embedding. In: ICLR (2017)"},{"key":"21_CR40","doi-asserted-by":"crossref","unstructured":"Liu, J.J., Hou, Q., Cheng, M.M., Feng, J., Jiang, J.: A simple pooling-based design for real-time salient object detection. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00404"},{"key":"21_CR41","unstructured":"Lu, J., Yang, J., Batra, D., Parikh, D.: Hierarchical question-image co-attention for visual question answering. In: NeurIPS (2016)"},{"key":"21_CR42","doi-asserted-by":"crossref","unstructured":"Lu, X., Wang, W., Ma, C., Shen, J., Shao, L., Porikli, F.: See more, know more: unsupervised video object segmentation with co-attention Siamese networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00374"},{"key":"21_CR43","doi-asserted-by":"crossref","unstructured":"Luong, M.T., Pham, H., Manning, C.D.: Effective approaches to attention-based neural machine translation. In: EMNLP (2015)","DOI":"10.18653\/v1\/D15-1166"},{"key":"21_CR44","doi-asserted-by":"crossref","unstructured":"Nguyen, D.K., Okatani, T.: Improved fusion of visual and language representations by dense symmetric co-attention for visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00637"},{"key":"21_CR45","unstructured":"Odena, A., Olah, C., Shlens, J.: Conditional image synthesis with auxiliary classifier GANs. In: ICML (2017)"},{"key":"21_CR46","doi-asserted-by":"crossref","unstructured":"Pan, B., Cao, Z., Adeli, E., Niebles, J.C.: Adversarial cross-domain action recognition with co-attention. In: AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6854"},{"key":"21_CR47","doi-asserted-by":"crossref","unstructured":"Papandreou, G., Chen, L.C., Murphy, K.P., Yuille, A.L.: Weakly-and semi-supervised learning of a deep convolutional network for semantic image segmentation. In: ICCV (2015)","DOI":"10.1109\/ICCV.2015.203"},{"key":"21_CR48","unstructured":"Pathak, D., Shelhamer, E., Long, J., Darrell, T.: Fully convolutional multi-class multiple instance learning. arXiv preprint (2014)"},{"key":"21_CR49","unstructured":"Paulus, R., Xiong, C., Socher, R.: A deep reinforced model for abstractive summarization. In: ICLR (2018)"},{"key":"21_CR50","doi-asserted-by":"crossref","unstructured":"Pinheiro, P.O., Collobert, R.: From image-level to pixel-level labeling with convolutional networks. In: CVPR (2015)","DOI":"10.1109\/CVPR.2015.7298780"},{"key":"21_CR51","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1007\/978-3-319-46484-8_6","volume-title":"Computer Vision \u2013 ECCV 2016","author":"X Qi","year":"2016","unstructured":"Qi, X., Liu, Z., Shi, J., Zhao, H., Jia, J.: Augmented feedback in semantic segmentation under image level supervision. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 90\u2013105. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_6"},{"key":"21_CR52","doi-asserted-by":"crossref","unstructured":"Roy, A., Todorovic, S.: Combining bottom-up, top-down, and smoothness cues for weakly supervised image segmentation. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.770"},{"issue":"3","key":"21_CR53","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., et al.: ImageNet large scale visual recognition challenge. IJCV 115(3), 211\u2013252 (2015)","journal-title":"IJCV"},{"key":"21_CR54","doi-asserted-by":"crossref","unstructured":"Shen, T., Lin, G., Liu, L., Shen, C., Reid, I.: Weakly supervised semantic segmentation based on web image co-segmentation. In: BMVC (2017)","DOI":"10.5244\/C.31.17"},{"key":"21_CR55","doi-asserted-by":"crossref","unstructured":"Shen, T., Lin, G., Shen, C., Reid, I.: Bootstrapping the performance of webly supervised semantic segmentation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00148"},{"key":"21_CR56","doi-asserted-by":"crossref","unstructured":"Shimoda, W., Yanai, K.: Self-supervised difference detection for weakly-supervised semantic segmentation. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00531"},{"key":"21_CR57","unstructured":"Simonyan, K., Zisserman, A.: Very deep convolutional networks for large-scale image recognition. arXiv preprint (2014)"},{"key":"21_CR58","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"834","DOI":"10.1007\/978-3-030-01270-0_49","volume-title":"Computer Vision \u2013 ECCV 2018","author":"M Sun","year":"2018","unstructured":"Sun, M., Yuan, Y., Zhou, F., Ding, E.: Multi-attention multi-class constraint for fine-grained image recognition. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11220, pp. 834\u2013850. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01270-0_49"},{"key":"21_CR59","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"388","DOI":"10.1007\/978-3-319-46493-0_24","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Tokmakov","year":"2016","unstructured":"Tokmakov, P., Alahari, K., Schmid, C.: Weakly-supervised semantic segmentation using motion cues. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 388\u2013404. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_24"},{"key":"21_CR60","unstructured":"Vaswani, A., et al.: Attention is all you need. In: NeurIPS (2017)"},{"key":"21_CR61","doi-asserted-by":"crossref","unstructured":"Wang, W., Lu, X., Shen, J., Crandall, D.J., Shao, L.: Zero-shot video object segmentation via attentive graph neural networks. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00933"},{"issue":"6","key":"21_CR62","first-page":"1011","volume":"18","author":"W Wang","year":"2016","unstructured":"Wang, W., Shen, J.: Higher-order image co-segmentation. IEEE TMM 18(6), 1011\u20131021 (2016)","journal-title":"IEEE TMM"},{"key":"21_CR63","doi-asserted-by":"crossref","unstructured":"Wang, W., Zhu, H., Dai, J., Pang, Y., Shen, J., Shao, L.: Hierarchical human parsing with typed part-relation reasoning. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00895"},{"key":"21_CR64","doi-asserted-by":"crossref","unstructured":"Wang, X., You, S., Li, X., Ma, H.: Weakly-supervised semantic segmentation by iteratively mining common object features. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00147"},{"key":"21_CR65","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., He, K.: Non-local neural networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00813"},{"key":"21_CR66","doi-asserted-by":"crossref","unstructured":"Wang, X., Li, L., Ye, W., Long, M., Wang, J.: Transferable attention for domain adaptation. In: AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33015345"},{"key":"21_CR67","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Reinforced cross-modal matching and self-supervised imitation learning for vision-language navigation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00679"},{"key":"21_CR68","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1007\/978-3-319-46493-0_14","volume-title":"Computer Vision \u2013 ECCV 2016","author":"W Shimoda","year":"2016","unstructured":"Shimoda, W., Yanai, K.: Distinct class-specific saliency maps for weakly supervised semantic segmentation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9908, pp. 218\u2013234. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46493-0_14"},{"key":"21_CR69","doi-asserted-by":"crossref","unstructured":"Wei, Y., Feng, J., Liang, X., Cheng, M.M., Zhao, Y., Yan, S.: Object region mining with adversarial erasing: a simple classification to semantic segmentation approach. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.687"},{"issue":"11","key":"21_CR70","doi-asserted-by":"publisher","first-page":"2314","DOI":"10.1109\/TPAMI.2016.2636150","volume":"39","author":"Y Wei","year":"2016","unstructured":"Wei, Y.: STC: a simple to complex framework for weakly-supervised semantic segmentation. TPAMI 39(11), 2314\u20132320 (2016)","journal-title":"TPAMI"},{"key":"21_CR71","doi-asserted-by":"crossref","unstructured":"Wei, Y., Xiao, H., Shi, H., Jie, Z., Feng, J., Huang, T.S.: Revisiting dilated convolution: a simple approach for weakly-and semi-supervised semantic segmentation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00759"},{"key":"21_CR72","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01234-2_1","volume-title":"Computer Vision \u2013 ECCV 2018","author":"S Woo","year":"2018","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: CBAM: convolutional block attention module. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11211, pp. 3\u201319. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1"},{"key":"21_CR73","doi-asserted-by":"crossref","unstructured":"Wu, Q., Wang, P., Shen, C., Reid, I., Van Den Hengel, A.: Are you talking to me? Reasoned visual dialog generation through adversarial learning. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00639"},{"key":"21_CR74","unstructured":"Xiong, C., Zhong, V., Socher, R.: Dynamic coattention networks for question answering. In: ICLR (2017)"},{"key":"21_CR75","doi-asserted-by":"crossref","unstructured":"Xu, T., et al.: AttnGAN: fine-grained text to image generation with attentional generative adversarial networks. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00143"},{"key":"21_CR76","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"346","DOI":"10.1007\/978-3-319-46484-8_21","volume-title":"Computer Vision \u2013 ECCV 2016","author":"Q Ye","year":"2016","unstructured":"Ye, Q., Yuan, S., Kim, T.-K.: Spatial attention deep net with partial PSO for hierarchical hybrid hand pose estimation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9912, pp. 346\u2013361. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46484-8_21"},{"key":"21_CR77","doi-asserted-by":"crossref","unstructured":"Yu, Z., Yu, J., Cui, Y., Tao, D., Tian, Q.: Deep modular co-attention networks for visual question answering. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00644"},{"key":"21_CR78","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"818","DOI":"10.1007\/978-3-319-10590-1_53","volume-title":"Computer Vision \u2013 ECCV 2014","author":"MD Zeiler","year":"2014","unstructured":"Zeiler, M.D., Fergus, R.: Visualizing and understanding convolutional networks. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8689, pp. 818\u2013833. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10590-1_53"},{"key":"21_CR79","unstructured":"Zeng, Y., Zhuge, Y., Lu, H., Zhang, L.: Joint learning of saliency detection and weakly supervised semantic segmentation. In: ICCV (2019)"},{"key":"21_CR80","unstructured":"Zhang, H., Goodfellow, I., Metaxas, D., Odena, A.: Self-attention generative adversarial networks. In: ICML (2019)"},{"key":"21_CR81","doi-asserted-by":"crossref","unstructured":"Zhang, X., Wei, Y., Feng, J., Yang, Y., Huang, T.S.: Adversarial complementary learning for weakly supervised object localization. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00144"},{"key":"21_CR82","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Nie, S., Liu, W., Xu, X., Zhang, D., Shen, H.T.: Sequence-to-sequence domain adaptation network for robust text image recognition. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00285"},{"key":"21_CR83","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Wang, W., Qi, S., Zhu, S.C.: Reasoning visual dialogs with structural and partial observations. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00683"},{"key":"21_CR84","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., Torralba, A.: Learning deep features for discriminative localization. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.319"},{"key":"21_CR85","doi-asserted-by":"crossref","unstructured":"Zhu, Z., Huang, T., Shi, B., Yu, M., Wang, B., Bai, X.: Progressive pose attention transfer for person image generation. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00245"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58536-5_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,3]],"date-time":"2024-11-03T00:42:29Z","timestamp":1730594549000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58536-5_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585358","9783030585365"],"references-count":85,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58536-5_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"3 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}