{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T16:44:05Z","timestamp":1777567445913,"version":"3.51.4"},"publisher-location":"Cham","reference-count":49,"publisher":"Springer International Publishing","isbn-type":[{"value":"9783030585228","type":"print"},{"value":"9783030585235","type":"electronic"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58523-5_21","type":"book-chapter","created":{"date-parts":[[2020,12,3]],"date-time":"2020-12-03T20:13:16Z","timestamp":1607026396000},"page":"353-370","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":18,"title":["Learning to Generate Grounded Visual Captions Without Localization Supervision"],"prefix":"10.1007","author":[{"given":"Chih-Yao","family":"Ma","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yannis","family":"Kalantidis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ghassan","family":"AlRegib","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Peter","family":"Vajda","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Marcus","family":"Rohrbach","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zsolt","family":"Kira","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,12,4]]},"reference":[{"key":"21_CR1","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1007\/978-3-319-46454-1_24","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Anderson","year":"2016","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: SPICE: semantic propositional image caption evaluation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 382\u2013398. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_24"},{"key":"21_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), p. 6 (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"21_CR3","unstructured":"Banerjee, S., Lavie, A.: Meteor: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, vol. 29, pp. 65\u201372 (2005)"},{"key":"21_CR4","doi-asserted-by":"crossref","unstructured":"Chen, X., Lawrence Zitnick, C.: Mind\u2019s eye: a recurrent visual representation for image caption generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2422\u20132431 (2015)","DOI":"10.1109\/CVPR.2015.7298856"},{"key":"21_CR5","doi-asserted-by":"publisher","first-page":"90","DOI":"10.1016\/j.cviu.2017.10.001","volume":"163","author":"A Das","year":"2017","unstructured":"Das, A., Agrawal, H., Zitnick, L., Parikh, D., Batra, D.: Human attention in visual question answering: do humans and deep networks look at the same regions? Comput. Vis. Image Underst. 163, 90\u2013100 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"21_CR6","doi-asserted-by":"crossref","unstructured":"Das, P., Xu, C., Doell, R.F., Corso, J.J.: A thousand frames in just a few words: lingual description of videos through latent topics and sparse object stitching. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2634\u20132641 (2013)","DOI":"10.1109\/CVPR.2013.340"},{"key":"21_CR7","doi-asserted-by":"crossref","unstructured":"Deng, C., Wu, Q., Wu, Q., Hu, F., Lyu, F., Tan, M.: Visual grounding via accumulated attention. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7746\u20137755 (2018)","DOI":"10.1109\/CVPR.2018.00808"},{"key":"21_CR8","doi-asserted-by":"crossref","unstructured":"Donahue, J., et al.: Long-term recurrent convolutional networks for visual recognition and description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2625\u20132634 (2015)","DOI":"10.1109\/CVPR.2015.7298878"},{"key":"21_CR9","unstructured":"Duan, X., Huang, W., Gan, C., Wang, J., Zhu, W., Huang, J.: Weakly supervised dense event captioning in videos. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 3063\u20133073 (2018)"},{"key":"21_CR10","unstructured":"He, D., et al.: Dual learning for machine translation. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 820\u2013828 (2016)"},{"key":"21_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2980\u20132988. IEEE (2017)","DOI":"10.1109\/ICCV.2017.322"},{"key":"21_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"269","DOI":"10.1007\/978-3-030-01216-8_17","volume-title":"Computer Vision \u2013 ECCV 2018","author":"LA Hendricks","year":"2018","unstructured":"Hendricks, L.A., Hu, R., Darrell, T., Akata, Z.: Grounding visual explanations. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11206, pp. 269\u2013286. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01216-8_17"},{"key":"21_CR13","unstructured":"Hosseini-Asl, E., Zhou, Y., Xiong, C., Socher, R.: Augmented cyclic adversarial learning for low resource domain adaptation. In: Proceedings of the International Conference on Learning Representations (ICLR) (2019)"},{"key":"21_CR14","doi-asserted-by":"crossref","unstructured":"Hu, R., Xu, H., Rohrbach, M., Feng, J., Saenko, K., Darrell, T.: Natural language object retrieval. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4555\u20134564 (2016)","DOI":"10.1109\/CVPR.2016.493"},{"key":"21_CR15","unstructured":"Huang, Q., Zhang, P., Wu, D., Zhang, L.: Turbo learning for captionbot and drawingbot. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 6456\u20136466 (2018)"},{"key":"21_CR16","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: Proceedings of the International Conference on Learning Representations (ICLR) (2015)"},{"issue":"12","key":"21_CR17","doi-asserted-by":"publisher","first-page":"2891","DOI":"10.1109\/TPAMI.2012.162","volume":"35","author":"G Kulkarni","year":"2013","unstructured":"Kulkarni, G., et al.: Babytalk: understanding and generating simple image descriptions. IEEE Trans. Pattern Anal. Mach. Intell. 35(12), 2891\u20132903 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"21_CR18","doi-asserted-by":"crossref","unstructured":"Li, Y., Yao, T., Pan, Y., Chao, H., Mei, T.: Jointly localizing and describing events for dense video captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7492\u20137500 (2018)","DOI":"10.1109\/CVPR.2018.00782"},{"key":"21_CR19","doi-asserted-by":"crossref","unstructured":"Liu, C., Mao, J., Sha, F., Yuille, A.: Attention correctness in neural image captioning. In: Thirty-First AAAI Conference on Artificial Intelligence (2017)","DOI":"10.1609\/aaai.v31i1.11197"},{"key":"21_CR20","doi-asserted-by":"crossref","unstructured":"Lu, J., Yang, J., Batra, D., Parikh, D.: Neural baby talk. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7219\u20137228 (2018)","DOI":"10.1109\/CVPR.2018.00754"},{"key":"21_CR21","doi-asserted-by":"crossref","unstructured":"Ma, C.Y., Kadav, A., Melvin, I., Kira, Z., AlRegib, G., Graf, H.P.: Attend and interact: higher-order object interactions for video understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00710"},{"key":"21_CR22","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, pp. 311\u2013318. Association for Computational Linguistics (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"21_CR23","doi-asserted-by":"crossref","unstructured":"Park, J.S., Rohrbach, M., Darrell, T., Rohrbach, A.: Adversarial inference for multi-sentence video description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00676"},{"key":"21_CR24","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., Caicedo, J.C., Hockenmaier, J., Lazebnik, S.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 2641\u20132649 (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"21_CR25","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 91\u201399 (2015)"},{"key":"21_CR26","doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Hendricks, L.A., Burns, K., Darrell, T., Saenko, K.: Object hallucination in image captioning. In: Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 4035\u20134045 (2018)","DOI":"10.18653\/v1\/D18-1437"},{"key":"21_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"817","DOI":"10.1007\/978-3-319-46448-0_49","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Rohrbach","year":"2016","unstructured":"Rohrbach, A., Rohrbach, M., Hu, R., Darrell, T., Schiele, B.: Grounding of textual phrases in images by reconstruction. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 817\u2013834. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_49"},{"key":"21_CR28","doi-asserted-by":"crossref","unstructured":"Rohrbach, A., Rohrbach, M., Tang, S., Joon Oh, S., Schiele, B.: Generating descriptions with grounded and co-referenced people. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4979\u20134989 (2017)","DOI":"10.1109\/CVPR.2017.447"},{"issue":"1","key":"21_CR29","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1007\/s11263-016-0987-1","volume":"123","author":"A Rohrbach","year":"2017","unstructured":"Rohrbach, A., et al.: Movie description. Int. J. Comput. Vis. 123(1), 94\u2013120 (2017)","journal-title":"Int. J. Comput. Vis."},{"key":"21_CR30","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Lee, S., Shen, Y., Jin, H., Batra, D., Parikh, D.: Taking a hint: Leveraging explanations to make vision and language models more grounded. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00268"},{"key":"21_CR31","doi-asserted-by":"crossref","unstructured":"Shah, M., Chen, X., Rohrbach, M., Parikh, D.: Cycle-consistency for robust visual question answering. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00681"},{"key":"21_CR32","doi-asserted-by":"crossref","unstructured":"Shetty, R., Rohrbach, M., Anne Hendricks, L., Fritz, M., Schiele, B.: Speaking the same language: Matching machine to human captions by adversarial training. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 4135\u20134144 (2017)","DOI":"10.1109\/ICCV.2017.445"},{"key":"21_CR33","doi-asserted-by":"crossref","unstructured":"Tang, D., et al.: Learning to collaborate for question answering and asking. In: Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers), vol. 1, pp. 1564\u20131574 (2018)","DOI":"10.18653\/v1\/N18-1141"},{"key":"21_CR34","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Advances in Neural Information Processing Systems (NeurIPS), pp. 5998\u20136008 (2017)"},{"key":"21_CR35","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence Zitnick, C., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"21_CR36","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Anne Hendricks, L., Rohrbach, M., Mooney, R., Darrell, T., Saenko, K.: Captioning images with diverse objects. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5753\u20135761 (2017)","DOI":"10.1109\/CVPR.2017.130"},{"key":"21_CR37","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R., Darrell, T., Saenko, K.: Sequence to sequence-video to text. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 4534\u20134542 (2015)","DOI":"10.1109\/ICCV.2015.515"},{"key":"21_CR38","doi-asserted-by":"crossref","unstructured":"Vinyals, O., Toshev, A., Bengio, S., Erhan, D.: Show and tell: a neural image caption generator. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 3156\u20133164 (2015)","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"21_CR39","doi-asserted-by":"crossref","unstructured":"Wang, B., Ma, L., Zhang, W., Liu, W.: Reconstruction network for video captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 7622\u20137631 (2018)","DOI":"10.1109\/CVPR.2018.00795"},{"key":"21_CR40","doi-asserted-by":"crossref","unstructured":"Wang, F., Huang, Q., Guibas, L.J.: Image co-segmentation via consistent functional maps. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), December 2013","DOI":"10.1109\/ICCV.2013.110"},{"key":"21_CR41","doi-asserted-by":"crossref","unstructured":"Xiao, F., Sigal, L., Jae Lee, Y.: Weakly-supervised visual grounding of phrases with linguistic structures. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 5945\u20135954 (2017)","DOI":"10.1109\/CVPR.2017.558"},{"key":"21_CR42","doi-asserted-by":"crossref","unstructured":"Yao, T., Pan, Y., Li, Y., Qiu, Z., Mei, T.: Boosting image captioning with attributes. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 22\u201329 (2017)","DOI":"10.1109\/ICCV.2017.524"},{"key":"21_CR43","doi-asserted-by":"crossref","unstructured":"You, Q., Jin, H., Wang, Z., Fang, C., Luo, J.: Image captioning with semantic attention. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4651\u20134659 (2016)","DOI":"10.1109\/CVPR.2016.503"},{"key":"21_CR44","doi-asserted-by":"crossref","unstructured":"Zanfir, M., Marinoiu, E., Sminchisescu, C.: Spatio-temporal attention models for grounded video captioning. In: Asian Conference on Computer Vision, pp. 104\u2013119 (2016)","DOI":"10.1007\/978-3-319-54190-7_7"},{"key":"21_CR45","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Niebles, J.C., Soto, A.: Interpretable visual question answering by visual grounding from attention supervision mining. In: 2019 IEEE Winter Conference on Applications of Computer Vision (WACV), pp. 349\u2013357. IEEE (2019)","DOI":"10.1109\/WACV.2019.00043"},{"key":"21_CR46","doi-asserted-by":"crossref","unstructured":"Zhou, L., Kalantidis, Y., Chen, X., Corso, J.J., Rohrbach, M.: Grounded video description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2019)","DOI":"10.1109\/CVPR.2019.00674"},{"key":"21_CR47","unstructured":"Zhou, L., Louis, N., Corso, J.J.: Weakly-supervised video object grounding from text by loss weighting and object interaction. In: British Machine Vision Conference (BMVC) (2018)"},{"key":"21_CR48","doi-asserted-by":"crossref","unstructured":"Zhou, L., Xu, C., Koch, P., Corso, J.J.: Watch what you just said: image captioning with text-conditional attention. In: Proceedings of the on Thematic Workshops of ACM Multimedia 2017, pp. 305\u2013313. ACM (2017)","DOI":"10.1145\/3126686.3126717"},{"key":"21_CR49","doi-asserted-by":"crossref","unstructured":"Zhu, J.Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE International Conference on Computer Vision (ICCV), pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58523-5_21","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:08:09Z","timestamp":1733184489000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58523-5_21"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585228","9783030585235"],"references-count":49,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58523-5_21","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"4 December 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}