{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T11:22:18Z","timestamp":1764588138764,"version":"3.40.3"},"publisher-location":"Cham","reference-count":66,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030585822"},{"type":"electronic","value":"9783030585839"}],"license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020]]},"DOI":"10.1007\/978-3-030-58583-9_42","type":"book-chapter","created":{"date-parts":[[2020,11,18]],"date-time":"2020-11-18T10:08:18Z","timestamp":1605694098000},"page":"701-719","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":19,"title":["Sound2Sight: Generating Visual Dynamics from Sound and Context"],"prefix":"10.1007","author":[{"given":"Moitreya","family":"Chatterjee","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Anoop","family":"Cherian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2020,11,19]]},"reference":[{"key":"42_CR1","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Zisserman, A.: Look, listen and learn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 609\u2013617 (2017)","DOI":"10.1109\/ICCV.2017.73"},{"key":"42_CR2","unstructured":"ASMR, T.: Painting ASMR (2019). https:\/\/www.youtube.com\/playlist?list=PL5Y0dQ2DJHj47sK5jsbVkVpTQ9r7T090X. Accessed 5 Nov 2019"},{"key":"42_CR3","doi-asserted-by":"crossref","unstructured":"Aytar, Y., Vondrick, C., Torralba, A.: SoundNet: learning sound representations from unlabeled video. In: Proceedings of Advances in Neural Information Processing Systems, pp. 892\u2013900 (2016)","DOI":"10.1109\/CVPR.2016.18"},{"key":"42_CR4","unstructured":"Babaeizadeh, M., Finn, C., Erhan, D., Campbell, R.H., Levine, S.: Stochastic variational video prediction. arXiv preprint arXiv:1710.11252 (2017)"},{"key":"42_CR5","unstructured":"Brock, A., Donahue, J., Simonyan, K.: Large scale GAN training for high fidelity natural image synthesis. arXiv preprint arXiv:1809.11096 (2018)"},{"key":"42_CR6","doi-asserted-by":"crossref","unstructured":"Cardoso Duarte, A., et al.: Wav2Pix: speech-conditioned face generation using generative adversarial networks. In: Proceedings of IEEE International Conference on Acoustics, Speech, and Signal Processing, Brighton Conference Centre, Brighton, United Kingdom, 12\u201317 May 2019, pp. 8633\u20138637. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8682970"},{"key":"42_CR7","doi-asserted-by":"crossref","unstructured":"Chen, L., Maddox, R.K., Duan, Z., Xu, C.: Hierarchical cross-modal talking face generation with dynamic pixel-wise loss. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7832\u20137841 (2019)","DOI":"10.1109\/CVPR.2019.00802"},{"key":"42_CR8","doi-asserted-by":"crossref","unstructured":"Chen, L., Srivastava, S., Duan, Z., Xu, C.: Deep cross-modal audio-visual generation. In: Proceedings of the on Thematic Workshops of ACM Multimedia 2017. ACM (2017)","DOI":"10.1145\/3126686.3126723"},{"issue":"3","key":"42_CR9","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1002\/wps.20557","volume":"17","author":"PR Corlett","year":"2018","unstructured":"Corlett, P.R., Powers, A.R.: Conditioned hallucinations: historic insights and future directions. World Psychiatry 17(3), 361 (2018)","journal-title":"World Psychiatry"},{"key":"42_CR10","unstructured":"Denton, E., Fergus, R.: Stochastic video generation with a learned prior. In: Proceedings of International Conference on Machine Learning, pp. 1182\u20131191 (2018)"},{"key":"42_CR11","doi-asserted-by":"crossref","unstructured":"Deshpande, I., Zhang, Z., Schwing, A.G.: Generative modeling using the sliced Wasserstein distance. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3483\u20133491 (2018)","DOI":"10.1109\/CVPR.2018.00367"},{"key":"42_CR12","unstructured":"Finn, C., Goodfellow, I., Levine, S.: Unsupervised learning for physical interaction through video prediction. In: Proceedings of Advances in Neural Information Processing Systems, pp. 64\u201372 (2016)"},{"key":"42_CR13","unstructured":"Fragkiadaki, K., Agrawal, P., Levine, S., Malik, J.: Learning visual predictive models of physics for playing billiards. arXiv preprint arXiv:1511.07404 (2015)"},{"key":"42_CR14","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 776\u2013780. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"42_CR15","unstructured":"Goodfellow, I., et al.: Generative adversarial nets. In: Proceedings of Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"42_CR16","unstructured":"Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., Courville, A.C.: Improved training of Wasserstein GANs. In: Proceedings of Advances in Neural Information Processing Systems, pp. 5767\u20135777 (2017)"},{"key":"42_CR17","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"610","DOI":"10.1007\/978-3-030-01237-3_37","volume-title":"Computer Vision \u2013 ECCV 2018","author":"T Gupta","year":"2018","unstructured":"Gupta, T., Schwenk, D., Farhadi, A., Hoiem, D., Kembhavi, A.: Imagine this! Scripts to compositions to videos. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11212, pp. 610\u2013626. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01237-3_37"},{"key":"42_CR18","doi-asserted-by":"crossref","unstructured":"Hao, W., Zhang, Z., Guan, H.: CMCGAN: a uniform framework for cross-modal visual-audio mutual generation. In: Proceedings of Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.12329"},{"key":"42_CR19","doi-asserted-by":"crossref","unstructured":"Hao, Z., Huang, X., Belongie, S.: Controllable video generation with sparse trajectories. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7854\u20137863 (2018)","DOI":"10.1109\/CVPR.2018.00819"},{"key":"42_CR20","unstructured":"Harwath, D., Torralba, A., Glass, J.: Unsupervised learning of spoken language with visual context. In: Proceedings of Advances in Neural Information Processing Systems, pp. 1858\u20131866 (2016)"},{"issue":"8","key":"42_CR21","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., Schmidhuber, J.: Long short-term memory. Neural Comput. 9(8), 1735\u20131780 (1997)","journal-title":"Neural Comput."},{"key":"42_CR22","unstructured":"Hsieh, J.T., Liu, B., Huang, D.A., Fei-Fei, L.F., Niebles, J.C.: Learning to decompose and disentangle representations for video prediction. In: Proceedings of Advances in Neural Information Processing Systems, pp. 517\u2013526 (2018)"},{"key":"42_CR23","unstructured":"Ioffe, S., Szegedy, C.: Batch normalization: accelerating deep network training by reducing internal covariate shift. In: Proceedings of International Conference on Machine Learning, pp. 448\u2013456 (2015)"},{"issue":"11","key":"42_CR24","doi-asserted-by":"publisher","first-page":"1767","DOI":"10.1007\/s11263-019-01150-y","volume":"127","author":"A Jamaludin","year":"2019","unstructured":"Jamaludin, A., Chung, J.S., Zisserman, A.: You said that?: Synthesising talking faces from audio. Int. J. Comput. Vis. 127(11), 1767\u20131779 (2019). https:\/\/doi.org\/10.1007\/s11263-019-01150-y","journal-title":"Int. J. Comput. Vis."},{"key":"42_CR25","unstructured":"Jia, X., De Brabandere, B., Tuytelaars, T., Gool, L.V.: Dynamic filter networks. In: Proceedings of Advances in Neural Information Processing Systems, pp. 667\u2013675 (2016)"},{"issue":"4","key":"42_CR26","doi-asserted-by":"publisher","first-page":"94","DOI":"10.1145\/3072959.3073658","volume":"36","author":"T Karras","year":"2017","unstructured":"Karras, T., Aila, T., Laine, S., Herva, A., Lehtinen, J.: Audio-driven facial animation by joint end-to-end learning of pose and emotion. ACM Trans. Graph. 36(4), 94 (2017)","journal-title":"ACM Trans. Graph."},{"key":"42_CR27","doi-asserted-by":"crossref","unstructured":"Kidron, E., Schechner, Y.Y., Elad, M.: Pixels that sound. In: Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition, vol. 1, pp. 88\u201395. IEEE (2005)","DOI":"10.1109\/CVPR.2005.274"},{"key":"42_CR28","doi-asserted-by":"crossref","unstructured":"Kim, D., Woo, S., Lee, J.Y., Kweon, I.S.: Deep video inpainting. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5792\u20135801 (2019)","DOI":"10.1109\/CVPR.2019.00594"},{"key":"42_CR29","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"42_CR30","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational Bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"42_CR31","unstructured":"Kolouri, S., Pope, P.E., Martin, C.E., Rohde, G.K.: Sliced-Wasserstein autoencoder: an embarrassingly simple generative model. arXiv preprint arXiv:1804.01947 (2018)"},{"key":"42_CR32","unstructured":"Lamb, A., Dumoulin, V., Courville, A.: Discriminative regularization for generative models. arXiv preprint arXiv:1602.03220 (2016)"},{"issue":"11","key":"42_CR33","doi-asserted-by":"publisher","first-page":"2278","DOI":"10.1109\/5.726791","volume":"86","author":"Y LeCun","year":"1998","unstructured":"LeCun, Y., Bottou, L., Bengio, Y., Haffner, P., et al.: Gradient-based learning applied to document recognition. Proc. IEEE 86(11), 2278\u20132324 (1998)","journal-title":"Proc. IEEE"},{"key":"42_CR34","doi-asserted-by":"crossref","unstructured":"Li, Y., Min, M.R., Shen, D., Carlson, D., Carin, L.: Video generation from text. In: Proceedings of Thirty-Second AAAI Conference on Artificial Intelligence (2018)","DOI":"10.1609\/aaai.v32i1.12233"},{"key":"42_CR35","doi-asserted-by":"crossref","unstructured":"Lindell, D.B., Wetzstein, G., Koltun, V.: Acoustic non-line-of-sight imaging. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6780\u20136789 (2019)","DOI":"10.1109\/CVPR.2019.00694"},{"key":"42_CR36","unstructured":"Liu, M.Y., Breuel, T., Kautz, J.: Unsupervised image-to-image translation networks. In: Proceedings of Advances in Neural Information Processing Systems, pp. 700\u2013708 (2017)"},{"key":"42_CR37","doi-asserted-by":"crossref","unstructured":"Luo, Z., Peng, B., Huang, D.A., Alahi, A., Fei-Fei, L.: Unsupervised learning of long-term motion dynamics for videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2203\u20132212 (2017)","DOI":"10.1109\/CVPR.2017.751"},{"key":"42_CR38","doi-asserted-by":"crossref","unstructured":"Oh, T.H., et al.: Speech2Face: learning the face behind a voice. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7539\u20137548 (2019)","DOI":"10.1109\/CVPR.2019.00772"},{"key":"42_CR39","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1007\/978-3-030-01231-1_39","volume-title":"Computer Vision \u2013 ECCV 2018","author":"A Owens","year":"2018","unstructured":"Owens, A., Efros, A.A.: Audio-visual scene analysis with self-supervised multisensory features. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11210, pp. 639\u2013658. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01231-1_39"},{"key":"42_CR40","doi-asserted-by":"crossref","unstructured":"Owens, A., Isola, P., McDermott, J., Torralba, A., Adelson, E.H., Freeman, W.T.: Visually indicated sounds. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2405\u20132413 (2016)","DOI":"10.1109\/CVPR.2016.264"},{"key":"42_CR41","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"801","DOI":"10.1007\/978-3-319-46448-0_48","volume-title":"Computer Vision \u2013 ECCV 2016","author":"A Owens","year":"2016","unstructured":"Owens, A., Wu, J., McDermott, J.H., Freeman, W.T., Torralba, A.: Ambient sound provides supervision for visual learning. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9905, pp. 801\u2013816. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46448-0_48"},{"key":"42_CR42","doi-asserted-by":"crossref","unstructured":"Pan, J., et al.: Video generation from single semantic label map. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3733\u20133742 (2019)","DOI":"10.1109\/CVPR.2019.00385"},{"key":"42_CR43","volume-title":"The work of the digestive glands","author":"IP Pavlov","year":"1910","unstructured":"Pavlov, I.P.: The work of the digestive glands. Charles Griffin, Limited, London (1910)"},{"key":"42_CR44","unstructured":"Ranzato, M., Szlam, A., Bruna, J., Mathieu, M., Collobert, R., Chopra, S.: Video (language) modeling: a baseline for generative models of natural videos. arXiv preprint arXiv:1412.6604 (2014)"},{"key":"42_CR45","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","volume-title":"Medical Image Computing and Computer-Assisted Intervention \u2013 MICCAI 2015","author":"O Ronneberger","year":"2015","unstructured":"Ronneberger, O., Fischer, P., Brox, T.: U-Net: convolutional networks for biomedical image segmentation. In: Navab, N., Hornegger, J., Wells, W.M., Frangi, A.F. (eds.) MICCAI 2015. LNCS, vol. 9351, pp. 234\u2013241. Springer, Cham (2015). https:\/\/doi.org\/10.1007\/978-3-319-24574-4_28"},{"key":"42_CR46","doi-asserted-by":"crossref","unstructured":"Saito, M., Matsumoto, E., Saito, S.: Temporal generative adversarial nets with singular value clipping. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2830\u20132839 (2017)","DOI":"10.1109\/ICCV.2017.308"},{"key":"42_CR47","doi-asserted-by":"crossref","unstructured":"Shlizerman, E., Dery, L., Schoen, H., Kemelmacher-Shlizerman, I.: Audio to body dynamics. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7574\u20137583 (2018)","DOI":"10.1109\/CVPR.2018.00790"},{"key":"42_CR48","unstructured":"Srivastava, N., Mansimov, E., Salakhudinov, R.: Unsupervised learning of video representations using LSTMs. In: Proceedings of International Conference on Machine Learning, pp. 843\u2013852 (2015)"},{"key":"42_CR49","unstructured":"Sutskever, I., Vinyals, O., Le, Q.V.: Sequence to sequence learning with neural networks. In: Proceedings of Advances in Neural Information Processing Systems, pp. 3104\u20133112 (2014)"},{"issue":"4","key":"42_CR50","doi-asserted-by":"publisher","first-page":"95","DOI":"10.1145\/3072959.3073640","volume":"36","author":"S Suwajanakorn","year":"2017","unstructured":"Suwajanakorn, S., Seitz, S.M., Kemelmacher-Shlizerman, I.: Synthesizing Obama: learning lip sync from audio. ACM Trans. Graph. 36(4), 95 (2017)","journal-title":"ACM Trans. Graph."},{"issue":"4","key":"42_CR51","doi-asserted-by":"publisher","first-page":"93","DOI":"10.1145\/3072959.3073699","volume":"36","author":"S Taylor","year":"2017","unstructured":"Taylor, S., et al.: A deep learning approach for generalized speech animation. ACM Trans. Graph. 36(4), 93 (2017)","journal-title":"ACM Trans. Graph."},{"key":"42_CR52","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: MoCoGAN: Decomposing motion and content for video generation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1526\u20131535 (2018)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"42_CR53","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of Advances in Neural Information Processing Systems, pp. 5998\u20136008 (2017)"},{"key":"42_CR54","unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., Lee, H.: Decomposing motion and content for natural video sequence prediction. arXiv preprint arXiv:1706.08033 (2017)"},{"key":"42_CR55","unstructured":"Vondrick, C., Pirsiavash, H., Torralba, A.: Generating videos with scene dynamics. In: Proceedings of Advances in Neural Information Processing Systems, pp. 613\u2013621 (2016)"},{"key":"42_CR56","doi-asserted-by":"crossref","unstructured":"Vougioukas, K., Petridis, S., Pantic, M.: End-to-end speech-driven facial animation with temporal GANs. arXiv preprint arXiv:1805.09313 (2018)","DOI":"10.1007\/s11263-019-01251-8"},{"key":"42_CR57","doi-asserted-by":"crossref","unstructured":"Walker, J., Marino, K., Gupta, A., Hebert, M.: The pose knows: video forecasting by generating pose futures. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 3332\u20133341 (2017)","DOI":"10.1109\/ICCV.2017.361"},{"key":"42_CR58","doi-asserted-by":"crossref","unstructured":"Wan, C.H., Chuang, S.P., Lee, H.Y.: Towards audio to scene image synthesis using generative adversarial network. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing, pp. 496\u2013500. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8682383"},{"key":"42_CR59","unstructured":"Wang, T.C., et al.: Video-to-video synthesis. arXiv preprint arXiv:1808.06601 (2018)"},{"issue":"4","key":"42_CR60","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A.C., Sheikh, H.R., Simoncelli, E.P., et al.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004)","journal-title":"IEEE Trans. Image Process."},{"key":"42_CR61","doi-asserted-by":"crossref","unstructured":"Wu, J., et al.: Sliced Wasserstein generative models. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3713\u20133722 (2019)","DOI":"10.1109\/CVPR.2019.00383"},{"key":"42_CR62","unstructured":"Xue, T., Wu, J., Bouman, K., Freeman, B.: Visual dynamics: probabilistic future frame synthesis via cross convolutional networks. In: Proceedings of Advances in Neural Information Processing Systems, pp. 91\u201399 (2016)"},{"key":"42_CR63","doi-asserted-by":"crossref","unstructured":"Zhao, H., Gan, C., Ma, W., Torralba, A.: The sound of motions. CoRR abs\/1904.05979 (2019)","DOI":"10.1109\/ICCV.2019.00182"},{"key":"42_CR64","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"587","DOI":"10.1007\/978-3-030-01246-5_35","volume-title":"Computer Vision \u2013 ECCV 2018","author":"H Zhao","year":"2018","unstructured":"Zhao, H., Gan, C., Rouditchenko, A., Vondrick, C., McDermott, J., Torralba, A.: The sound of pixels. In: Ferrari, V., Hebert, M., Sminchisescu, C., Weiss, Y. (eds.) ECCV 2018. LNCS, vol. 11205, pp. 587\u2013604. Springer, Cham (2018). https:\/\/doi.org\/10.1007\/978-3-030-01246-5_35"},{"key":"42_CR65","doi-asserted-by":"crossref","unstructured":"Zhao, M., et al.: Through-wall human pose estimation using radio signals. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7356\u20137365 (2018)","DOI":"10.1109\/CVPR.2018.00768"},{"key":"42_CR66","doi-asserted-by":"crossref","unstructured":"Zhou, H., Liu, Z., Xu, X., Luo, P., Wang, X.: Vision-infused deep audio inpainting. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 283\u2013292 (2019)","DOI":"10.1109\/ICCV.2019.00037"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2020"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-58583-9_42","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,18]],"date-time":"2024-11-18T00:14:55Z","timestamp":1731888895000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-030-58583-9_42"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"ISBN":["9783030585822","9783030585839"],"references-count":66,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-58583-9_42","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2020]]},"assertion":[{"value":"19 November 2020","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Glasgow","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 August 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"16","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2020.eu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OpenReview","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5025","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1360","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"27% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"7","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"The conference was held virtually due to the COVID-19 pandemic. From the ECCV Workshops 249 full papers, 18 short papers, and 21 further contributions were published out of a total of 467 submissions.","order":10,"name":"additional_info_on_review_process","label":"Additional Info on Review Process","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}