{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T14:51:10Z","timestamp":1779375070047,"version":"3.53.1"},"publisher-location":"Cham","reference-count":70,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031198359","type":"print"},{"value":"9783031198366","type":"electronic"}],"license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022]]},"DOI":"10.1007\/978-3-031-19836-6_14","type":"book-chapter","created":{"date-parts":[[2022,10,21]],"date-time":"2022-10-21T09:04:58Z","timestamp":1666343098000},"page":"235-252","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Learning Visual Styles from\u00a0Audio-Visual Associations"],"prefix":"10.1007","author":[{"given":"Tingle","family":"Li","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yichen","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Andrew","family":"Owens","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hang","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,10,22]]},"reference":[{"key":"14_CR1","doi-asserted-by":"crossref","unstructured":"Afouras, T., Chung, J.S., Senior, A., Vinyals, O., Zisserman, A.: Deep audio-visual speech recognition. IEEE Trans. Pattern Anal. Mach. Intell. (2018)","DOI":"10.1109\/TPAMI.2018.2889052"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Arandjelovic, R., Zisserman, A.: Look, listen and learn. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 609\u2013617 (2017)","DOI":"10.1109\/ICCV.2017.73"},{"key":"14_CR3","unstructured":"Asano, Y.M., Patrick, M., Rupprecht, C., Vedaldi, A.: Labelling unlabelled videos from scratch with multi-modal self-supervision. In: Advances in Neural Information Processing Systems (2020)"},{"key":"14_CR4","unstructured":"Bau, D., et al.: Paint by word. arXiv:2103.10951 (2021)"},{"key":"14_CR5","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"701","DOI":"10.1007\/978-3-030-58583-9_42","volume-title":"Computer Vision \u2013 ECCV 2020","author":"M Chatterjee","year":"2020","unstructured":"Chatterjee, M., Cherian, A.: Sound2Sight: generating visual dynamics from sound and context. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12372, pp. 701\u2013719. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58583-9_42"},{"key":"14_CR6","doi-asserted-by":"crossref","unstructured":"Chen, H., Xie, W., Afouras, T., Nagrani, A., Vedaldi, A., Zisserman, A.: Localizing visual sounds the hard way. In: Proceedings of the Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.01659"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Chen, H., Xie, W., Vedaldi, A., Zisserman, A.: VGGSound: a large-scale audio-visual dataset. In: ICASSP 2020\u20132020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 721\u2013725. IEEE (2020)","DOI":"10.1109\/ICASSP40776.2020.9053174"},{"key":"14_CR8","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.E.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607 (2020)"},{"key":"14_CR9","unstructured":"Chen, Z., Hu, X., Owens, A.: Structure from silence: learning scene structure from ambient sound. In: 5th Annual Conference on Robot Learning (2021)"},{"key":"14_CR10","doi-asserted-by":"publisher","first-page":"161981","DOI":"10.1109\/ACCESS.2020.3019084","volume":"8","author":"KW Cheuk","year":"2020","unstructured":"Cheuk, K.W., Anderson, H., Agres, K., Herremans, D.: nnAudio: an on-the-fly GPU audio to spectrogram conversion toolbox using 1D convolutional neural networks. IEEE Access 8, 161981\u2013162003 (2020)","journal-title":"IEEE Access"},{"key":"14_CR11","unstructured":"Chung, J.S., Jamaludin, A., Zisserman, A.: You said that? In: British Machine Vision Conference (2017)"},{"key":"14_CR12","doi-asserted-by":"crossref","unstructured":"Cramer, J., Wu, H.H., Salamon, J., Bello, J.P.: Look, listen, and learn more: design choices for deep audio embeddings. In: ICASSP 2019\u20132019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 3852\u20133856. IEEE (2019)","DOI":"10.1109\/ICASSP.2019.8682475"},{"key":"14_CR13","doi-asserted-by":"crossref","unstructured":"Dong, H., Yu, S., Wu, C., Guo, Y.: Semantic image synthesis via adversarial learning. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5706\u20135714 (2017)","DOI":"10.1109\/ICCV.2017.608"},{"key":"14_CR14","doi-asserted-by":"crossref","unstructured":"Ephrat, A., et al.: Looking to listen at the cocktail party: a speaker-independent audio-visual model for speech separation. ACM Trans. Graph. (TOG) 37(4) (2016)","DOI":"10.1145\/3197517.3201357"},{"key":"14_CR15","unstructured":"Fu, T.J., Wang, X.E., Wang, W.Y.: Language-driven image style transfer. arXiv preprint arXiv:2106.00178 (2021)"},{"key":"14_CR16","doi-asserted-by":"crossref","unstructured":"Gan, C., Huang, D., Zhao, H., Tenenbaum, J.B., Torralba, A.: Music gesture for visual sound separation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10478\u201310487 (2020)","DOI":"10.1109\/CVPR42600.2020.01049"},{"key":"14_CR17","doi-asserted-by":"crossref","unstructured":"Gao, R., Feris, R., Grauman, K.: Learning to separate object sounds by watching unlabeled video. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 35\u201353 (2018)","DOI":"10.1007\/978-3-030-01219-9_3"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Gao, R., Grauman, K.: 2.5 D visual sound. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 324\u2013333 (2019)","DOI":"10.1109\/CVPR.2019.00041"},{"key":"14_CR19","doi-asserted-by":"crossref","unstructured":"Gatys, L.A., Ecker, A.S., Bethge, M.: A neural algorithm of artistic style. arXiv preprint arXiv:1508.06576 (2015)","DOI":"10.1167\/16.12.326"},{"key":"14_CR20","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 776\u2013780. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Ginosar, S., Bar, A., Kohavi, G., Chan, C., Owens, A., Malik, J.: Learning individual styles of conversational gesture. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3497\u20133506 (2019)","DOI":"10.1109\/CVPR.2019.00361"},{"key":"14_CR22","unstructured":"Goodfellow, I., et al.: Generative adversarial networks. In: Advances in Neural Information Processing Systems, pp. 2672\u20132680 (2014)"},{"key":"14_CR23","unstructured":"Gutmann, M., Hyv\u00e4rinen, A.: Noise-contrastive estimation: a new estimation principle for unnormalized statistical models. In: Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, pp. 297\u2013304 (2010)"},{"key":"14_CR24","doi-asserted-by":"crossref","unstructured":"Harwath, D., Recasens, A., Sur\u00eds, D., Chuang, G., Torralba, A., Glass, J.: Jointly discovering visual objects and spoken words from raw sensory input. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 649\u2013665 (2018)","DOI":"10.1007\/978-3-030-01231-1_40"},{"key":"14_CR25","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"14_CR26","doi-asserted-by":"crossref","unstructured":"Hershey, S., et al.: CNN architectures for large-scale audio classification. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 131\u2013135. IEEE (2017)","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"14_CR27","doi-asserted-by":"crossref","unstructured":"Hertzmann, A., Jacobs, C.E., Oliver, N., Curless, B., Salesin, D.H.: Image analogies. In: Proceedings of the 28th Annual Conference on Computer Graphics and Interactive Techniques, pp. 327\u2013340 (2001)","DOI":"10.1145\/383259.383295"},{"key":"14_CR28","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local nash equilibrium. In: Advances in Neural Information Processing Systems (2017)"},{"key":"14_CR29","unstructured":"Hu, C., Tian, Q., Li, T., Wang, Y., Wang, Y., Zhao, H.: Neural dubber: dubbing for videos according to scripts. In: Advances in Neural Information Processing Systems (2021)"},{"key":"14_CR30","doi-asserted-by":"crossref","unstructured":"Huang, X., Belongie, S.: Arbitrary style transfer in real-time with adaptive instance normalization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1501\u20131510 (2017)","DOI":"10.1109\/ICCV.2017.167"},{"key":"14_CR31","unstructured":"Iashin, V., Rahtu, E.: Taming visually guided sound generation. arXiv preprint arXiv:2110.08791 (2021)"},{"key":"14_CR32","doi-asserted-by":"crossref","unstructured":"Isola, P., Zhu, J.Y., Zhou, T., Efros, A.A.: Image-to-image translation with conditional adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1125\u20131134 (2017)","DOI":"10.1109\/CVPR.2017.632"},{"key":"14_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"694","DOI":"10.1007\/978-3-319-46475-6_43","volume-title":"Computer Vision \u2013 ECCV 2016","author":"J Johnson","year":"2016","unstructured":"Johnson, J., Alahi, A., Fei-Fei, L.: Perceptual losses for real-time style transfer and super-resolution. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9906, pp. 694\u2013711. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46475-6_43"},{"key":"14_CR34","doi-asserted-by":"crossref","unstructured":"Johnson, J., Gupta, A., Fei-Fei, L.: Image generation from scene graphs. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1219\u20131228 (2018)","DOI":"10.1109\/CVPR.2018.00133"},{"key":"14_CR35","unstructured":"Kim, T., Cha, M., Kim, H., Lee, J.K., Kim, J.: Learning to discover cross-domain relations with generative adversarial networks. In: International Conference on Machine Learning, pp. 1857\u20131865. PMLR (2017)"},{"key":"14_CR36","unstructured":"Kingma, D.P., Ba, J.: Adam: a method for stochastic optimization. In: International Conference for Learning Representations (2015)"},{"key":"14_CR37","unstructured":"Korbar, B., Tran, D., Torresani, L.: Cooperative learning of audio and video models from self-supervised synchronization. In: Proceedings of the Advances in Neural Information Processing Systems (2018)"},{"issue":"4","key":"14_CR38","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2601097.2601101","volume":"33","author":"PY Laffont","year":"2014","unstructured":"Laffont, P.Y., Ren, Z., Tao, X., Qian, C., Hays, J.: Transient attributes for high-level understanding and editing of outdoor scenes. ACM Trans. Graph. (TOG) 33(4), 1\u201311 (2014)","journal-title":"ACM Trans. Graph. (TOG)"},{"issue":"4","key":"14_CR39","first-page":"1","volume":"33","author":"TR Langlois","year":"2014","unstructured":"Langlois, T.R., James, D.L.: Inverse-foley animation: synchronizing rigid-body motions to sound. ACM Trans. Graph. (TOG) 33(4), 1\u201311 (2014)","journal-title":"ACM Trans. Graph. (TOG)"},{"key":"14_CR40","unstructured":"Lee, S.H., et al.: Sound-guided semantic image manipulation. arXiv preprint arXiv:2112.00007 (2021)"},{"key":"14_CR41","doi-asserted-by":"crossref","unstructured":"Levine, S., Kr\u00e4henb\u00fchl, P., Thrun, S., Koltun, V.: Gesture controllers. In: ACM SIGGRAPH, pp. 1\u201311 (2010)","DOI":"10.1145\/1778765.1778861"},{"key":"14_CR42","doi-asserted-by":"crossref","unstructured":"Mahajan, D., et al.: Exploring the limits of weakly supervised pretraining. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 181\u2013196 (2018)","DOI":"10.1007\/978-3-030-01216-8_12"},{"key":"14_CR43","unstructured":"Mikolov, T., Chen, K., Corrado, G., Dean, J.: Efficient estimation of word representations in vector space. arXiv preprint arXiv:1301.3781 (2013)"},{"key":"14_CR44","unstructured":"Morgado, P., Vasconcelos, N., Langlois, T., Wang, O.: Self-supervised generation of spatial audio for 360 video. In: Advances in Neural Information Processing Systems (2018)"},{"key":"14_CR45","doi-asserted-by":"crossref","unstructured":"Morgado, P., Vasconcelos, N., Misra, I.: Audio-visual instance discrimination with cross-modal agreement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12475\u201312486 (2021)","DOI":"10.1109\/CVPR46437.2021.01229"},{"key":"14_CR46","unstructured":"Nam, S., Kim, Y., Kim, S.J.: Text-adaptive generative adversarial networks: manipulating images with natural language. In: Advances in Neural Information Processing Systems (2018)"},{"key":"14_CR47","unstructured":"Ngiam, J., Khosla, A., Kim, M., Nam, J., Lee, H., Ng, A.Y.: Multimodal deep learning. In: ICML (2011)"},{"key":"14_CR48","doi-asserted-by":"crossref","unstructured":"Owens, A., Efros, A.A.: Audio-visual scene analysis with self-supervised multisensory features. In: Proceedings of the European Conference on Computer Vision (2018)","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"14_CR49","doi-asserted-by":"crossref","unstructured":"Owens, A., Isola, P., McDermott, J., Torralba, A., Adelson, E.H., Freeman, W.T.: Visually indicated sounds. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2405\u20132413 (2016)","DOI":"10.1109\/CVPR.2016.264"},{"key":"14_CR50","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1007\/978-3-030-58545-7_19","volume-title":"Computer Vision \u2013 ECCV 2020","author":"T Park","year":"2020","unstructured":"Park, T., Efros, A.A., Zhang, R., Zhu, J.-Y.: Contrastive learning for unpaired image-to-image translation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12354, pp. 319\u2013345. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58545-7_19"},{"key":"14_CR51","unstructured":"Plakal, M., Ellis, D.: YAMNet, January 2020. https:\/\/github.com\/tensorflow\/models\/tree\/master\/research\/audioset\/yamnet"},{"key":"14_CR52","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: Learning individual speaking styles for accurate lip to speech synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13796\u201313805 (2020)","DOI":"10.1109\/CVPR42600.2020.01381"},{"key":"14_CR53","doi-asserted-by":"crossref","unstructured":"Prajwal, K., Mukhopadhyay, R., Namboodiri, V.P., Jawahar, C.: A lip sync expert is all you need for speech to lip generation in the wild. In: Proceedings of the 28th ACM International Conference on Multimedia, pp. 484\u2013492 (2020)","DOI":"10.1145\/3394171.3413532"},{"key":"14_CR54","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning (2021)"},{"key":"14_CR55","unstructured":"Ramesh, A., et al.: Zero-shot text-to-image generation. arXiv preprint arXiv:2102.12092 (2021)"},{"key":"14_CR56","unstructured":"Reed, S., Akata, Z., Yan, X., Logeswaran, L., Schiele, B., Lee, H.: Generative adversarial text to image synthesis. In: International Conference on Machine Learning, pp. 1060\u20131069 (2016)"},{"key":"14_CR57","unstructured":"de Sa, V.R.: Learning classification with unlabeled data. In: Advances in Neural Information Processing Systems, pp. 112\u2013119. Citeseer (1994)"},{"key":"14_CR58","doi-asserted-by":"crossref","unstructured":"Shlizerman, E., Dery, L., Schoen, H., Kemelmacher-Shlizerman, I.: Audio to body dynamics. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7574\u20137583 (2018)","DOI":"10.1109\/CVPR.2018.00790"},{"issue":"6","key":"14_CR59","doi-asserted-by":"publisher","first-page":"1247","DOI":"10.1162\/089976600300015349","volume":"12","author":"JB Tenenbaum","year":"2000","unstructured":"Tenenbaum, J.B., Freeman, W.T.: Separating style and content with bilinear models. Neural Comput. 12(6), 1247\u20131283 (2000)","journal-title":"Neural Comput."},{"key":"14_CR60","doi-asserted-by":"crossref","unstructured":"Wang, W., Tran, D., Feiszli, M.: What makes training multi-modal classification networks hard? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12695\u201312705 (2020)","DOI":"10.1109\/CVPR42600.2020.01271"},{"key":"14_CR61","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1007\/978-3-030-58452-8_4","volume-title":"Computer Vision \u2013 ECCV 2020","author":"C Wu","year":"2020","unstructured":"Wu, C., Timm, M., Maji, S.: Describing textures using natural language. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12346, pp. 52\u201370. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58452-8_4"},{"key":"14_CR62","doi-asserted-by":"crossref","unstructured":"Yang, K., Russell, B., Salamon, J.: Telling left from right: learning spatial correspondence of sight and sound. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9932\u20139941 (2020)","DOI":"10.1109\/CVPR42600.2020.00995"},{"key":"14_CR63","doi-asserted-by":"crossref","unstructured":"Yi, Z., Zhang, H., Tan, P., Gong, M.: DualGAN: unsupervised dual learning for image-to-image translation. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2849\u20132857 (2017)","DOI":"10.1109\/ICCV.2017.310"},{"key":"14_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, Z., et al.: Generative modeling of audible shapes for object perception. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 1251\u20131260 (2017)","DOI":"10.1109\/ICCV.2017.141"},{"key":"14_CR65","doi-asserted-by":"crossref","unstructured":"Zhao, H., Gan, C., Ma, W.C., Torralba, A.: The sound of motions. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1735\u20131744 (2019)","DOI":"10.1109\/ICCV.2019.00182"},{"key":"14_CR66","doi-asserted-by":"crossref","unstructured":"Zhao, H., Gan, C., Rouditchenko, A., Vondrick, C., McDermott, J., Torralba, A.: The sound of pixels. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 570\u2013586 (2018)","DOI":"10.1007\/978-3-030-01246-5_35"},{"issue":"6","key":"14_CR67","doi-asserted-by":"publisher","first-page":"1452","DOI":"10.1109\/TPAMI.2017.2723009","volume":"40","author":"B Zhou","year":"2017","unstructured":"Zhou, B., Lapedriza, A., Khosla, A., Oliva, A., Torralba, A.: Places: a 10 million image database for scene recognition. IEEE Trans. Pattern Anal. Mach. Intell. 40(6), 1452\u20131464 (2017)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"14_CR68","doi-asserted-by":"crossref","unstructured":"Zhou, H., Liu, Y., Liu, Z., Luo, P., Wang, X.: Talking face generation by adversarially disentangled audio-visual representation. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 9299\u20139306 (2019)","DOI":"10.1609\/aaai.v33i01.33019299"},{"key":"14_CR69","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Wang, Z., Fang, C., Bui, T., Berg, T.L.: Visual to sound: generating natural sound for videos in the wild. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3550\u20133558 (2018)","DOI":"10.1109\/CVPR.2018.00374"},{"key":"14_CR70","doi-asserted-by":"crossref","unstructured":"Zhu, J.Y., Park, T., Isola, P., Efros, A.A.: Unpaired image-to-image translation using cycle-consistent adversarial networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 2223\u20132232 (2017)","DOI":"10.1109\/ICCV.2017.244"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2022"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-19836-6_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,10,24]],"date-time":"2022-10-24T23:07:58Z","timestamp":1666652878000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-19836-6_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"ISBN":["9783031198359","9783031198366"],"references-count":70,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-19836-6_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]},"assertion":[{"value":"22 October 2022","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tel Aviv","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Israel","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2022","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23 October 2022","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27 October 2022","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"17","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2022","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2022.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"CMT","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"5804","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"1645","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"28% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.21","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3.91","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}