{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,25]],"date-time":"2026-06-25T02:02:07Z","timestamp":1782352927760,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T00:00:00Z","timestamp":1508716800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2017,10,23]]},"DOI":"10.1145\/3126686.3126723","type":"proceedings-article","created":{"date-parts":[[2017,10,23]],"date-time":"2017-10-23T19:20:32Z","timestamp":1508786432000},"page":"349-357","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":155,"title":["Deep Cross-Modal Audio-Visual Generation"],"prefix":"10.1145","author":[{"given":"Lele","family":"Chen","sequence":"first","affiliation":[{"name":"University of Rochester, Rochester, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sudhanshu","family":"Srivastava","sequence":"additional","affiliation":[{"name":"University of Rochester, Rochester, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Zhiyao","family":"Duan","sequence":"additional","affiliation":[{"name":"University of Rochester, Rochester, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenliang","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Rochester, Rochester, NY, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2017,10,23]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Advances in Neural Information Processing Systems Workshop.","author":"Behpour Sima","year":"2016","unstructured":"Sima Behpour and Brian D Ziebart . 2016 . Adversarial methods improve object localization . Advances in Neural Information Processing Systems Workshop. Sima Behpour and Brian D Ziebart. 2016. Adversarial methods improve object localization. Advances in Neural Information Processing Systems Workshop."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_4"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1016\/0028-3932(73)90060-2"},{"key":"e_1_3_2_1_4_1","volume-title":"Imagenet: A large-scale hierarchical image database IEEE Conference on Computer Vision and Pattern Recognition.","author":"Deng Jia","year":"2009","unstructured":"Jia Deng , Wei Dong , Richard Socher , Li-Jia Li , Kai Li , and Li Fei-Fei . 2009 . Imagenet: A large-scale hierarchical image database IEEE Conference on Computer Vision and Pattern Recognition. Jia Deng, Wei Dong, Richard Socher, Li-Jia Li, Kai Li, and Li Fei-Fei. 2009. Imagenet: A large-scale hierarchical image database IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_5_1","unstructured":"Emily Denton Soumith Chintala Arthur Szlam and Rob Fergus. 2015. Deep generative image models using a Laplacian pyramid of adversarial networks Advances in Neural Information Processing Systems.   Emily Denton Soumith Chintala Arthur Szlam and Rob Fergus. 2015. Deep generative image models using a Laplacian pyramid of adversarial networks Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2654902"},{"key":"e_1_3_2_1_7_1","unstructured":"Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. In Advances in Neural Information Processing Systems.   Ian Goodfellow Jean Pouget-Abadie Mehdi Mirza Bing Xu David Warde-Farley Sherjil Ozair Aaron Courville and Yoshua Bengio. 2014. Generative adversarial nets. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_8_1","volume-title":"Efros","author":"Isola Phillip","year":"2017","unstructured":"Phillip Isola , Jun-Yan Zhu , Tinghui Zhou , and Alexei A . Efros . 2017 . Image-to-image translation with conditional adversarial networks IEEE Conference on Computer Vision and Pattern Recognition . Phillip Isola, Jun-Yan Zhu, Tinghui Zhou, and Alexei A. Efros. 2017. Image-to-image translation with conditional adversarial networks IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"crossref","unstructured":"S. Kumar V. Dhiman and J. J. Corso. 2014. Learning compositional sparse models of bimodal percepts AAAI Conference on Artificial Intelligence.   S. Kumar V. Dhiman and J. J. Corso. 2014. Learning compositional sparse models of bimodal percepts AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v28i1.8753"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952688"},{"key":"e_1_3_2_1_11_1","unstructured":"Bochen Li Xinzhao Liu Karthik Dinesh Zhiyao Duan and Gaurav Sharma. 2016. Creating a classical musical performance dataset for multimodal music analysis: Challenges Insights and Applications. In arXiv:1612.08727.  Bochen Li Xinzhao Liu Karthik Dinesh Zhiyao Duan and Gaurav Sharma. 2016. Creating a classical musical performance dataset for multimodal music analysis: Challenges Insights and Applications. In arXiv:1612.08727."},{"key":"e_1_3_2_1_12_1","unstructured":"Pauline Luc Camille Couprie Soumith Chintala and Jakob Verbeek. 2016. Semantic segmentation using adversarial networks. arXiv:1611.08408.  Pauline Luc Camille Couprie Soumith Chintala and Jakob Verbeek. 2016. Semantic segmentation using adversarial networks. arXiv:1611.08408."},{"key":"e_1_3_2_1_13_1","volume-title":"International Conference on Learning Representations.","author":"Makhzani Alireza","year":"2016","unstructured":"Alireza Makhzani , Jonathon Shlens , Navdeep Jaitly , Ian Goodfellow , and Brendan Frey . 2016 . Adversarial autoencoders . In International Conference on Learning Representations. Alireza Makhzani, Jonathon Shlens, Navdeep Jaitly, Ian Goodfellow, and Brendan Frey. 2016. Adversarial autoencoders. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/259964.260075"},{"key":"e_1_3_2_1_15_1","unstructured":"Mehdi Mirza and Simon Osindero. 2014. Conditional generative adversarial nets. In arXiv:1411.1784.  Mehdi Mirza and Simon Osindero. 2014. Conditional generative adversarial nets. In arXiv:1411.1784."},{"key":"e_1_3_2_1_16_1","volume-title":"International Conference on Machine Learning.","author":"Ngiam Jiquan","unstructured":"Jiquan Ngiam , Aditya Khosla , Mingyu Kim , Juhan Nam , Honglak Lee , and Andrew Y. Ng . 2011. Multimodal deep learning . In International Conference on Machine Learning. Jiquan Ngiam, Aditya Khosla, Mingyu Kim, Juhan Nam, Honglak Lee, and Andrew Y. Ng. 2011. Multimodal deep learning. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_17_1","volume-title":"IEEE Conference on Computer Vision and Pattern Recognition.","author":"Owens Andrew","unstructured":"Andrew Owens , Phillip Isola , Josh McDermott , Antonio Torralba , Edward H. Adelson , and William T. Freeman . 2016. Visually indicated sounds . In IEEE Conference on Computer Vision and Pattern Recognition. Andrew Owens, Phillip Isola, Josh McDermott, Antonio Torralba, Edward H. Adelson, and William T. Freeman. 2016. Visually indicated sounds. In IEEE Conference on Computer Vision and Pattern Recognition."},{"key":"e_1_3_2_1_18_1","volume-title":"SEGAN: Speech Enhancement Generative Adversarial Network arXiv:1703.09452.","author":"Pascual Santiago","year":"2017","unstructured":"Santiago Pascual , Antonio Bonafonte , and Joan Serr\u00e0 . 2017 . SEGAN: Speech Enhancement Generative Adversarial Network arXiv:1703.09452. Santiago Pascual, Antonio Bonafonte, and Joan Serr\u00e0. 2017. SEGAN: Speech Enhancement Generative Adversarial Network arXiv:1703.09452."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.142"},{"key":"e_1_3_2_1_20_1","unstructured":"Alec Radford Luke Metz and Soumith Chintala. 2015. Unsupervised representation learning with deep convolutional generative adversarial networks International Conference on Learning Representations.  Alec Radford Luke Metz and Soumith Chintala. 2015. Unsupervised representation learning with deep convolutional generative adversarial networks International Conference on Learning Representations."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1873987"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"crossref","unstructured":"Scott Reed Zeynep Akata Honglak Lee and Bernt Schiele. 2016. Learning deep representations of fine-grained visual descriptions IEEE Conference on Computer Vision and Pattern Recognition.  Scott Reed Zeynep Akata Honglak Lee and Bernt Schiele. 2016. Learning deep representations of fine-grained visual descriptions IEEE Conference on Computer Vision and Pattern Recognition.","DOI":"10.1109\/CVPR.2016.13"},{"key":"e_1_3_2_1_23_1","volume-title":"International Conference on Machine Learning.","author":"Reed Scott","year":"2016","unstructured":"Scott Reed , Zeynep Akata , Xinchen Yan , Lajanugen Logeswaran , Bernt Schiele , and Honglak Lee . 2016 . Generative adversarial text-to-image synthesis . In International Conference on Machine Learning. Scott Reed, Zeynep Akata, Xinchen Yan, Lajanugen Logeswaran, Bernt Schiele, and Honglak Lee. 2016. Generative adversarial text-to-image synthesis. In International Conference on Machine Learning."},{"key":"e_1_3_2_1_24_1","unstructured":"Tim Salimans Ian Goodfellow Wojciech Zaremba Vicki Cheung Alec Radford and Xi Chen. 2016. Improved techniques for training GANs. In Advances in Neural Information Processing Systems.  Tim Salimans Ian Goodfellow Wojciech Zaremba Vicki Cheung Alec Radford and Xi Chen. 2016. Improved techniques for training GANs. In Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"crossref","unstructured":"Nasim Souly Concetto Spampinato and Mubarak Shah. 2017. Semi and Weakly Supervised Semantic Segmentation Using Generative Adversarial Networks arXiv:1703.09695.  Nasim Souly Concetto Spampinato and Mubarak Shah. 2017. Semi and Weakly Supervised Semantic Segmentation Using Generative Adversarial Networks arXiv:1703.09695.","DOI":"10.1109\/ICCV.2017.606"},{"key":"e_1_3_2_1_26_1","volume-title":"Salakhutdinov","author":"Srivastava Nitish","year":"2012","unstructured":"Nitish Srivastava and Ruslan R . Salakhutdinov . 2012 . Multimodal learning with deep Boltzmann machines. Advances in Neural Information Processing Systems . Nitish Srivastava and Ruslan R. Salakhutdinov. 2012. Multimodal learning with deep Boltzmann machines. Advances in Neural Information Processing Systems."},{"key":"e_1_3_2_1_27_1","volume-title":"The merging of the senses","author":"Stein Barry E.","unstructured":"Barry E. Stein and M. Alex Meredith . 1993. The merging of the senses . The MIT Press . Barry E. Stein and M. Alex Meredith. 1993. The merging of the senses. The MIT Press."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806350"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.cognition.2005.09.003"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1037\/0096-1523.26.5.1583"},{"key":"e_1_3_2_1_32_1","unstructured":"Kaiye Wang Qiyue Yin Wei Wang Shu Wu and Liang Wang. 2016. A Comprehensive Survey on Cross-modal Retrieval. arXiv:1607.06215.  Kaiye Wang Qiyue Yin Wei Wang Shu Wu and Liang Wang. 2016. A Comprehensive Survey on Cross-modal Retrieval. arXiv:1607.06215."},{"key":"e_1_3_2_1_33_1","unstructured":"Hang Zhang and Kristin Dana. 2017. Multi-style generative network for real-time transfer arXiv:1703.06953.  Hang Zhang and Kristin Dana. 2017. Multi-style generative network for real-time transfer arXiv:1703.06953."}],"event":{"name":"MM '17: ACM Multimedia Conference","location":"Mountain View California USA","acronym":"MM '17","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the on Thematic Workshops of ACM Multimedia 2017"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3126686.3126723","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3126686.3126723","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T02:10:53Z","timestamp":1750212653000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3126686.3126723"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,10,23]]},"references-count":32,"alternative-id":["10.1145\/3126686.3126723","10.1145\/3126686"],"URL":"https:\/\/doi.org\/10.1145\/3126686.3126723","relation":{},"subject":[],"published":{"date-parts":[[2017,10,23]]},"assertion":[{"value":"2017-10-23","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}