{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T18:50:36Z","timestamp":1755802236848,"version":"3.44.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,30]],"date-time":"2024-05-30T00:00:00Z","timestamp":1717027200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,30]]},"DOI":"10.1145\/3652583.3657586","type":"proceedings-article","created":{"date-parts":[[2024,6,7]],"date-time":"2024-06-07T06:30:40Z","timestamp":1717741840000},"page":"1207-1213","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Mapping the Audio Landscape for Innovative Music Sample Generation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4903-3933","authenticated-orcid":false,"given":"Christian","family":"Limberg","sequence":"first","affiliation":[{"name":"National Institute of Informatics, Tokyo, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1003-6093","authenticated-orcid":false,"given":"Zhe","family":"Zhang","sequence":"additional","affiliation":[{"name":"National Institute of Informatics, Tokyo, Japan"}]}],"member":"320","published-online":{"date-parts":[[2024,6,7]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","unstructured":"Andrea Agostinelli Timo I. Denk Zal\u00e1n Borsos Jesse Engel Mauro Verzetti Antoine Caillon Qingqing Huang Aren Jansen Adam Roberts Marco Tagliasacchi Matt Sharifi Neil Zeghidour and Christian Frank. 2023. MusicLM : Generating Music From Text. https:\/\/doi.org\/10.48550\/arXiv.2301.11325 arxiv: 2301.11325 [cs eess]","DOI":"10.48550\/arXiv.2301.11325"},{"key":"e_1_3_2_1_2_1","unstructured":"Cyran Aouameur P. Esling and Ga\u00ebtan Hadjeres. 2019. Neural Drum Machine: An Interactive System for Real-Time Synthesis of Drum Sounds. (2019)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","unstructured":"Zal\u00e1n Borsos Rapha\u00ebl Marinier Damien Vincent Eugene Kharitonov Olivier Pietquin Matt Sharifi Dominik Roblek Olivier Teboul David Grangier Marco Tagliasacchi and Neil Zeghidour. 2023. AudioLM : A Language Modeling Approach to Audio Generation. https:\/\/doi.org\/10.48550\/arXiv.2209.03143 arxiv: 2209.03143 [cs eess]","DOI":"10.48550\/arXiv.2209.03143"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-018-3813-6"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2111.05011"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9415047"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2009.00713"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","unstructured":"Jade Copet Felix Kreuk Itai Gat Tal Remez David Kant Gabriel Synnaeve Yossi Adi and Alexandre D\u00e9fossez. 2024. Simple and Controllable Music Generation. https:\/\/doi.org\/10.48550\/arXiv.2306.05284 arxiv: 2306.05284 [cs eess]","DOI":"10.48550\/arXiv.2306.05284"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096670"},{"key":"e_1_3_2_1_10_1","unstructured":"Chris Donahue Julian McAuley and M. Puckette. 2018a. Adversarial Audio Synthesis. (2018)."},{"key":"e_1_3_2_1_11_1","unstructured":"Chris Donahue Julian McAuley and M. Puckette. 2018b. Synthesizing Audio with Generative Adversarial Networks. ArXiv Vol. abs\/1802.04208 (2018) null."},{"key":"e_1_3_2_1_12_1","unstructured":"Jake Drysdale Maciej Tomczak and Jason Hockman. 2020. Adversarial Synthesis of Drum Sounds. (2020)."},{"key":"e_1_3_2_1_13_1","unstructured":"Alexandre D\u00e9fossez Jade Copet Gabriel Synnaeve and Yossi Adi. 2022. High Fidelity Neural Audio Compression. arxiv: 2210.13438 [eess.AS]"},{"key":"e_1_3_2_1_14_1","volume-title":"GANSynth: Adversarial Neural Audio Synthesis. In International Conference on Learning Representations.","author":"Engel Jesse","year":"2018","unstructured":"Jesse Engel, Kumar Krishna Agrawal, Shuo Chen, Ishaan Gulrajani, Chris Donahue, and Adam Roberts. 2018. GANSynth: Adversarial Neural Audio Synthesis. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_15_1","volume-title":"Proceedings of the 34th International Conference on Machine Learning -","volume":"70","author":"Engel Jesse","year":"2017","unstructured":"Jesse Engel, Cinjon Resnick, Adam Roberts, Sander Dieleman, Mohammad Norouzi, Douglas Eck, and Karen Simonyan. 2017. Neural Audio Synthesis of Musical Notes with WaveNet Autoencoders. In Proceedings of the 34th International Conference on Machine Learning - Volume 70. 1068--1077."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","unstructured":"Chitralekha Gupta Purnima Kamath and L. Wyse. 2021. Signal Representations for Synthesizing Audio Textures with Generative Adversarial Networks. ArXiv Vol. abs\/2103.07390 (2021) null. https:\/\/doi.org\/10.5281\/zenodo.5054145","DOI":"10.5281\/zenodo.5054145"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","unstructured":"Martin Heusel Hubert Ramsauer Thomas Unterthiner Bernhard Nessler and Sepp Hochreiter. 2018. GANs Trained by a Two Time-Scale Update Rule Converge to a Local Nash Equilibrium. https:\/\/doi.org\/10.48550\/arXiv.1706.08500 arxiv: 1706.08500 [cs stat]","DOI":"10.48550\/arXiv.1706.08500"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","unstructured":"Rongjie Huang Max W. Y. Lam Jun Wang Dan Su Dong Yu Yi Ren and Zhou Zhao. 2022. FastDiff: A Fast Conditional Diffusion Model for High-Quality Speech Synthesis. https:\/\/doi.org\/10.48550\/arXiv.2204.09934 arxiv: 2204.09934 [cs eess]","DOI":"10.48550\/arXiv.2204.09934"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2011.06801"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1312.6114"},{"key":"e_1_3_2_1_21_1","volume-title":"DiffWave: A Versatile Diffusion Model for Audio Synthesis. ArXiv","author":"Kong Zhifeng","year":"2020","unstructured":"Zhifeng Kong, Wei Ping, Jiaji Huang, Kexin Zhao, and Bryan Catanzaro. 2020. DiffWave: A Versatile Diffusion Model for Audio Synthesis. ArXiv, Vol. abs\/2009.09761 (2020), null."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","unstructured":"Felix Kreuk Gabriel Synnaeve Adam Polyak Uriel Singer Alexandre D\u00e9fossez Jade Copet Devi Parikh Yaniv Taigman and Yossi Adi. 2023. AudioGen: Textually Guided Audio Generation. https:\/\/doi.org\/10.48550\/arXiv.2209.15352 arxiv: 2209.15352 [cs eess]","DOI":"10.48550\/arXiv.2209.15352"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","unstructured":"Gaku Narita Junichi Shimizu and Taketo Akama. 2023. GANStrument: Adversarial Instrument Sound Synthesis with Pitch-Invariant Instance Conditioning. In ICASSP 2023 - 2023 IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP ). 1--5. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10097250","DOI":"10.1109\/ICASSP49357.2023.10097250"},{"key":"e_1_3_2_1_24_1","unstructured":"J. Nistal S. Lattner and G. Richard. 2021. DarkGAN : Exploiting Knowledge Distillation for Comprehensible Audio Synthesis with GANs. (2021)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","unstructured":"J. Nistal S. Lattner and G. Richard. 2022. DrumGAN: Synthesis of Drum Sounds With Timbral Feature Conditioning Using Generative Adversarial Networks. https:\/\/doi.org\/10.48550\/arXiv.2008.12073 arxiv: 2008.12073 [cs eess]","DOI":"10.48550\/arXiv.2008.12073"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053128"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3140549"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413519"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","unstructured":"Aaron van den Oord Sander Dieleman Heiga Zen Karen Simonyan Oriol Vinyals Alex Graves Nal Kalchbrenner Andrew Senior and Koray Kavukcuoglu. 2016. WaveNet : A Generative Model for Raw Audio. https:\/\/doi.org\/10.48550\/arXiv.1609.03499 arxiv: 1609.03499 [cs]","DOI":"10.48550\/arXiv.1609.03499"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1906.01083"},{"key":"e_1_3_2_1_31_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2023. Attention Is All You Need. arxiv: 1706.03762 [cs.CL]"},{"key":"e_1_3_2_1_32_1","volume-title":"Repulsion Loss: Detecting Pedestrians in a Crowd. CoRR","author":"Wang Xinlong","year":"2017","unstructured":"Xinlong Wang, Tete Xiao, Yuning Jiang, Shuai Shao, Jian Sun, and Chunhua Shen. 2017. Repulsion Loss: Detecting Pedestrians in a Crowd. CoRR, Vol. abs\/1711.07752 (2017). [arXiv]1711.07752 http:\/\/arxiv.org\/abs\/1711.07752"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096233"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","unstructured":"Yueh-Kao Wu Ching-Yu Chiu and Yi-Hsuan Yang. 2022. JukeDrummer: Conditional Beat-Aware Audio-Domain Drum Accompaniment Generation via Transformer VQ-VAE. (2022). https:\/\/doi.org\/10.48550\/arXiv.2210.06007","DOI":"10.48550\/arXiv.2210.06007"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3268730"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","unstructured":"Yen-Tung Yeh Bo-Yu Chen and Yi-Hsuan Yang. 2022. Exploiting Pre-Trained Feature Networks for Generative Adversarial Networks in Audio-Domain Loop Generation. (2022). https:\/\/doi.org\/10.48550\/arXiv.2209.01751","DOI":"10.48550\/arXiv.2209.01751"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","unstructured":"Zhe Zhang and Taketo Akama. 2024. HyperGANStrument: Instrument Sound Synthesis and Editing with Pitch-Invariant Hypernetworks. https:\/\/doi.org\/10.48550\/arXiv.2401.04558 arxiv: 2401.04558 [cs eess]","DOI":"10.48550\/arXiv.2401.04558"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-023-08728-1"}],"event":{"name":"ICMR '24: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia","SIGSOFT ACM Special Interest Group on Software Engineering"],"location":"Phuket Thailand","acronym":"ICMR '24"},"container-title":["Proceedings of the 2024 International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3657586","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3652583.3657586","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,21]],"date-time":"2025-08-21T08:50:14Z","timestamp":1755766214000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3652583.3657586"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,30]]},"references-count":38,"alternative-id":["10.1145\/3652583.3657586","10.1145\/3652583"],"URL":"https:\/\/doi.org\/10.1145\/3652583.3657586","relation":{},"subject":[],"published":{"date-parts":[[2024,5,30]]},"assertion":[{"value":"2024-06-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}