{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:51:20Z","timestamp":1764399080649,"version":"3.46.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249004","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"198-203","source":"Crossref","is-referenced-by-count":0,"title":["Drum-to-Vocal Percussion Sound Conversion and Its Evaluation Methodology"],"prefix":"10.1109","author":[{"given":"Rinka","family":"Nobukawa","sequence":"first","affiliation":[{"name":"University of Tokyo,Graduate School of Information Science and Technology,Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Makito","family":"Kitamura","sequence":"additional","affiliation":[{"name":"University of Tokyo,Graduate School of Information Science and Technology,Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tomohiko","family":"Nakamura","sequence":"additional","affiliation":[{"name":"National Institute of Advanced Industrial Science and Technology (AIST),Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shinnosuke","family":"Takamichi","sequence":"additional","affiliation":[{"name":"Keio University,Faculty of Science and Technology,Yokohama,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hiroshi","family":"Saruwatari","sequence":"additional","affiliation":[{"name":"University of Tokyo,Graduate School of Information Science and Technology,Tokyo,Japan"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Our drums and drummers","year":"1968","author":"Nketia","key":"ref1"},{"key":"ref2","first-page":"925","article-title":"Acoustic and perceptual comparison of speech and drum sounds in the North Indian tabla tradition: An empirical study of sound symbolism","volume-title":"in Proc. Int. Congr. Phonetic Sci.","author":"Patel","year":"2003"},{"key":"ref3","first-page":"15","article-title":"Rhythm-speak: Mnemonic, language play or song","volume-title":"in Proc. Int. Conf. Music Commun. Sci.","author":"Atherton","year":"2007"},{"journal-title":"A Cappella 101. Hal Leonard","year":"2022","author":"Dietz","key":"ref4"},{"key":"ref5","first-page":"170","article-title":"Query by beatboxing: Music information retrieval for the DJ","volume-title":"in Proc. Int. Soc. Music Inf. Retrieval Conf.","author":"Kapur","year":"2004"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2021.102468"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/2598153.2598189"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832340"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1631"},{"journal-title":"Good practices for evaluation of synthesized speech","year":"2025","author":"Cooper","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/PACRIM.1993.407206"},{"key":"ref12","article-title":"Evaluating generative audio systems and their metrics","volume-title":"in Proc. Int. Soc. Music Inf. Retrieval Conf.","author":"Vinay","year":"2022"},{"volume-title":"RAVE: A variational autoencoder for fast and high-quality neural audio synthesis","year":"2021","author":"Caillon","key":"ref13"},{"key":"ref14","first-page":"33","article-title":"Conceptualization of human beatbox in Japan: The global trend and relationship with a Japanese beatboxer \u201cAfra","volume":"17","author":"Kawamoto","year":"2019","journal-title":"Jpn. Music Expr. Soc."},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1121\/1.4773865"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1121\/10.0002921"},{"journal-title":"Beatboxing techniques","year":"2025","author":"Tyte","key":"ref17"},{"key":"ref18","article-title":"Auto-encoding variational Bayes","volume-title":"in Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2014"},{"key":"ref19","article-title":"Neural discrete representation learning","volume-title":"in Proc. Adv. Neural Inf. Process. Systems","volume":"30","author":"van den Oord","year":"2017"},{"key":"ref20","article-title":"DDSP: Differentiable digital signal processing","volume-title":"in Proc. Int. Conf. Learn. Representations","author":"Engel","year":"2020"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.5555\/2969033.2969125"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053795"},{"key":"ref23","article-title":"MelGAN: Generative adversarial networks for conditional waveform synthesis","volume-title":"in Proc. Adv. Neural Inf. Process. Systems","volume":"32","author":"Kumar","year":"2019"},{"key":"ref24","first-page":"17 022","article-title":"HiFi-GAN: Generative adversarial networks for efficient and high fidelity speech synthesis","volume-title":"in Proc. Int. Conf. Neural Inf. Process. Systems","volume":"33","author":"Kong","year":"2020"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00917"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095569"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.2307\/2331986"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249004.pdf?arnumber=11249004","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T06:50:01Z","timestamp":1764399001000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249004\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249004","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}