{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T16:47:47Z","timestamp":1765039667586,"version":"3.28.0"},"reference-count":25,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,6,4]],"date-time":"2023-06-04T00:00:00Z","timestamp":1685836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,6,4]]},"DOI":"10.1109\/icassp49357.2023.10097250","type":"proceedings-article","created":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T17:28:30Z","timestamp":1683307710000},"page":"1-5","source":"Crossref","is-referenced-by-count":9,"title":["GANStrument: Adversarial Instrument Sound Synthesis with Pitch-Invariant Instance Conditioning"],"prefix":"10.1109","author":[{"given":"Gaku","family":"Narita","sequence":"first","affiliation":[{"name":"Sony Computer Science Laboratories,Tokyo,Japan"}]},{"given":"Junichi","family":"Shimizu","sequence":"additional","affiliation":[{"name":"Sony Computer Science Laboratories,Tokyo,Japan"}]},{"given":"Taketo","family":"Akama","sequence":"additional","affiliation":[{"name":"Sony Computer Science Laboratories,Tokyo,Japan"}]}],"member":"263","reference":[{"key":"ref13","article-title":"Hifi-gan: Generative adversarial networks for efficient and high fidelity speech synthesis","author":"kong","year":"2020","journal-title":"Proc NeurIPS"},{"key":"ref12","article-title":"Melgan: Generative adversarial networks for conditional waveform synthesis","author":"kumar","year":"2019","journal-title":"Proc NeurIPS"},{"key":"ref15","article-title":"Ddsp-based singing vocoders: A new subtractive-based synthesizer and a comprehensive evaluation","author":"wu","year":"2022","journal-title":"Proc ISMIR"},{"key":"ref14","article-title":"Bigvgan: A universal neural vocoder with large-scale training","author":"lee","year":"2022","journal-title":"preprint arXiv 2206 04658"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461368"},{"key":"ref10","article-title":"A universal music translation network","author":"noam mor","year":"2019","journal-title":"Proc ICLR"},{"key":"ref2","article-title":"Gansynth: Adversarial neural audio synthesis","author":"engel","year":"2018","journal-title":"Proc ICLR"},{"key":"ref1","article-title":"Neural audio synthesis of musical notes with wavenet autoencoders","author":"engel","year":"2017","journal-title":"Proc ICML"},{"article-title":"Melnet: A generative model for audio in the frequency domain","year":"2019","author":"vasquez","key":"ref17"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746826"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1137\/0916069"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3181070"},{"key":"ref23","article-title":"cgans with projection discriminator","author":"miyato","year":"2018","journal-title":"Proc ICLR"},{"article-title":"Wavenet: A generative model for raw audio","year":"2016","author":"oord","key":"ref25"},{"article-title":"Torchaudio: Building blocks for audio and speech processing","year":"2021","author":"yang","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"ref21","first-page":"138","article-title":"A real-time system for measuring sound goodness in instrumental sounds","author":"romani picas","year":"2015","journal-title":"Audio Engineering Society Convention"},{"key":"ref8","first-page":"2096","article-title":"Domain-adversarial training of neural networks","volume":"17","author":"ganin","year":"2016","journal-title":"The Journal of Machine Learning Research"},{"key":"ref7","article-title":"Instance-conditioned gan","author":"casanova","year":"2021","journal-title":"Proc NeurIPS"},{"key":"ref9","article-title":"Fader networks: Manipulating images by sliding attributes","author":"lample","year":"2017","journal-title":"Proc NeurIPS"},{"key":"ref4","article-title":"Drumgan: Synthesis of drum sounds with timbral feature conditioning using generative adversarial networks","author":"nistal","year":"2020","journal-title":"Proc ISMIR"},{"key":"ref3","article-title":"Learning disentangled representations of timbre and pitch for musical instrument sounds using gaussian mixture variational autoencoders","author":"luo","year":"2019","journal-title":"Proc ISMIR"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746940"},{"key":"ref5","article-title":"Ddsp: Differentiable digital signal processing","author":"engel","year":"2020","journal-title":"Proc ICLR"}],"event":{"name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2023,6,4]]},"location":"Rhodes Island, Greece","end":{"date-parts":[[2023,6,10]]}},"container-title":["ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10094559\/10094560\/10097250.pdf?arnumber=10097250","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,13]],"date-time":"2023-11-13T19:00:30Z","timestamp":1699902030000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10097250\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,4]]},"references-count":25,"URL":"https:\/\/doi.org\/10.1109\/icassp49357.2023.10097250","relation":{},"subject":[],"published":{"date-parts":[[2023,6,4]]}}}