{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,29]],"date-time":"2025-05-29T04:01:46Z","timestamp":1748491306883,"version":"3.41.0"},"reference-count":21,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,4,6]],"date-time":"2025-04-06T00:00:00Z","timestamp":1743897600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,4,6]]},"DOI":"10.1109\/icasspw65056.2025.11011256","type":"proceedings-article","created":{"date-parts":[[2025,5,27]],"date-time":"2025-05-27T17:05:14Z","timestamp":1748365514000},"page":"1-5","source":"Crossref","is-referenced-by-count":1,"title":["Closing the Loop on Speech to Music Translation: Automatically Generating Synthetic Percussive Sequences on the Mridangam from Konnakol"],"prefix":"10.1109","author":[{"given":"Gopika","family":"Krishnan","sequence":"first","affiliation":[{"name":"Universitat Pompeu Fabra,Department of Engineering,Barcelona,Spain"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Julia","family":"Drabek","sequence":"additional","affiliation":[{"name":"Johns Hopkins University,Department of Electrical and Computer Engineering,Baltimore,Maryland,United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Akshay","family":"Anantapadmanabhan","sequence":"additional","affiliation":[{"name":"Freelance Musician,India"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kaustuv Kanti","family":"Ganguli","sequence":"additional","affiliation":[{"name":"Zayed University,Department of Computational Systems,Abu Dhabi,United Arab Emirates"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Carlos","family":"Guedes","sequence":"additional","affiliation":[{"name":"New York University Abu Dhabi,Music and Sound Cultures Research Group,Abu Dhabi,United Arab Emirates"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"article-title":"Questioning the fundamental problem-definition of mridangam transcription","volume-title":"Timbre 2020: International Conference on Timbre","author":"Guedes","key":"ref1"},{"article-title":"Automatic Transcription of Drum Strokes in Carnatic Music","year":"2022","author":"Chandramouli","key":"ref2"},{"key":"ref3","article-title":"Automatic transcription of drums and vocalised percussion","volume-title":"Master\u2019s thesis","author":"Ramires","year":"2017"},{"article-title":"The kaldi speech recognition toolkit","volume-title":"IEEE 2011 workshop on automatic speech recognition and understanding.","author":"Povey","key":"ref4"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2021.102468"},{"article-title":"A passive approach to evaluating mridangam transcription via perceptual experiment","volume-title":"nternational Conference of Music Perception and Cognition: ICMPC-ESCOM","author":"Ganguli","key":"ref6"},{"key":"ref7","first-page":"lP","article-title":"Optimizing the mridangam stroke transcription pipeline: Addressing key challenges","volume-title":"Proceedings of the International Society for Music Information Retrieval Conference (ISMIR)","author":"Krishnan"},{"key":"ref8","first-page":"107","article-title":"Segmentation and recognition of tabla strokes","volume":"20056","author":"Chordia","year":"2005","journal-title":"ISMIR"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1080\/09298215.2011.576318"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.1109\/ICASSPW65056.2025.11011110","article-title":"Investigating temporal convolutional networks for automated stroke transcription in the mridangam","volume-title":"Proceedings of the Workshop on Indian Music Analysis and Generative Applications (WIMAGA)","author":"Krishnan"},{"key":"ref11","article-title":"Camel: Carnatic percussion music generation using n-gram models","volume-title":"Proceedings of 13th sound and music computing conference (SMC)","volume":"31","author":"Trochidis"},{"article-title":"A data-driven approach for carnatic percussion music generation","volume-title":"Proceedings of the 10th International Workshop on Machine Learning and Music","author":"Trochidis","key":"ref12"},{"article-title":"Modeling carnatic rhythm generation: A data-driven approach based on rhythmic analysis","volume-title":"Proceedings of the 15th Sound & Music Computing Conference","author":"Guedes","key":"ref13"},{"issue":"3","key":"ref14","first-page":"11","article-title":"An approach to adding knowledge constraints to a data-driven generative model for carnatic rhythm sequence","volume":"9","author":"Ganguli","year":"2019","journal-title":"Trends in Electrical Engineering"},{"volume-title":"The konnakol typewriter","year":"2021","author":"Guedes","key":"ref15"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461870"},{"key":"ref17","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462105"},{"key":"ref19","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International conference on machine learning","author":"Radford"},{"article-title":"whisper-timestamped","year":"2023","author":"Louradour","key":"ref20"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-94-009-5113-6_28"}],"event":{"name":"2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)","start":{"date-parts":[[2025,4,6]]},"location":"Hyderabad, India","end":{"date-parts":[[2025,4,11]]}},"container-title":["2025 IEEE International Conference on Acoustics, Speech, and Signal Processing Workshops (ICASSPW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11010992\/11010997\/11011256.pdf?arnumber=11011256","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,28]],"date-time":"2025-05-28T04:55:11Z","timestamp":1748408111000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11011256\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,6]]},"references-count":21,"URL":"https:\/\/doi.org\/10.1109\/icasspw65056.2025.11011256","relation":{},"subject":[],"published":{"date-parts":[[2025,4,6]]}}}