{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,24]],"date-time":"2026-03-24T11:48:08Z","timestamp":1774352888770,"version":"3.50.1"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,30]],"date-time":"2025-06-30T00:00:00Z","timestamp":1751241600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,6,30]]},"DOI":"10.1109\/icmew68306.2025.11152264","type":"proceedings-article","created":{"date-parts":[[2025,9,10]],"date-time":"2025-09-10T17:41:25Z","timestamp":1757526085000},"page":"1-6","source":"Crossref","is-referenced-by-count":1,"title":["Exploiting Music Source Separation for Automatic Lyrics Transcription with Whisper"],"prefix":"10.1109","author":[{"given":"Jaza","family":"Syed","sequence":"first","affiliation":[{"name":"Queen Mary University of London,School of Electronic Engineering and Computer Science"}]},{"given":"Ivan Meresman","family":"Higgs","sequence":"additional","affiliation":[{"name":"Queen Mary University of London,School of Electronic Engineering and Computer Science"}]},{"given":"Ond\u0159ej","family":"C\u00edfka","sequence":"additional","affiliation":[{"name":"AudioShake"}]},{"given":"Mark","family":"Sandler","sequence":"additional","affiliation":[{"name":"Queen Mary University of London,School of Electronic Engineering and Computer Science"}]}],"member":"263","reference":[{"key":"ref1","first-page":"28492","article-title":"Robust Speech Recognition via Large-Scale Weak Super-vision","volume-title":"Proceedings of the 40th International Conference on Machine Learning","author":"Radford"},{"key":"ref2","first-page":"737","article-title":"Lyrics Transcription for Humans: A Readability-Aware Benchmark","volume-title":"Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024","author":"C\u00edfka"},{"key":"ref3","first-page":"151","article-title":"MSTRE-Net: Multistreaming Acoustic Modeling for Automatic Lyrics Transcription","volume-title":"Proceedings of the 22nd International Society for Music Information Retrieval Conference, ISMIR 2021, Online, November","author":"Demirel"},{"key":"ref4","first-page":"891","article-title":"Transfer Learning of wav2vec 2.0 for Automatic Lyric Transcription","volume-title":"Proceedings of the 23rd International Society for Music Information Retrieval Conference, ISMIR 2022, Bengaluru, India, December 4-8, 2022","author":"Ou"},{"key":"ref5","first-page":"343","article-title":"LyricWhiz: Robust Multilingual Zero-Shot Lyrics Transcription by Whispering to ChatGPT","volume-title":"Proceedings of the 24th International Society for Music Information Retrieval Conference, ISMIR 2023, Milan, Italy, November 5-9, 2023","author":"Zhuo"},{"key":"ref6","first-page":"688","article-title":"PolySinger: Singing-Voice to Singing-Voice Translation From English to Japanese","volume-title":"Proceedings of the 25th International Society for Music Information Retrieval Conference, ISMIR 2024, San Francisco, California, USA and Online, November 10-14, 2024","author":"Antonisen"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2014.00809"},{"key":"ref8","article-title":"Hybrid Spectrogram and Waveform Source Separation","volume-title":"CoRR","volume":"abs\/2111.03600","author":"D\u00e9fossez","year":"2021"},{"key":"ref9","article-title":"Non-lexical vocables in scottish traditional music","volume-title":"PhD thesis","author":"Chambers","year":"1980"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3485465"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683470"},{"key":"ref12","first-page":"431","article-title":"DALI: A Large Dataset of Synchronized Audio, Lyrics and notes, Automatically Created using Teacher-student Machine Learning Paradigm","volume-title":"Proceedings of the 19th International Society for Music Information Retrieval Conference, ISMIR 2018, Paris, France, September","author":"Meseguer-Brocal"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3091817"},{"key":"ref14","article-title":"MUSDB18-HQ - an uncompressed version of MUSDB18","author":"Rafii","year":"2019"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-78"},{"key":"ref16","article-title":"Spurious Correlations in Machine Learning: A Survey","author":"Ye","year":"2024"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP55844.2023.10285863"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-731"}],"event":{"name":"2025 IEEE International Conference on Multimedia and Expo Workshops (ICMEW)","location":"Nantes, France","start":{"date-parts":[[2025,6,30]]},"end":{"date-parts":[[2025,7,4]]}},"container-title":["2025 IEEE International Conference on Multimedia and Expo Workshops (ICMEW)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11152022\/11152034\/11152264.pdf?arnumber=11152264","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T04:47:51Z","timestamp":1757566071000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11152264\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,30]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/icmew68306.2025.11152264","relation":{},"subject":[],"published":{"date-parts":[[2025,6,30]]}}}