{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:59:27Z","timestamp":1776095967800,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,5,1]],"date-time":"2020-05-01T00:00:00Z","timestamp":1588291200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,5]]},"DOI":"10.1109\/icassp40776.2020.9054567","type":"proceedings-article","created":{"date-parts":[[2020,4,9]],"date-time":"2020-04-09T20:21:13Z","timestamp":1586463673000},"page":"496-500","source":"Crossref","is-referenced-by-count":32,"title":["Automatic Lyrics Alignment and Transcription in Polyphonic Music: Does Background Music Help?"],"prefix":"10.1109","author":[{"given":"Chitralekha","family":"Gupta","sequence":"first","affiliation":[]},{"given":"Emre","family":"Yilmaz","sequence":"additional","affiliation":[]},{"given":"Haizhou","family":"Li","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1099"},{"key":"ref32","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1456"},{"key":"ref30","first-page":"5206","article-title":"Lib-rispeech: An ASR corpus based on public domain audio books","author":"panayotov","year":"2015","journal-title":"Proc ICASSP"},{"key":"ref36","article-title":"Mirex2018: Lyrics-to-audio alignment for instrument accompanied singings","author":"wang","year":"2018","journal-title":"Mirex 2018"},{"key":"ref35","first-page":"281","article-title":"Modeling of phoneme durations for alignment between polyphonic audio and lyrics","author":"dzhambazov","year":"2015","journal-title":"Sound and Music Computing Conference"},{"key":"ref34","first-page":"358","article-title":"Bootstrapping a system for phoneme recognition and keyword spotting in unaccompanied singing","author":"kruspe","year":"2016","journal-title":"ISMIR"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1520"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683470"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1525\/mp.2015.32.5.470"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2378"},{"key":"ref14","article-title":"Knowledge-based Probabilistic Modeling for Tracking Lyrics in Music Audio Signals","author":"dzhambazov","year":"2017","journal-title":"Ph D thesis"},{"key":"ref15","article-title":"Dali: A large dataset of synchronized audio, lyrics and notes, automatically created using teacher-student machine learning paradigm","author":"meseguer-brocal","year":"2018","journal-title":"Proc ISMIR"},{"key":"ref16","first-page":"464","article-title":"Discourse analysis of lyric and lyric-based classification of music","author":"fang","year":"2017","journal-title":"ISMIR"},{"key":"ref17","first-page":"ii?1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"0","journal-title":"Proceedings of the 31st International Conference on International Conference on Machine Learning - Volume 32 2014 ICML&#x2019;14"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472621"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2763455"},{"key":"ref4","first-page":"281","article-title":"Hyperlinking lyrics: A method for creating hyperlinks between phrases in song lyrics","author":"fujihara","year":"2008","journal-title":"ISMIR"},{"key":"ref28","first-page":"3586","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref3","first-page":"532","article-title":"Lyrics recognition from a singing voice based on finite state automaton for music information retrieval","author":"hosoya","year":"2005","journal-title":"ISMIR"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1417"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1186\/1687-4722-2010-546047"},{"key":"ref5","first-page":"369","article-title":"Phoneme recognition in popular music","author":"gruhne","year":"2007","journal-title":"ISMIR"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1267"},{"key":"ref8","first-page":"396","article-title":"Automatic lyrics-to-audio alignment on polyphonic music using singing-adapted acoustic models","author":"gupta","year":"2019","journal-title":"Proc ICASSP IEEE"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518002"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1177\/0305735614528833"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1515\/comm.1981.7.1.3"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2011.2159577"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2002.800560"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2159595"},{"key":"ref21","first-page":"494","article-title":"Recognition of phonemes in a-cappella recordings using temporal patterns and mel frequency cepstral coefficients","author":"hansen","year":"2012","journal-title":"9th Sound and Music Computing Conference (SMC)"},{"key":"ref24","article-title":"Wave-u-net: A multi-scale neural network for end-to-end audio source separation","author":"stoller","year":"2018","journal-title":"Proc ISMIR"},{"key":"ref23","article-title":"Musical genre recognition using a cnn","year":"0"},{"key":"ref26","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"Proc ASRU"},{"key":"ref25","article-title":"Implementation of the wave-u-net for audio source separation","year":"0"}],"event":{"name":"ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Barcelona, Spain","start":{"date-parts":[[2020,5,4]]},"end":{"date-parts":[[2020,5,8]]}},"container-title":["ICASSP 2020 - 2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9040208\/9052899\/09054567.pdf?arnumber=9054567","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,28]],"date-time":"2022-06-28T00:16:36Z","timestamp":1656375396000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9054567\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,5]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/icassp40776.2020.9054567","relation":{},"subject":[],"published":{"date-parts":[[2020,5]]}}}