{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T18:21:40Z","timestamp":1769019700415,"version":"3.49.0"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,1,1]],"date-time":"2020-01-01T00:00:00Z","timestamp":1577836800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["NRF-2015R1C1A1A02036962"],"award-info":[{"award-number":["NRF-2015R1C1A1A02036962"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2020]]},"DOI":"10.1109\/taslp.2020.2993893","type":"journal-article","created":{"date-parts":[[2020,5,20]],"date-time":"2020-05-20T20:52:20Z","timestamp":1590007940000},"page":"1656-1668","source":"Crossref","is-referenced-by-count":15,"title":["Semantic Tagging of Singing Voices in Popular Music Recordings"],"prefix":"10.1109","volume":"28","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8167-5752","authenticated-orcid":false,"given":"Keunhyoung Luke","family":"Kim","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1126-0081","authenticated-orcid":false,"given":"Jongpil","family":"Lee","sequence":"additional","affiliation":[]},{"given":"Sangeun","family":"Kum","sequence":"additional","affiliation":[]},{"given":"Chae Lin","family":"Park","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2664-2119","authenticated-orcid":false,"given":"Juhan","family":"Nam","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2655019"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-27169-4_9"},{"key":"ref33","article-title":"Building k-pop singing voice tag dataset: A progress report","author":"kim","year":"0","journal-title":"Proc Late-Breaking\/Demo 18th Int Soc Musical Inf Retrieval Conf"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1080\/09298210802479300"},{"key":"ref31","article-title":"Five approaches to collecting tags for music","author":"turnbull","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref30","article-title":"Open-unmix - A reference implementation for music source separation","author":"st\u00f6ter","year":"0","journal-title":"Open Source Software"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2004.1326806"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4518002"},{"key":"ref35","first-page":"287","article-title":"RWC music database: popular, classical and jazz music databases","author":"goto","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval"},{"key":"ref34","article-title":"Exploring data augmentation for improved singing voice detection with neural networks","author":"schl\u00fcter","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref28","article-title":"Automatic record reviews","author":"whitman","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref27","first-page":"31","article-title":"Modeling genre with the Music Genome Project: Comparing human-labeled attributes and audio features","author":"prockup","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref29","first-page":"334","article-title":"WAVE-U-NET: A multi-scale neural network for end-to-end audio source separation","author":"stoller","year":"0","journal-title":"Proc Int Soc for Music Inf Retrieval Conf"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2018.2875133"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICOSP.2014.7015431"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2014.6890290"},{"key":"ref22","first-page":"272","author":"donnadieu","year":"2007","journal-title":"Mental Representation of the Timbre of Complex Sounds"},{"key":"ref21","first-page":"591","article-title":"The million song dataset","author":"bertin-mahieux","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref24","first-page":"159","article-title":"Sharpness as an attribute of the timbre of steady sounds","volume":"30","author":"bismarck","year":"1974","journal-title":"Acustica"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1037\/h0053526"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1121\/1.5002886"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(02)00084-5"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/ACII.2013.47"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3389\/neuro.06.004.2008"},{"key":"ref59","article-title":"A hybrid of deep audio feature and i-vector for artist recognition","author":"park","year":"0","journal-title":"Proc 35 th Int Conf Mach Learn Joint Workshop Mach Learn Music"},{"key":"ref58","first-page":"717","article-title":"Representation learning of music using artist labels","author":"park","year":"0","journal-title":"Proc Int Soc for Music Inf Retrieval Conf"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2012.07.007"},{"key":"ref56","first-page":"1706","article-title":"An automatic singing skill evaluation method for unknown melodies using pitch interval accuracy and vibrato features","author":"nakano","year":"0","journal-title":"Proc Int Conf Speech Commun Technol"},{"key":"ref55","article-title":"Music highlight extraction via convolutional recurrent attention networks","author":"ha","year":"0","journal-title":"Proc 34th Int Conf Mach Learn Music Discovery Workshop"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.5334\/tismir.14"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2004.840597"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1561\/1500000042"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2174224"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911559"},{"key":"ref40","first-page":"368","article-title":"Ranking-based emotion recognition for experimental music","author":"fan","year":"0","journal-title":"Proc Int Soc for Music Inf Retrieval Conf"},{"key":"ref12","article-title":"Singer identification in popular music recordings using voice coding features","author":"kim","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2041386"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854595"},{"key":"ref15","first-page":"1244","article-title":"An automatic singing impression estimation method using factor analysis and multiple regression","author":"kanato","year":"0","journal-title":"Proceedings ICMC SMC"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.913750"},{"key":"ref17","first-page":"387","article-title":"Evaluation of algorithms using games: The case of music tagging","author":"law","year":"0","journal-title":"Proc 10th Int Conf Music Inf Retrieval"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2018.2874383"},{"key":"ref19","first-page":"465","article-title":"Prediction of time-varying musical mood distributions from audio","author":"schmidt","year":"0","journal-title":"Proc 11th Int Soc for Music Inf Retrieval Conf"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2042124"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2008.4517573"},{"key":"ref6","first-page":"819","article-title":"Melody extraction on vocal segments using multi-column deep neural networks","author":"kum","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval (ISMIR)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2013.2271648"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854174"},{"key":"ref7","first-page":"532","article-title":"Lyrics recognition from a singing voice based on finite state automaton for music information retrieval","author":"hosoya","year":"0","journal-title":"Proc Int Society for Music Inf Retrieval Conf (ISMIR)"},{"key":"ref49","first-page":"805","article-title":"Automatic tagging using deep convolutional neural networks","author":"choi","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref9","article-title":"Mid-level music melody representation of polyphonic audio for query-by-humming system","author":"song","year":"0","journal-title":"Proc 3rd Int Conf Music Inf Retrieval (ISMIR)"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2017.2771298"},{"key":"ref45","first-page":"297","article-title":"Improving auto-tagging by modeling semantic co-occurrences","author":"miotto","year":"0","journal-title":"Proc 11th Int Soc for Music Inf Retrieval Conf"},{"key":"ref48","first-page":"141","article-title":"Transfer learning for music classification and regression tasks","author":"choi","year":"0","journal-title":"Proc Int Soc for Music Inf Retrieval Conf"},{"key":"ref47","article-title":"Modern hierarchical, agglomerative clustering algorithms","author":"m\u00fcllner","year":"2011","journal-title":"arXiv 1109 2378"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1080\/09298215.2019.1613436"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1080\/09298215.2016.1200631"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1177\/001316447003000105"},{"key":"ref43","first-page":"578","article-title":"An analysis of agreement in classical music perception and its relationship to listener characteristics","author":"schedl","year":"0","journal-title":"Proc Int Soc for Music Inf Retrieval Conf"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/8938144\/09097399.pdf?arnumber=9097399","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,4,27]],"date-time":"2022-04-27T17:31:25Z","timestamp":1651080685000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9097399\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020]]},"references-count":59,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.2993893","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020]]}}}