{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,4]],"date-time":"2025-12-04T10:00:35Z","timestamp":1764842435750,"version":"3.37.3"},"reference-count":58,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61902280","61373104","61771340"],"award-info":[{"award-number":["61902280","61373104","61771340"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006606","name":"Natural Science Foundation of Tianjin City","doi-asserted-by":"publisher","award":["19JCYBJC15600","18JCYBJC15300"],"award-info":[{"award-number":["19JCYBJC15600","18JCYBJC15300"]}],"id":[{"id":"10.13039\/501100006606","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Tianjin Science and Technology","award":["20YDTPJC00870"],"award-info":[{"award-number":["20YDTPJC00870"]}]},{"name":"Grant-in-Aid for Scientific Research","award":["17H01761"],"award-info":[{"award-number":["17H01761"]}]},{"name":"I-O DATA foundation"},
{"name":"Joint International Research (Fostering Joint International Research)","award":["20KK0233"],"award-info":[{"award-number":["20KK0233"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/taslp.2021.3051331","type":"journal-article","created":{"date-parts":[[2021,1,15]],"date-time":"2021-01-15T20:46:16Z","timestamp":1610743576000},"page":"807-822","source":"Crossref","is-referenced-by-count":15,"title":["Evolving Multi-Resolution Pooling CNN for Monaural Singing Voice Separation"],"prefix":"10.1109","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1900-8259","authenticated-orcid":false,"given":"Weitao","family":"Yuan","sequence":"first","affiliation":[]},{"given":"Bofei","family":"Dong","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4713-5012","authenticated-orcid":false,"given":"Shengbei","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6605-2052","authenticated-orcid":false,"given":"Masashi","family":"Unoki","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8393-5703","authenticated-orcid":false,"given":"Wenwu","family":"Wang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1109\/TASL.2009.2026503","article-title":"On the improvement of singing voice separation for monaural recordings using the MIR-1K dataset","volume":"18","author":"hsu","year":"2010",
"journal-title":"IEEE Trans Audio Speech Lang Process"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2014.2365354"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00492"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00332"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1162\/106365602320169811"},{"key":"ref30","article-title":"Neural architecture search: A survey","volume":"20","author":"elsken","year":"2019","journal-title":"J Mach Learn Res"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3289185"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.858005"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3321707.3321729"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3321707.3321729"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2952013"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952118"},{"key":"ref29","first-page":"5877","article-title":"The evolved transformer","author":"so","year":"0","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.889789"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2825440"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2017.8168117"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683800"},{"key":"ref21","first-page":"334","article-title":"Wave-u-net: A multi-scale neural network for end-to-end audio source separation","author":"stoller","year":"0","journal-title":"Proc Conf Int Soc Music Inf Retrieval"},{"key":"ref24","first-page":"6000","article-title":"Attention is all you need","author":"vaswani","year":"0",
"journal-title":"Proc Neural Inf Process Syst"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/655"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461671"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682443"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.899291"},{"journal-title":"Elementary Statistics","year":"1976","author":"hoel","key":"ref51"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2017.8169987"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952158"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2016.7760548"},{"key":"ref55","first-page":"553","article-title":"Singing voice separation using RPCA with weighted l_1 -norm","author":"jeong","year":"0","journal-title":"Proc 13th Int Conf Latent Variable Anal Signal Separation"},{"key":"ref54","first-page":"66","article-title":"Deep NMF for speech separation","author":"roux","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/SPCOM.2016.7746672"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.2307\/2280779"},{"key":"ref10","first-page":"289","article-title":"Music source separation using stacked hourglass networks","author":"park","year":"0","journal-title":"Proc 19th Int Soc Music Inf Ret Conf"},{"key":"ref11","first-page":"745","article-title":"Singing voice separation with deep u-net convolutional networks","author":"jansson","year":"0","journal-title":"Proc 18th Int Soc Music Inf Ret Conf"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-22482-4_45"},{"key":"ref12","first-page":"340","article-title":"Multi-resolution fully convolutional neural networks for monaural audio source separation","author":"grais","year":"0",
"journal-title":"Proc 14th Int Conf Latent Variable Anal Signal Separation"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.23919\/EUSIPCO.2018.8553571"},{"key":"ref14","first-page":"7354","article-title":"Self-attention generative adversarial networks","author":"zhang","year":"0","journal-title":"Proc 36th Int Conf Mach Learn"},{"key":"ref15","article-title":"Optimally scheduling CNN convolutions for efficient memory access","volume":"abs\/1902.01492","author":"stoutchinin","year":"2019","journal-title":"Computing Research Repository (CoRR)"},{"key":"ref16","first-page":"1096","article-title":"Unsupervised feature learning for audio classification using convolutional deep belief networks","author":"lee","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854953"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/4235.996017"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461822"},{"key":"ref4","first-page":"429","article-title":"Deep karaoke: Extracting vocals from musical mixtures using a convolutional deep neural network","author":"simpson","year":"0","journal-title":"Proc 12th Int Conf Latent Var Anal Signal Separation"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","article-title":"Deep learning","volume":"521","author":"lecun","year":"2015","journal-title":"Nature"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/GlobalSIP.2017.8309164"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2468583"},{"key":"ref8","first-page":"625","article-title":"State of the art report: Audio-based music structure analysis","author":"paulus","year":"0","journal-title":"Proc 11th Int Soc Music Inf Ret Conf"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2019.2935867"},{"key":"ref49","article-title":"Accurate, large minibatch SGD: training imagenet in 1 hour","volume":"abs\/1706.02677",
"author":"goyal","year":"2017","journal-title":"Computing Research Repository (CoRR)"},{"key":"ref9","first-page":"1","article-title":"Music transformer: Generating music with long term structure","author":"huang","year":"0","journal-title":"Proc 7th Int Conf Learn Representations"},{"key":"ref46","first-page":"234","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"0","journal-title":"Proc 18th Int Conf Med Image Comput Comput -Assist Interv"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7177973"},{"key":"ref48","first-page":"1","article-title":"SGDR: Stochastic gradient descent with warm restarts","author":"loshchilov","year":"0","journal-title":"Proc 5th Int Conf Learn Representations"},{"key":"ref47","first-page":"1","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0","journal-title":"Proc 3rd Int Conf Learn Representations"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2468583"},{"article-title":"The MUSDB18 corpus for music separation","year":"2017","author":"rafii","key":"ref41"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21105\/joss.01667"},{"key":"ref43","first-page":"427","article-title":"Low-rank representation of both singing voice and music accompaniment via learned dictionaries","author":"yang","year":"0","journal-title":"Proc 14th Int Soc Music Inf Ret Conf"}],
"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9289074\/09325566.pdf?arnumber=9325566","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T14:53:54Z","timestamp":1652194434000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9325566\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":58,"URL":"https:\/\/doi.org\/10.1109\/taslp.2021.3051331","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"type":"print","value":"2329-9290"},{"type":"electronic","value":"2329-9304"}],"subject":[],"published":{"date-parts":[[2021]]}}}