{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:25:53Z","timestamp":1775665553002,"version":"3.50.1"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,1]],"date-time":"2021-09-01T00:00:00Z","timestamp":1630454400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100002341","name":"Academy of Finland","doi-asserted-by":"publisher","award":["332063 Teaching machines to listen"],"award-info":[{"award-number":["332063 Teaching machines to listen"]}],"id":[{"id":"10.13039\/501100002341","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100000266","name":"Engineering and Physical Sciences Research Council","doi-asserted-by":"publisher","award":["EP\/N014111\/1 Making Sense of Sounds"],"award-info":[{"award-number":["EP\/N014111\/1 Making Sense of Sounds"]}],"id":[{"id":"10.13039\/501100000266","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010663","name":"H2020 European Research Council","doi-asserted-by":"publisher","award":["637422 \/ EVERYSOUND"],"award-info":[{"award-number":["637422 \/ EVERYSOUND"]}],"id":[{"id":"10.13039\/100010663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Signal Process. Mag."],"published-print":{"date-parts":[[2021,9]]},"DOI":"10.1109\/msp.2021.3090678","type":"journal-article","created":{"date-parts":[[2021,8,27]],"date-time":"2021-08-27T20:22:41Z","timestamp":1630095761000},"page":"67-83","source":"Crossref","is-referenced-by-count":216,"title":["Sound Event Detection: A tutorial"],"prefix":"10.1109","volume":"38","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6640-9752","authenticated-orcid":false,"given":"Annamaria","family":"Mesaros","sequence":"first","affiliation":[{"name":"Computing Sciences, Tampere University, Tampere, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8855-0415","authenticated-orcid":false,"given":"Toni","family":"Heittola","sequence":"additional","affiliation":[{"name":"Computing Sciences, Tampere University, Tampere, 33720, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4604-9729","authenticated-orcid":false,"given":"Tuomas","family":"Virtanen","sequence":"additional","affiliation":[{"name":"Faculty of Information Technology and Communication Sciences, Tampere University, Tampere, FI-33720, Finland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9708-1075","authenticated-orcid":false,"given":"Mark D.","family":"Plumbley","sequence":"additional","affiliation":[{"name":"Centre for Vision, Speech and Signal Processing (CVSSP), University of Surrey, Guildford, GU2 7XH, United Kingdom of Great Britain and Northern Ireland"}]}],"member":"263","reference":[{"key":"ref39","first-page":"626","article-title":"Guided learning for weakly-labeled semi-supervised sound event detection","author":"lin","year":"0","journal-title":"Proc IEEE Int Conf Acoustics Speech Signal Process (ICASSP)"},{"key":"ref38","author":"hinton","year":"2015","journal-title":"Distilling the knowledge in a neural network"},{"key":"ref33","first-page":"892","article-title":"SoundNet: Learning sound representations from unlabeled video","author":"aytar","year":"0","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682909"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682475"},{"key":"ref37","first-page":"1195","article-title":"Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results","author":"tarvainen","year":"0","journal-title":"Proc Adv Neural Inform Process Syst"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-486"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2016.7552989"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.73"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2690575"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2907016"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63450-0_5"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1121\/1.2917563"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1207\/s15326969eco0501_1"},{"key":"ref20","first-page":"827","author":"reynolds","year":"2015","journal-title":"Gaussian Mixture Models"},{"key":"ref22","first-page":"1","article-title":"Harmonic\/percussive separation using median filtering","volume":"13","author":"fitzgerald","year":"0","journal-title":"Proc Int Conf Digital Audio Effects (DAFx)"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2014.2326181"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2690570"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952260"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2018.8489470"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7177954"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2019.8937283"},{"key":"ref10","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63450-0_6"},{"key":"ref40","first-page":"326","article-title":"Task-aware mean teacher method for large scale weakly labeled semi-supervised sound event detection","author":"yan","year":"0","journal-title":"Proc IEEE Int Conf Acoustics Speech Signal Process (ICASSP)"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683158"},{"key":"ref13","author":"fonseca","year":"2020","journal-title":"Addressing missing labels in large-scale sound event recognition using a teacher-student framework with loss masking"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2017.8170052"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"ref16","first-page":"165","article-title":"A dataset of reverberant spatial sound scenes with moving sources for sound event localization and detection","author":"politis","year":"0","journal-title":"Proc Detection Classification Acoustic Scenes Events Workshop"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472917"},{"key":"ref18","first-page":"178","article-title":"Training general-purpose audio tagging networks with noisy labels and iterative self-verification","author":"dorfer","year":"0","journal-title":"Proc Detection Classification Acoustic Scenes Events Workshop (DCASE2018)"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63450-0_4"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63450-0_7"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2018.2869928"},{"key":"ref6","author":"m\u00fcller","year":"2015","journal-title":"Fundamentals of Music Processing Audio Analysis Algorithms Applications"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2778423"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1186\/1687-4722-2013-1"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"key":"ref49","author":"konec?n","year":"2016","journal-title":"Federated optimization distributed machine learning for on-device intelligence"},{"key":"ref9","first-page":"36","article-title":"Sound event detection in multisource environments using source separation","author":"heittola","year":"0","journal-title":"Proc Workshop on Machine Listening Multisource Environments CHiME2011"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2958408"},{"key":"ref45","first-page":"591","article-title":"The million song dataset","author":"bertin-mahieux","year":"0","journal-title":"Proc 12th Int Conf Music Information Retrieval (ISMIR)"},{"key":"ref48","first-page":"1273","article-title":"Communication-efficient learning of deep networks from decentralized data","author":"mcmahan","year":"0","journal-title":"Proc Artif Intell Statist"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3029652"},{"key":"ref42","first-page":"315","article-title":"Evaluation of multiple-f0 estimation and tracking systems","author":"bay","year":"0","journal-title":"Proc 10th Int Soc Music Inform Retrieval Conf ISMIR"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-63450-0_12"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-017-9410-y"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052995"}],"container-title":["IEEE Signal Processing Magazine"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/79\/9524538\/09524590.pdf?arnumber=9524590","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,5,5]],"date-time":"2023-05-05T17:43:22Z","timestamp":1683308602000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9524590\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9]]},"references-count":50,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/msp.2021.3090678","relation":{},"ISSN":["1053-5888","1558-0792"],"issn-type":[{"value":"1053-5888","type":"print"},{"value":"1558-0792","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,9]]}}}