{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T15:52:33Z","timestamp":1776181953745,"version":"3.50.1"},"reference-count":47,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,1,1]],"date-time":"2022-01-01T00:00:00Z","timestamp":1640995200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Singapore Ministry of Education Academic Research Fund Tier-2","award":["MOE2017-T2-2-060"],"award-info":[{"award-number":["MOE2017-T2-2-060"]}]},{"name":"Google Cloud Research Credits Program","award":["GCP205559654"],"award-info":[{"award-number":["GCP205559654"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2022]]},"DOI":"10.1109\/taslp.2022.3173054","type":"journal-article","created":{"date-parts":[[2022,5,20]],"date-time":"2022-05-20T19:37:19Z","timestamp":1653075439000},"page":"1749-1762","source":"Crossref","is-referenced-by-count":59,"title":["SALSA: Spatial Cue-Augmented Log-Spectrogram Features for Polyphonic Sound Event Localization and Detection"],"prefix":"10.1109","volume":"30","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-0210-6373","authenticated-orcid":false,"given":"Thi Ngoc Tho","family":"Nguyen","sequence":"first","affiliation":[{"name":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3878-5048","authenticated-orcid":false,"given":"Karn N.","family":"Watcharasupat","sequence":"additional","affiliation":[{"name":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"}]},{"given":"Ngoc Khanh","family":"Nguyen","sequence":"additional","affiliation":[{"name":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"}]},{"given":"Douglas L.","family":"Jones","sequence":"additional","affiliation":[{"name":"Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign, Urbana, IL, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7143-1823","authenticated-orcid":false,"given":"Woon-Seng","family":"Gan","sequence":"additional","affiliation":[{"name":"School of Electrical and Electronic Engineering, Nanyang Technological University, Singapore"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2017.2657381"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2016.7738875"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2015.2470216"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2004.1307286"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2018.2885636"},{"key":"ref6","first-page":"622","article-title":"Classification of spatial audio location and content using Convolutional neural networks","volume-title":"Proc. 138th Audio Eng. Soc. Conv.","author":"Hirvonen","year":"2015"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.33682\/4jhy-bj81"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.33682\/3qgs-e216"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053045"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414602"},{"key":"ref11","first-page":"11","article-title":"Event-independent network for polyphonic sound event localization and detection","volume-title":"Proc. 5th Workshop Detection Classification Acoust. Scenes Events","author":"Cao","year":"2020"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413473"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3069193"},{"key":"ref14","first-page":"160","article-title":"On multitask loss function for audio event detection and localization","volume-title":"Proc. 5th Workshop Detection Classification Acoust. Scenes Events","author":"Phan","year":"2020"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3256088"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413609"},{"key":"ref17","article-title":"Sound event localization and detection using cross-modal attention and parameter sharing for DCASE2021 challenge","author":"Lee","year":"2021"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2759"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCE-Asia49877.2020.9277097"},{"key":"ref20","article-title":"Multi-scale network for sound event localization and detection","author":"Emmanuel","year":"2021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.33682\/9f2t-ab23"},{"key":"ref22","article-title":"The USTC-iFlytek system for sound event localization and detection of DCASE2020 challenge","author":"Wang","year":"2020"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413609"},{"key":"ref24","first-page":"165","article-title":"A dataset of reverberant spatial sound scenes with moving sources for sound event localization and detection","volume-title":"Proc. 5th Workshop Detect. Classif. Acoust. Scenes Events","author":"Politis","year":"2020"},{"key":"ref25","first-page":"125","article-title":"A dataset of dynamic reverberant sound scenes with directional interferers for sound event localization and detection","volume-title":"Proc. 6th Workshop Detect. Classif. Acoust. Scenes Events","author":"Politis","year":"2021"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.33682\/1xwd-5v76"},{"key":"ref27","first-page":"2287","article-title":"Robust DOA estimation of multiple speech sources","volume-title":"Proc. IEEE Int. Conf. Acoust., Speech, Signal Process.","author":"Nguyen","year":"2014"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3019646"},{"key":"ref29","article-title":"DCASE 2021 task 3: Spectrotemporally-aligned features for polyphonic sound event localization and detection","author":"Nguyen","year":"2021"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1155\/2007\/27616"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1121\/1.2871597"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2013.2272524"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404831"},{"key":"ref34","first-page":"120","article-title":"Ensemble of sequence matching networks for dynamic sound event localization, detection, and tracking","volume-title":"Proc. 5th Workshop Detection Classification Acoust. Scenes Events","author":"Nguyen","year":"2020"},{"key":"ref35","article-title":"DCASE 2019 Task 3: A two-step system for sound event localization and detection","author":"Nguyen","year":"2019"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2180896"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953332"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/j.sigpro.2014.01.021"},{"key":"ref39","article-title":"Two-stage sound event localization and detection using intensity vector and generalized cross-correlation","author":"Cao","year":"2019"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952211"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"ref42","article-title":"DCASE 2021: Sound event localization and detection with directional interference","volume-title":"Github.","author":"Adavanne","year":"2021"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.3047233"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2019.8937220"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2008.2010596"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9657755\/09779561.pdf?arnumber=9779561","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,22]],"date-time":"2024-01-22T22:18:32Z","timestamp":1705961912000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9779561\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/taslp.2022.3173054","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022]]}}}