{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,11]],"date-time":"2025-07-11T10:43:33Z","timestamp":1752230613179,"version":"3.37.3"},"reference-count":40,"publisher":"Springer Science and Business Media LLC","issue":"22","license":[{"start":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T00:00:00Z","timestamp":1724112000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T00:00:00Z","timestamp":1724112000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62003308"],"award-info":[{"award-number":["62003308"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Appl Intell"],"published-print":{"date-parts":[[2024,11]]},"DOI":"10.1007\/s10489-024-05702-9","type":"journal-article","created":{"date-parts":[[2024,8,20]],"date-time":"2024-08-20T09:02:44Z","timestamp":1724144564000},"page":"11357-11372","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A hybrid offline-online method for sound event localization and detection"],"prefix":"10.1007","volume":"54","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8527-9826","authenticated-orcid":false,"given":"Wenjie","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Peng","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Zhan","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Zhenhe","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Mingliang","family":"Xu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,20]]},"reference":[{"key":"5702_CR1","doi-asserted-by":"publisher","first-page":"108882","DOI":"10.1016\/j.apacoust.2022.108882","volume":"196","author":"K Imoto","year":"2022","unstructured":"Imoto K, Mishima S, Arai Y, Kondo R (2022) Impact of data imbalance caused by inactive frames and difference in sound duration on sound event detection performance. Appl Acoust 196:108882. https:\/\/doi.org\/10.1016\/j.apacoust.2022.108882","journal-title":"Appl Acoust"},{"key":"5702_CR2","doi-asserted-by":"publisher","first-page":"103339","DOI":"10.1109\/ACCESS.2020.2999388","volume":"8","author":"TK Chan","year":"2020","unstructured":"Chan TK, Chin CS (2020) A comprehensive review of polyphonic sound event detection. IEEE Access 8:103339\u2013103373. https:\/\/doi.org\/10.1109\/ACCESS.2020.2999388","journal-title":"IEEE Access"},{"key":"5702_CR3","doi-asserted-by":"publisher","first-page":"4573","DOI":"10.1109\/TMM.2022.3178591","volume":"25","author":"S Park","year":"2023","unstructured":"Park S, Han DK, Elhilali M (2023) Cross-referencing self-training network for sound event detection in audio mixtures. IEEE Trans Multimed 25:4573\u20134585. https:\/\/doi.org\/10.1109\/TMM.2022.3178591","journal-title":"IEEE Trans Multimed"},{"issue":"1","key":"5702_CR4","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1121\/10.0011809","volume":"152","author":"P-A Grumiaux","year":"2022","unstructured":"Grumiaux P-A, Kiti\u0107 S, Girin L, Gu\u00e9rin A (2022) A survey of sound source localization with deep learning methods. The J Acoust Soc Am 152(1):107\u2013151. https:\/\/doi.org\/10.1121\/10.0011809","journal-title":"The J Acoust Soc Am"},{"key":"5702_CR5","doi-asserted-by":"publisher","unstructured":"Chen J, Liang H, Wang R, Zeng J, Lu P (2023) Dynamic interactive learning network for audio-visual event localization. Appl Intell, pp 1\u201312. https:\/\/doi.org\/10.1007\/s10489-023-05146-7","DOI":"10.1007\/s10489-023-05146-7"},{"issue":"7","key":"5702_CR6","doi-asserted-by":"publisher","first-page":"4631","DOI":"10.1007\/s11831-022-09747-2","volume":"29","author":"D Desai","year":"2022","unstructured":"Desai D, Mehendale N (2022) A review on sound source localization systems. Arch Comput Methods Eng 29(7):4631\u20134642. https:\/\/doi.org\/10.1007\/s11831-022-09747-2","journal-title":"Arch Comput Methods Eng"},{"key":"5702_CR7","doi-asserted-by":"publisher","first-page":"107372","DOI":"10.1016\/j.apacoust.2020.107372","volume":"166","author":"H Li","year":"2020","unstructured":"Li H, Lau S-K (2020) A review of audio-visual interaction on soundscape assessment in urban built environments. Appl Acoust 166:107372. https:\/\/doi.org\/10.1016\/j.apacoust.2020.107372","journal-title":"Appl Acoust"},{"key":"5702_CR8","doi-asserted-by":"publisher","first-page":"103360","DOI":"10.1016\/j.scs.2021.103360","volume":"75","author":"Z Li","year":"2021","unstructured":"Li Z, Ba M, Kang J (2021) Physiological indicators and subjective restorativeness with audio-visual interactions in urban soundscapes. Sustain Cities Soc 75:103360. https:\/\/doi.org\/10.1016\/j.scs.2021.103360","journal-title":"Sustain Cities Soc"},{"issue":"1","key":"5702_CR9","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1109\/TITS.2015.2470216","volume":"17","author":"P Foggia","year":"2016","unstructured":"Foggia P, Petkov N, Saggese A, Strisciuglio N, Vento M (2016) Audio surveillance of roads: A system for detecting anomalous sounds. IEEE Trans Intell Trans Sys 17(1):279\u2013288. https:\/\/doi.org\/10.1109\/TITS.2015.2470216","journal-title":"IEEE Trans Intell Trans Sys"},{"key":"5702_CR10","doi-asserted-by":"publisher","first-page":"103116","DOI":"10.1016\/j.jvcir.2021.103116","volume":"77","author":"O Elharrouss","year":"2021","unstructured":"Elharrouss O, Almaadeed N, Al-Maadeed SA (2021) A review of video surveillance systems. J Vis Commun Image Represent 77:103116. https:\/\/doi.org\/10.1016\/j.jvcir.2021.103116","journal-title":"J Vis Commun Image Represent"},{"issue":"1","key":"5702_CR11","doi-asserted-by":"publisher","first-page":"34","DOI":"10.1109\/JSTSP.2018.2885636","volume":"13","author":"S Adavanne","year":"2019","unstructured":"Adavanne S, Politis A, Nikunen J, Virtanen T (2019) Sound event localization and detection of overlapping sources using convolutional recurrent neural networks. IEEE J Sel Top Signal Process 13(1):34\u201348. https:\/\/doi.org\/10.1109\/JSTSP.2018.2885636","journal-title":"IEEE J Sel Top Signal Process"},{"key":"5702_CR12","doi-asserted-by":"publisher","first-page":"108961","DOI":"10.1016\/j.apacoust.2022.108961","volume":"199","author":"K Nagatomo","year":"2022","unstructured":"Nagatomo K, Yasuda M, Yatabe K, Saito S, Oikawa Y (2022) On-line sound event localization and detection for real-time recognition of surrounding environment. Appl Acoust 199:108961. https:\/\/doi.org\/10.1016\/j.apacoust.2022.108961","journal-title":"Appl Acoust"},{"key":"5702_CR13","doi-asserted-by":"publisher","first-page":"684","DOI":"10.1109\/TASLP.2020.3047233","volume":"29","author":"A Politis","year":"2021","unstructured":"Politis A, Mesaros A, Adavanne S, Heittola T, Virtanen T (2021) Overview and evaluation of sound event localization and detection in dcase 2019. IEEE\/ACM Trans Audio, Speech Lang Process 29:684\u2013698. https:\/\/doi.org\/10.1109\/TASLP.2020.3047233","journal-title":"IEEE\/ACM Trans Audio, Speech Lang Process"},{"key":"5702_CR14","unstructured":"Politis A, Shimada K, Sudarsanam P, Adavanne S, Krause D, Koyama Y, Takahashi N, Takahashi S, Mitsufuji Y, Virtanen T (2022) STARSS22: A dataset of spatial recordings of real scenes with spatiotemporal annotations of sound events. In: Proceedings of the 8th detection and classification of acoustic scenes and events 2022 workshop (DCASE2022), Nancy, France, pp 125\u2013129. https:\/\/dcase.community\/workshop2022\/proceedings"},{"key":"5702_CR15","doi-asserted-by":"publisher","unstructured":"Guizzo E, Marinoni C, Pennese M, Ren X, Zheng X, Zhang C, Masiero B, Uncini A, Comminiello D (2022) L3das22 challenge: Learning 3d audio sources in a real office environment. In: International conference on acoustics, speech and signal processing (ICASSP), pp 9186\u20139190. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746872","DOI":"10.1109\/ICASSP43922.2022.9746872"},{"key":"5702_CR16","unstructured":"Shimada K, Politis A, Sudarsanam P, Krause D.A, Uchida K, Adavanne S, Hakala A, Koyama Y, Takahashi N, Takahashi S, Virtanen T, Mitsufuji Y (2023) Starss23: An audio-visual dataset of spatial recordings of real scenes with spatiotemporal annotations of sound events. In: Advances in neural information processing systems, vol 36, pp 72931\u201372957. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/file\/e6c9671ed3b3106b71cafda3ba225c1a-Paper-Datasets_and_Benchmarks.pdf"},{"issue":"4","key":"5702_CR17","doi-asserted-by":"publisher","first-page":"042050","DOI":"10.1088\/1742-6596\/1213\/4\/042050","volume":"1213","author":"Y He","year":"2019","unstructured":"He Y, Zhao J (2019) Temporal convolutional networks for anomaly detection in time series. J Phys Conf Ser 1213(4):042050. https:\/\/doi.org\/10.1088\/1742-6596\/1213\/4\/042050","journal-title":"J Phys Conf Ser"},{"key":"5702_CR18","doi-asserted-by":"publisher","unstructured":"Mohimont L, Chemchem A, Alin F, Krajecki M, Steffenel LA (2021) Convolutional neural networks and temporal cnns for covid-19 forecasting in france. Appl Intell, pp 1\u201326. https:\/\/doi.org\/10.1007\/s10489-021-02359-6","DOI":"10.1007\/s10489-021-02359-6"},{"key":"5702_CR19","doi-asserted-by":"publisher","unstructured":"Zhu H, Yan J (2022) A deep learning based sound event location and detection algorithm using convolutional recurrent neural network. In: International conference on computer, information and telecommunication systems (CITS), pp 1\u20136. https:\/\/doi.org\/10.1109\/CITS55221.2022.9832991","DOI":"10.1109\/CITS55221.2022.9832991"},{"key":"5702_CR20","doi-asserted-by":"publisher","unstructured":"Cao Y, Kong Q, Iqbal T, An F, Wang W, Plumbley MD (2019) Polyphonic sound event detection and localization using a two-stage strategy. In: Proceedings of detection and classification of acoustic scenes and events workshop, pp 30\u201334. https:\/\/doi.org\/10.33682\/4jhy-bj81","DOI":"10.33682\/4jhy-bj81"},{"issue":"11","key":"5702_CR21","doi-asserted-by":"publisher","first-page":"8245","DOI":"10.1007\/s10489-021-02314-5","volume":"51","author":"Y Sudo","year":"2021","unstructured":"Sudo Y, Itoyama K, Nishida K, Nakadai K (2021) Multichannel environmental sound segmentation: with separately trained spectral and spatial features. Appl Intell 51(11):8245\u20138259. https:\/\/doi.org\/10.1007\/s10489-021-02314-5","journal-title":"Appl Intell"},{"issue":"6","key":"5702_CR22","doi-asserted-by":"publisher","first-page":"5015","DOI":"10.1007\/s10489-024-05438-6","volume":"54","author":"SG Kooolagudi","year":"2024","unstructured":"Kooolagudi SG et al (2024) Polyphonic sound event localization and detection using channel-wise fusionnet. Appl Intell 54(6):5015\u20135026. https:\/\/doi.org\/10.1007\/s10489-024-05438-6","journal-title":"Appl Intell"},{"key":"5702_CR23","doi-asserted-by":"publisher","unstructured":"Lee S-H, Hwang J-W, Song M-H, Park H-M (2022) A method based on dual cross-modal attention and parameter sharing for polyphonic sound event localization and detection. Appl Sci 12(10). https:\/\/doi.org\/10.3390\/app12105075","DOI":"10.3390\/app12105075"},{"key":"5702_CR24","doi-asserted-by":"publisher","unstructured":"Hu J, Cao Y, Wu M, Kong Q, Yang F, Plumbley MD, Yang J (2022) A track-wise ensemble event independent network for polyphonic sound event localization and detection. In: International conference on acoustics, speech and signal processing (ICASSP), pp 9196\u20139200. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9747283","DOI":"10.1109\/ICASSP43922.2022.9747283"},{"key":"5702_CR25","doi-asserted-by":"publisher","unstructured":"Mao Y, Zeng Y, Liu H, Zhu W, Zhou Y (2022) Icassp 2022 l3das22 challenge: Ensemble of resnet-conformers with ambisonics data augmentation for sound event localization and detection. In: International conference on acoustics, speech and signal processing (ICASSP), pp 9191\u20139195. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746673","DOI":"10.1109\/ICASSP43922.2022.9746673"},{"key":"5702_CR26","doi-asserted-by":"publisher","unstructured":"Shimada K, Koyama Y, Takahashi N, Takahashi S, Mitsufuji Y (2021) Accdoa: Activity-coupled cartesian direction of arrival representation for sound event localization and detection. In: International conference on acoustics, speech and signal processing (ICASSP), pp 915\u2013919. https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9413609","DOI":"10.1109\/ICASSP39728.2021.9413609"},{"issue":"8","key":"5702_CR27","doi-asserted-by":"publisher","first-page":"943","DOI":"10.1109\/89.966097","volume":"9","author":"Y Huang","year":"2001","unstructured":"Huang Y, Benesty J, Elko GW, Mersereati RM (2001) Real-time passive source localization: a practical linear-correction least-squares approach. IEEE Trans Speech Audio Process 9(8):943\u2013956. https:\/\/doi.org\/10.1109\/89.966097","journal-title":"IEEE Trans Speech Audio Process"},{"issue":"2","key":"5702_CR28","doi-asserted-by":"publisher","first-page":"1182","DOI":"10.1121\/10.0024764","volume":"155","author":"X Dang","year":"2024","unstructured":"Dang X, Zhu H (2024) An iteratively reweighted steered response power approach to multisource localization using a distributed microphone network. J Acoust Soc Am 155(2):1182\u20131197. https:\/\/doi.org\/10.1121\/10.0024764","journal-title":"J Acoust Soc Am"},{"key":"5702_CR29","doi-asserted-by":"publisher","first-page":"108884","DOI":"10.1016\/j.apacoust.2022.108884","volume":"196","author":"S Yin","year":"2022","unstructured":"Yin S, Yang Y, Chu Z, Shen L (2022) Resolution enhanced newtonized orthogonal matching pursuit solver for compressive beamforming. Appl Acoust 196:108884. https:\/\/doi.org\/10.1016\/j.apacoust.2022.108884","journal-title":"Appl Acoust"},{"key":"5702_CR30","doi-asserted-by":"publisher","first-page":"1352","DOI":"10.1109\/TASLP.2021.3067202","volume":"29","author":"BJ Cho","year":"2021","unstructured":"Cho BJ, Park H-M (2021) Convolutional maximum-likelihood distortionless response beamforming with steering vector estimation for robust speech recognition. IEEE\/ACM Tran Audio, Speech Lang Process 29:1352\u20131367. https:\/\/doi.org\/10.1109\/TASLP.2021.3067202","journal-title":"IEEE\/ACM Tran Audio, Speech Lang Process"},{"issue":"1","key":"5702_CR31","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1109\/JETCAS.2023.3243604","volume":"13","author":"P Schober","year":"2023","unstructured":"Schober P, Estiri SN, Aygun S, Jalilvand AH, Najafi MH, TaheriNejad N (2023) Stochastic computing design and implementation of a sound source localization system. IEEE J Emerg Sel Top Circ Syst 13(1):295\u2013311. https:\/\/doi.org\/10.1109\/JETCAS.2023.3243604","journal-title":"IEEE J Emerg Sel Top Circ Syst"},{"key":"5702_CR32","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TASLP.2020.3040031","volume":"29","author":"D Diaz-Guerra","year":"2021","unstructured":"Diaz-Guerra D, Miguel A, Beltran JR (2021) Robust sound source tracking using srp-phat and 3d convolutional neural networks. IEEE\/ACM Trans Audio, Speech Lang Process 29:300\u2013311. https:\/\/doi.org\/10.1109\/TASLP.2020.3040031","journal-title":"IEEE\/ACM Trans Audio, Speech Lang Process"},{"key":"5702_CR33","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1109\/TASLP.2022.3224282","volume":"31","author":"D Diaz-Guerra","year":"2023","unstructured":"Diaz-Guerra D, Miguel A, Beltran JR (2023) Direction of arrival estimation of sound sources using icosahedral cnns. IEEE\/ACM Trans Audio, Speech, Lang Process 31:313\u2013321. https:\/\/doi.org\/10.1109\/TASLP.2022.3224282","journal-title":"IEEE\/ACM Trans Audio, Speech, Lang Process"},{"key":"5702_CR34","doi-asserted-by":"publisher","unstructured":"Yang B, Liu H, Li X (2022) Srp-dnn: Learning direct-path phase difference for multiple moving sound source localization. In: International conference on acoustics, speech and signal processing (ICASSP), pp 721\u2013725. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746624","DOI":"10.1109\/ICASSP43922.2022.9746624"},{"key":"5702_CR35","doi-asserted-by":"publisher","unstructured":"Yang S-T, Jhou F-C, Wang J-C, Chang P-C (2021) Sound event localization and detection based on time-frequency separable convolutional compression network. In: 2021 IEEE 10th global conference on consumer electronics (GCCE), pp 432\u2013433. https:\/\/doi.org\/10.1109\/GCCE53005.2021.9622019","DOI":"10.1109\/GCCE53005.2021.9622019"},{"key":"5702_CR36","doi-asserted-by":"publisher","unstructured":"Sherstinsky A (2020) Fundamentals of recurrent neural network (rnn) and long short-term memory (lstm) network. Phys D: Nonlinear Phenom 404:132306. https:\/\/doi.org\/10.1016\/j.physd.2019.132306","DOI":"10.1016\/j.physd.2019.132306"},{"key":"5702_CR37","doi-asserted-by":"publisher","unstructured":"Shimada K, Koyama Y, Takahashi S, Takahashi N, Tsunoo E, Mitsufuji Y (2022) Multi-accdoa: Localizing and detecting overlapping sounds from the same class with auxiliary duplicating permutation invariant training. In: International conference on acoustics, speech and signal processing (ICASSP), pp 316\u2013320. https:\/\/doi.org\/10.1109\/ICASSP43922.2022.9746384","DOI":"10.1109\/ICASSP43922.2022.9746384"},{"key":"5702_CR38","doi-asserted-by":"publisher","unstructured":"Scheibler R, Komatsu T, Fujita Y, Hentschel M (2022) On sorting and padding multiple targets for sound event localization and detection with permutation invariant and location-based training. In: Asia-Pacific signal and information processing association annual summit and conference (APSIPA ASC), pp 1\u20136. https:\/\/doi.org\/10.23919\/APSIPAASC55919.2022.9979815","DOI":"10.23919\/APSIPAASC55919.2022.9979815"},{"key":"5702_CR39","doi-asserted-by":"publisher","first-page":"829","DOI":"10.1109\/TASLP.2021.3133208","volume":"30","author":"E Fonseca","year":"2022","unstructured":"Fonseca E, Favory X, Pons J, Font F, Serra X (2022) Fsd50k: An open dataset of human-labeled sound events. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 30:829\u2013852. https:\/\/doi.org\/10.1109\/TASLP.2021.3133208","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"5702_CR40","unstructured":"Kumar P, Kumar A, Choudhary S, Prakash J, Kumar S (2023) A framework for seld using conformer and multi-accdoa strategies. Technical report, DCASE2023 Challenge. https:\/\/dcase.community\/documents\/challenge2023\/technical_reports\/DCASE2023_Kumar_85_t3a.pdf"}],"container-title":["Applied Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05702-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10489-024-05702-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10489-024-05702-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,9,18]],"date-time":"2024-09-18T15:24:55Z","timestamp":1726673095000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10489-024-05702-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,20]]},"references-count":40,"journal-issue":{"issue":"22","published-print":{"date-parts":[[2024,11]]}},"alternative-id":["5702"],"URL":"https:\/\/doi.org\/10.1007\/s10489-024-05702-9","relation":{},"ISSN":["0924-669X","1573-7497"],"issn-type":[{"type":"print","value":"0924-669X"},{"type":"electronic","value":"1573-7497"}],"subject":[],"published":{"date-parts":[[2024,8,20]]},"assertion":[{"value":"23 July 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 August 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conficts.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}