{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T21:03:32Z","timestamp":1767647012099,"version":"3.48.0"},"reference-count":64,"publisher":"Springer Science and Business Media LLC","issue":"10","license":[{"start":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T00:00:00Z","timestamp":1763510400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"},{"start":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T00:00:00Z","timestamp":1763510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J. King Saud Univ. Comput. Inf. Sci."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s44443-025-00356-0","type":"journal-article","created":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T10:22:37Z","timestamp":1763547757000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Learning discriminative representations from integrated features for DOA estimation"],"prefix":"10.1007","volume":"37","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-7578-3843","authenticated-orcid":false,"given":"Qi","family":"You","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6332-895X","authenticated-orcid":false,"given":"Qinghua","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,19]]},"reference":[{"key":"356_CR1","doi-asserted-by":"publisher","first-page":"943","DOI":"10.1121\/1.382599","volume":"65","author":"J Allen","year":"1979","unstructured":"Allen J, Berkley D (1979) Image method for efficiently simulating small-room acoustics. J Acoust Soc Amer 65:943\u2013950. https:\/\/doi.org\/10.1121\/1.382599","journal-title":"J Acoust Soc Amer"},{"issue":"5","key":"356_CR2","doi-asserted-by":"publisher","first-page":"20","DOI":"10.1109\/MSP.2019.2926573","volume":"36","author":"I Bilik","year":"2019","unstructured":"Bilik I, Longman O, Villeval S, Tabrikian J (2019) The rise of radar for autonomous vehicles: signal processing solutions and future research directions. IEEE Signal Process Mag 36(5):20\u201331. https:\/\/doi.org\/10.1109\/MSP.2019.2926573","journal-title":"IEEE Signal Process Mag"},{"key":"356_CR3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-662-04619-7","author":"M Brandstein","year":"2001","unstructured":"Brandstein M, Ward D (2001) Microphone arrays: Signal processing techniques and applications. Springer Berlin Germany. https:\/\/doi.org\/10.1007\/978-3-662-04619-7","journal-title":"Springer Berlin Germany"},{"key":"356_CR4","doi-asserted-by":"publisher","unstructured":"Cao F, Guo D, Wang T, Yao H, Li J, Qin C (2024) Universal screen-shooting robust image watermarking with channel-attention in dct domain. Expert Syst Appl 238:122062. https:\/\/doi.org\/10.1016\/j.eswa.2023.122062","DOI":"10.1016\/j.eswa.2023.122062"},{"key":"356_CR5","doi-asserted-by":"publisher","unstructured":"Cao Y, Iqbal T, Kong Q, An F, Wang W, Plumbley MD (2021) An improved event-independent network for polyphonic sound event localization and detection. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 885\u2013889. https:\/\/doi.org\/10.1109\/ICASSP39728.2021.9413473","DOI":"10.1109\/ICASSP39728.2021.9413473"},{"key":"356_CR6","doi-asserted-by":"publisher","unstructured":"Chen KL, Lee CH, Rao BD, Garudadri H (2023) A DNN based normalized time-frequency weighted criterion for robust wideband DOA estimation. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10096809","DOI":"10.1109\/ICASSP49357.2023.10096809"},{"key":"356_CR7","doi-asserted-by":"publisher","unstructured":"Cooreman P, Bohlender A, Madhu N (2023) CRNN-based multi-doa estimator: Comparing classification and regression. In: Proc. IEEE Speech Commun. pp 156\u2013160. https:\/\/doi.org\/10.30420\/456164030","DOI":"10.30420\/456164030"},{"issue":"4","key":"356_CR8","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2871183","volume":"48","author":"M Crocco","year":"2016","unstructured":"Crocco M, Cristani M, Trucco A, Murino V (2016) Audio surveillance: a systematic review. ACM Comput Surv 48(4):1\u201346. https:\/\/doi.org\/10.1145\/2871183","journal-title":"ACM Comput Surv"},{"key":"356_CR9","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1109\/TASLP.2020.3040031","volume":"29","author":"D Diaz-Guerra","year":"2021","unstructured":"Diaz-Guerra D, Miguel A, Beltran JR (2021) Robust sound source tracking using SRP-PHAT and 3D convolutional neural networks. IEEE\/ACM Trans Audio Speech Lang Process 29:300\u2013311. https:\/\/doi.org\/10.1109\/TASLP.2020.3040031","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"356_CR10","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1109\/TASLP.2022.3224282","volume":"31","author":"D Diaz-Guerra","year":"2023","unstructured":"Diaz-Guerra D, Miguel A, Beltran JR (2023) Direction of arrival estimation of sound sources using icosahedral CNNs. IEEE\/ACM Trans Audio Speech Lang Process 31:313\u2013321. https:\/\/doi.org\/10.1109\/TASLP.2022.3224282","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"356_CR11","doi-asserted-by":"publisher","unstructured":"Diaz-Guerra D, Miguel A, Beltran JR (2021) gpuRIR: a python library for room impulse response simulation with GPU acceleration. Multimed Tools Appl 80(4):5653\u20135671. https:\/\/doi.org\/10.1007\/s11042-020-09905-3, arXiv:1810.11359 [cs, eess]","DOI":"10.1007\/s11042-020-09905-3"},{"key":"356_CR12","doi-asserted-by":"publisher","unstructured":"Dmochowski JP, Benesty J, Affes S (2007) Broadband music: Opportunities and challenges for multiple source localization. In: 2007 IEEE ASSP Workshop Appl Signal Process Audio Acoust. pp 18\u201321. https:\/\/doi.org\/10.1109\/ASPAA.2007.4392978","DOI":"10.1109\/ASPAA.2007.4392978"},{"key":"356_CR13","doi-asserted-by":"publisher","unstructured":"Dong Y, Wang Q, Hong H, Jiang Y, Cheng S (2025) An experimental study on joint modeling for sound event localization and detection with source distance estimation. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49660.2025.10887663","DOI":"10.1109\/ICASSP49660.2025.10887663"},{"key":"356_CR14","doi-asserted-by":"publisher","unstructured":"Evers C, Loellmann H, Mellmann H, Schmidt A, Barfuss H, Naylor P, Kellermann W (2020) The LOCATA challenge: acoustic source localization and tracking. IEEE\/ACM Trans Audio Speech Lang Process 28:1620\u20131643. https:\/\/doi.org\/10.1109\/TASLP.2020.2990485, arXiv:1909.01008 [cs, eess]","DOI":"10.1109\/TASLP.2020.2990485"},{"key":"356_CR15","doi-asserted-by":"publisher","unstructured":"Feng L, Gong Y, Zhang XL (2023) Soft label coding for end-to-end sound source localization with ad-hoc microphone arrays. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 1\u20135. https:\/\/doi.org\/10.1109\/ICASSP49357.2023.10094647","DOI":"10.1109\/ICASSP49357.2023.10094647"},{"issue":"4","key":"356_CR16","doi-asserted-by":"publisher","first-page":"2743","DOI":"10.1121\/10.0032395","volume":"156","author":"X Fu","year":"2024","unstructured":"Fu X, Sun D, Teng T (2024) A high-resolution method for direction of arrival estimation based on an improved self-attention module. J Acoust Soc Amer 156(4):2743\u20132758. https:\/\/doi.org\/10.1121\/10.0032395","journal-title":"J Acoust Soc Amer"},{"key":"356_CR17","doi-asserted-by":"publisher","unstructured":"Gong Y, Yu X, Ding Y, Peng X, Zhao J, Han Z (2021) Effective fusion factor in fpn for tiny object detection. In: 2021 IEEE Winter Conference on Applications of Computer Vision (WACV). pp 1159\u20131167. https:\/\/doi.org\/10.1109\/WACV48630.2021.00120","DOI":"10.1109\/WACV48630.2021.00120"},{"key":"356_CR18","doi-asserted-by":"publisher","unstructured":"Guirguis K, Schorn C, Guntoro A, Abdulatif S, Yang B (2021) SELD-TCN: Sound event localization & detection via temporal convolutional networks. In: Proc. 28th Eur. Signal Process. Conf. (EUSIPCO). pp 16\u201320. https:\/\/doi.org\/10.23919\/Eusipco47968.2020.9287716","DOI":"10.23919\/Eusipco47968.2020.9287716"},{"issue":"3","key":"356_CR19","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","volume":"8","author":"MH Guo","year":"2022","unstructured":"Guo MH, Xu TX, Liu JJ, Liu ZN, Jiang PT, Mu TJ, Zhang SH, Martin RR, Cheng MM, Hu SM (2022) Attention mechanisms in computer vision: a survey. Comp Visual Media 8(3):331\u2013368. https:\/\/doi.org\/10.1007\/s41095-022-0271-y","journal-title":"Comp Visual Media"},{"key":"356_CR20","doi-asserted-by":"publisher","unstructured":"Hadad E, Gannot S (2018) Multi-speaker direction of arrival estimation using srp-phat algorithm with a weighted histogram. In: 2018 IEEE Int. Conf. Sci. Electr. Eng. Israel, ICSEE. pp 1\u20135. https:\/\/doi.org\/10.1109\/ICSEE.2018.8646206","DOI":"10.1109\/ICSEE.2018.8646206"},{"key":"356_CR21","doi-asserted-by":"publisher","unstructured":"Huang Q, Fang W (2022) DOA estimation using two independent convolutional neural networks with residual blocks. Digit Signal Process 131:103765. https:\/\/doi.org\/10.1016\/j.dsp.2022.103765","DOI":"10.1016\/j.dsp.2022.103765"},{"key":"356_CR22","doi-asserted-by":"publisher","unstructured":"Huang Y, Chen Z, Li D, Ye K, Zhou L, Hong S (2025) An efficient sparse Bayesian doa estimator based on fixed-point method for off-grid targets. Expert Syst Appl 269:126424. https:\/\/doi.org\/10.1016\/j.eswa.2025.126424","DOI":"10.1016\/j.eswa.2025.126424"},{"issue":"10","key":"356_CR23","doi-asserted-by":"publisher","first-page":"1901","DOI":"10.1109\/TASLP.2017.2726762","volume":"25","author":"M Kolb\u00e6k","year":"2017","unstructured":"Kolb\u00e6k M, Yu D, Tan ZH, Jensen J (2017) Multitalker speech separation with utterance-level permutation invariant training of deep recurrent neural networks. IEEE\/ACM Trans Audio Speech Lang Process 25(10):1901\u20131913. https:\/\/doi.org\/10.1109\/TASLP.2017.2726762","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"issue":"6","key":"356_CR24","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1007\/s44443-025-00132-0","volume":"37","author":"C Li","year":"2025","unstructured":"Li C, Du X, Hou X, Mai Y (2025) An indoor positioning optimization method based on dimensionality reduction and clustering fusion. J King Saud Univ Comput Inf 37(6):118. https:\/\/doi.org\/10.1007\/s44443-025-00132-0","journal-title":"J King Saud Univ Comput Inf"},{"key":"356_CR25","doi-asserted-by":"publisher","unstructured":"Li Y, Chen H, Feng G, Miao Q (2023b) Learning robust representations with information bottleneck and memory network for RGB-D-based gesture recognition. In: Proc. IEEE\/CVF Int. Conf. Comput. Vis. pp 20911\u201320921. https:\/\/doi.org\/10.1109\/ICCV51070.2023.01917","DOI":"10.1109\/ICCV51070.2023.01917"},{"key":"356_CR26","doi-asserted-by":"publisher","unstructured":"Li J, Han J, Deng S, Zheng T, He Y, Zheng G (2023a) Mutual information-based embedding decoupling for generalizable speaker verification. In: Proc. Interspeech. pp 3147\u20133151. https:\/\/doi.org\/10.21437\/Interspeech.2023-1314","DOI":"10.21437\/Interspeech.2023-1314"},{"issue":"9","key":"356_CR27","doi-asserted-by":"publisher","first-page":"8622","DOI":"10.1109\/JSEN.2022.3155706","volume":"22","author":"H Liu","year":"2022","unstructured":"Liu H, Zhou J, Xi G, Peng B, Zhang S, Xiao Q (2022) Research on acoustic events recognition method with dimensionality reduction combining attention and mutual information. IEEE Sensors J 22(9):8622\u20138632. https:\/\/doi.org\/10.1109\/JSEN.2022.3155706","journal-title":"IEEE Sensors J"},{"key":"356_CR28","doi-asserted-by":"publisher","unstructured":"Liu K, Wang X, Yu J, Ma J (2023) Attention based DOA estimation in the presence of unknown nonuniform noise. Appl Acoust 211:109506. https:\/\/doi.org\/10.1016\/j.apacoust.2023.109506","DOI":"10.1016\/j.apacoust.2023.109506"},{"issue":"1","key":"356_CR29","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11760-024-03647-7","volume":"19","author":"K Liu","year":"2025","unstructured":"Liu K, Fu Y, Ma J (2025) Multi-scale feature fusion based DOA and range estimation for near-field sources. Signal Image Video Process 19(1):4. https:\/\/doi.org\/10.1007\/s11760-024-03647-7","journal-title":"Signal Image Video Process"},{"key":"356_CR30","doi-asserted-by":"publisher","unstructured":"Mack W, Wechsler J, Habets EA (2022) Signal-aware direction-of-arrival estimation using attention mechanisms. Comput Speech Lang 75:101363. https:\/\/doi.org\/10.1016\/j.csl.2022.101363","DOI":"10.1016\/j.csl.2022.101363"},{"key":"356_CR31","doi-asserted-by":"publisher","unstructured":"Panayotov V, Chen G, Povey D, Khudanpur S (2015) Librispeech: An ASR corpus based on public domain audio books. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 5206\u20135210. https:\/\/doi.org\/10.1109\/ICASSP.2015.7178964","DOI":"10.1109\/ICASSP.2015.7178964"},{"issue":"1","key":"356_CR32","doi-asserted-by":"publisher","first-page":"22","DOI":"10.1109\/JSTSP.2019.2900164","volume":"13","author":"L Perotin","year":"2019","unstructured":"Perotin L, Serizel R, Vincent E, Guerin A (2019) CRNN-based multiple DOA estimation using acoustic intensity features for ambisonics recordings. IEEE J Sel Top Signal Process 13(1):22\u201333. https:\/\/doi.org\/10.1109\/JSTSP.2019.2900164","journal-title":"IEEE J Sel Top Signal Process"},{"key":"356_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2024.3522632","volume":"74","author":"R Pi","year":"2025","unstructured":"Pi R, Yu X (2025) Uncertainty estimation for sound source localization with deep learning. IEEE Trans Instrum Meas 74:1\u201312. https:\/\/doi.org\/10.1109\/TIM.2024.3522632","journal-title":"IEEE Trans Instrum Meas"},{"key":"356_CR34","doi-asserted-by":"publisher","first-page":"550","DOI":"10.1109\/TASLP.2022.3226330","volume":"31","author":"X Qian","year":"2023","unstructured":"Qian X, Wang Z, Wang J, Guan G, Li H (2023) Audio-visual cross-attention network for robotic speaker tracking. IEEE\/ACM Trans Audio Speech Lang Process 31:550\u2013562. https:\/\/doi.org\/10.1109\/TASLP.2022.3226330","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"356_CR35","doi-asserted-by":"publisher","unstructured":"Ravanelli M, Bengio Y (2019) Learning speaker representations with mutual information. In: Kubin G, Kacic Z (eds) Proc. 20th Annu. Conf. Int. Speech Commun. Assoc. ISCA. pp 1153\u20131157. https:\/\/doi.org\/10.21437\/Interspeech.2019-2380","DOI":"10.21437\/Interspeech.2019-2380"},{"key":"356_CR36","doi-asserted-by":"publisher","unstructured":"Roman AS, Roman IR, Bello JP (2024) Robust DOA estimation from deep acoustic imaging. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 1321\u20131325. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10447883","DOI":"10.1109\/ICASSP48485.2024.10447883"},{"issue":"7","key":"356_CR37","doi-asserted-by":"publisher","first-page":"984","DOI":"10.1109\/29.32276","volume":"37","author":"R Roy","year":"1989","unstructured":"Roy R, Kailath T (1989) ESPRIT-estimation of signal parameters via rotational invariance techniques. IEEE Trans Acoust Speech Signal Process 37(7):984\u2013995. https:\/\/doi.org\/10.1109\/29.32276","journal-title":"IEEE Trans Acoust Speech Signal Process"},{"key":"356_CR38","doi-asserted-by":"publisher","unstructured":"Sanghi A (2020) Info3D: Representation learning on 3D objects using mutual information maximization and contrastive learning. In: Eur. Conf. Comput. Vis. pp 626\u2013642. https:\/\/doi.org\/10.1007\/978-3-030-58526-6_37","DOI":"10.1007\/978-3-030-58526-6_37"},{"key":"356_CR39","doi-asserted-by":"publisher","unstructured":"Scheibler R, Bezzam E, Dokmani\u0107 I (2018) Pyroomacoustics: A python package for audio room simulation and array processing algorithms. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 351\u2013355. https:\/\/doi.org\/10.1109\/ICASSP.2018.8461310","DOI":"10.1109\/ICASSP.2018.8461310"},{"issue":"3","key":"356_CR40","doi-asserted-by":"publisher","first-page":"276","DOI":"10.1109\/TAP.1986.1143830","volume":"34","author":"R Schmidt","year":"1986","unstructured":"Schmidt R (1986) Multiple emitter location and signal parameter estimation. IEEE Trans Antennas Propag 34(3):276\u2013280. https:\/\/doi.org\/10.1109\/TAP.1986.1143830","journal-title":"IEEE Trans Antennas Propag"},{"key":"356_CR41","doi-asserted-by":"publisher","unstructured":"SongGong K, Zhang P, Zhang X, Sun M, Wang W (2024) Multi-speaker localization in the circular harmonic domain on small aperture microphone arrays using deep convolutional networks. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 8586\u20138590. https:\/\/doi.org\/10.1109\/ICASSP48485.2024.10448198","DOI":"10.1109\/ICASSP48485.2024.10448198"},{"key":"356_CR42","doi-asserted-by":"publisher","unstructured":"Subramanian AS, Weng C, Watanabe S, Yu M, Yu D (2022) Deep learning based multi-source localization with source splitting and its effectiveness in multi-talker speech recognition. Comput Speech Lang 75:101360. https:\/\/doi.org\/10.1016\/j.csl.2022.101360","DOI":"10.1016\/j.csl.2022.101360"},{"key":"356_CR43","doi-asserted-by":"publisher","unstructured":"Tang X, Huang J, Lin Y, Dang T, Cheng J (2025) Speech emotion recognition via cnn-transformer and multidimensional attention mechanism. Speech Commun 171:103242. https:\/\/doi.org\/10.1016\/j.specom.2025.103242","DOI":"10.1016\/j.specom.2025.103242"},{"key":"356_CR44","doi-asserted-by":"publisher","unstructured":"Tolie HF, Ren J, Elyan E (2024) Dicam: Deep inception and channel-wise attention modules for underwater image enhancement. Neurocomputing 584:127585. https:\/\/doi.org\/10.1016\/j.neucom.2024.127585","DOI":"10.1016\/j.neucom.2024.127585"},{"issue":"2","key":"356_CR45","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1109\/53.665","volume":"5","author":"B Van Veen","year":"1988","unstructured":"Van Veen B, Buckley K (1988) Beamforming: a versatile approach to spatial filtering. IEEE ASSP Mag 5(2):4\u201324. https:\/\/doi.org\/10.1109\/53.665","journal-title":"IEEE ASSP Mag"},{"key":"356_CR46","doi-asserted-by":"publisher","unstructured":"Varga A, Steeneken HJ (1993) Assessment for automatic speech recognition: II. NOISEX-92: a database and an experiment to study the effect of additive noise on speech recognition systems. Speech Commun 12(3):247\u2013251. https:\/\/doi.org\/10.1016\/0167-6393(93)90095-3","DOI":"10.1016\/0167-6393(93)90095-3"},{"key":"356_CR47","doi-asserted-by":"publisher","first-page":"5051","DOI":"10.1109\/TASLP.2024.3507560","volume":"32","author":"Y Wang","year":"2024","unstructured":"Wang Y, Yang B, Li X (2024) IPDnet: a universal direct-path IPD estimation network for sound source localization. IEEE\/ACM Trans Audio Speech Lang Process 32:5051\u20135064. https:\/\/doi.org\/10.1109\/TASLP.2024.3507560","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"356_CR48","doi-asserted-by":"publisher","unstructured":"Wang Y, Yang B, Li X (2023) FN-SSL: Full-band and narrow-band fusion for sound source localization. In: Proc. Interspeech. pp 3779\u20133783. https:\/\/doi.org\/10.21437\/Interspeech.2023-714","DOI":"10.21437\/Interspeech.2023-714"},{"issue":"6","key":"356_CR49","doi-asserted-by":"publisher","first-page":"9103","DOI":"10.1109\/TVT.2024.3355970","volume":"73","author":"X Wu","year":"2024","unstructured":"Wu X, Wang J, Yang X, Tian F (2024) A gridless doa estimation method based on residual attention network and transfer learning. IEEE Trans Veh Technol 73(6):9103\u20139108. https:\/\/doi.org\/10.1109\/TVT.2024.3355970","journal-title":"IEEE Trans Veh Technol"},{"key":"356_CR50","doi-asserted-by":"publisher","unstructured":"Wu Y, He K (2018) Group normalization. In: Pro. Eur. Conf. Comput. Vis. pp 3\u201319. https:\/\/doi.org\/10.1007\/s11263-019-01198-w","DOI":"10.1007\/s11263-019-01198-w"},{"key":"356_CR51","doi-asserted-by":"publisher","unstructured":"Xiao X, Zhao S, Zhong X, Jones DL, Chng ES, Li H (2015) A learning-based approach to direction of arrival estimation in noisy and reverberant environments. In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 2814\u20132818. https:\/\/doi.org\/10.1109\/ICASSP.2015.7178484","DOI":"10.1109\/ICASSP.2015.7178484"},{"issue":"12","key":"356_CR52","doi-asserted-by":"publisher","first-page":"20240","DOI":"10.1109\/JSEN.2024.3396337","volume":"24","author":"X Xu","year":"2024","unstructured":"Xu X, Huang Q (2024) MD-DOA: a model-based deep learning DOA estimation architecture. IEEE Sensors J 24(12):20240\u201320253. https:\/\/doi.org\/10.1109\/JSEN.2024.3396337","journal-title":"IEEE Sensors J"},{"key":"356_CR53","doi-asserted-by":"publisher","unstructured":"Yan Y, Huang Q (2025) Robust DOA estimation: cross-branch fused multi-stream network with feature enhancement. Circ Syst Signal Process 44(3):2145\u20132166. https:\/\/doi.org\/10.1007\/s00034-024-02921-w","DOI":"10.1007\/s00034-024-02921-w"},{"key":"356_CR54","doi-asserted-by":"publisher","first-page":"3491","DOI":"10.1109\/TASLP.2021.3120641","volume":"29","author":"B Yang","year":"2021","unstructured":"Yang B, Liu H, Li X (2021) Learning deep direct-path relative transfer function for binaural sound source localization. IEEE\/ACM Trans Audio Speech Lang Process 29:3491\u20133503. https:\/\/doi.org\/10.1109\/TASLP.2021.3120641","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"issue":"6","key":"356_CR55","doi-asserted-by":"publisher","first-page":"3444","DOI":"10.1121\/10.0016467","volume":"152","author":"Y Yang","year":"2022","unstructured":"Yang Y, Chen H, Zhang P (2022) A stacked self-attention network for two-dimensional direction-of-arrival estimation in hands-free speech communication. J Acoust Soc Amer 152(6):3444\u20133457. https:\/\/doi.org\/10.1121\/10.0016467","journal-title":"J Acoust Soc Amer"},{"key":"356_CR56","doi-asserted-by":"publisher","unstructured":"Yin H, Ge M, Fu Y, Zhang G, Wang L, Zhang L, Qiu L, Dang J (2022) MIMO-DOAnet: Multi-channel input and multiple outputs DOA network with unknown number of sound sources. In: Proc. Interspeech, pp 891\u2013895, https:\/\/doi.org\/10.21437\/Interspeech.2022-10493, arXiv:2207.07307","DOI":"10.21437\/Interspeech.2022-10493"},{"key":"356_CR57","doi-asserted-by":"publisher","first-page":"462","DOI":"10.1109\/TASLP.2022.3225649","volume":"31","author":"Q Zhang","year":"2023","unstructured":"Zhang Q, Qian X, Ni Z, Nicolson A, Ambikairajah E, Li H (2023) A time-frequency attention module for neural speech enhancement. IEEE\/ACM Trans Audio Speech Lang Process 31:462\u2013475","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"356_CR58","doi-asserted-by":"publisher","unstructured":"Zhang K, Tong H, Tao J, Li B, Zhou Q (2025) A high-precision DOA estimator for low-frequency signals using ultra-dense small-aperture microphone arrays. Meas Sci Technol 36(1):016341. https:\/\/doi.org\/10.1088\/1361-6501\/ad9bdc","DOI":"10.1088\/1361-6501\/ad9bdc"},{"key":"356_CR59","doi-asserted-by":"publisher","unstructured":"Zhang C, Florencio D, Zhang Z (2008) Why does PHAT work well in lownoise, reverberative environments? In: Proc. IEEE Int. Conf. Acoust., Speech Signal Process. pp 2565\u20132568. https:\/\/doi.org\/10.1109\/ICASSP.2008.4518172","DOI":"10.1109\/ICASSP.2008.4518172"},{"key":"356_CR60","doi-asserted-by":"publisher","unstructured":"Zhao S, Ahmed S, Liang Y, Rupnow K, Chen D, Jones DL (2012) A real-time 3D sound localization system with miniature microphone array for virtual reality. In: Proc. IEEE Conf. Ind. Electron. Appl. (ICIEA). pp 1853\u20131857. https:\/\/doi.org\/10.1109\/ICIEA.2012.6361029","DOI":"10.1109\/ICIEA.2012.6361029"},{"key":"356_CR61","doi-asserted-by":"publisher","unstructured":"Zhou M, Yan K, Huang J, Yang Z, Fu X, Zhao F (2022) Mutual information-driven pan-sharpening. In: IEEE Conf. Comput. Vis. Pattern Recognit. pp 1788\u20131798. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00184","DOI":"10.1109\/CVPR52688.2022.00184"},{"key":"356_CR62","doi-asserted-by":"publisher","first-page":"1935","DOI":"10.1109\/LSP.2024.3428361","volume":"31","author":"B Zhu","year":"2024","unstructured":"Zhu B, Zhang W, Chen J, Zhu M, Li C (2024) Multi-source DOA estimation using higher-order pseudo intensity vector on a spherical microphone array. IEEE Signal Process Lett 31:1935\u20131939. https:\/\/doi.org\/10.1109\/LSP.2024.3428361","journal-title":"IEEE Signal Process Lett"},{"key":"356_CR63","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TIM.2023.3348907","volume":"73","author":"XC Zhu","year":"2024","unstructured":"Zhu XC, Zhang H, Feng HT, Zhao DH, Zhang XJ, Tao Z (2024) IFAN: an icosahedral feature attention network for sound source localization. IEEE Trans Instrum Meas 73:1\u201313. https:\/\/doi.org\/10.1109\/TIM.2023.3348907","journal-title":"IEEE Trans Instrum Meas"},{"key":"356_CR64","doi-asserted-by":"publisher","unstructured":"Zhu X, Cheng D, Zhang Z, Lin S, Dai J (2019) An empirical study of spatial attention mechanisms in deep networks. In: Proc. IEEE\/CVF Int. Conf. Comput. Vis. pp 6687\u20136696. https:\/\/doi.org\/10.1109\/ICCV.2019.00679","DOI":"10.1109\/ICCV.2019.00679"}],"container-title":["Journal of King Saud University Computer and Information Sciences"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-025-00356-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s44443-025-00356-0","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s44443-025-00356-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,5]],"date-time":"2026-01-05T18:48:16Z","timestamp":1767638896000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s44443-025-00356-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":64,"journal-issue":{"issue":"10","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["356"],"URL":"https:\/\/doi.org\/10.1007\/s44443-025-00356-0","relation":{},"ISSN":["1319-1578","2213-1248"],"issn-type":[{"type":"print","value":"1319-1578"},{"type":"electronic","value":"2213-1248"}],"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"2 August 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 October 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"This article does not contain any studies with human participants or animals performed by any of the authors.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics Approval and Consent to Participate"}}],"article-number":"323"}}