{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,9]],"date-time":"2026-04-09T08:06:54Z","timestamp":1775722014884,"version":"3.50.1"},"reference-count":152,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2021,12,27]],"date-time":"2021-12-27T00:00:00Z","timestamp":1640563200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,12,27]],"date-time":"2021-12-27T00:00:00Z","timestamp":1640563200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11042-021-11817-9","type":"journal-article","created":{"date-parts":[[2021,12,27]],"date-time":"2021-12-27T20:03:21Z","timestamp":1640635401000},"page":"5537-5586","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":63,"title":["Anomalous sound event detection: A survey of machine learning based methods and applications"],"prefix":"10.1007","volume":"81","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8929-3609","authenticated-orcid":false,"given":"Zied","family":"Mnasri","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stefano","family":"Rovetta","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francesco","family":"Masulli","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,12,27]]},"reference":[{"issue":"4","key":"11817_CR1","first-page":"e1258","volume":"8","author":"A Abdullatif","year":"2018","unstructured":"Abdullatif A, Masulli F, Rovetta S (2018) Clustering of nonstationary data streams: A survey of fuzzy partitional methods. Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery 8(4):e1258","journal-title":"Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery"},{"key":"11817_CR2","unstructured":"Adavanne S, Parascandolo G, Pertila P, Heittola T, Virtanen T (2016) Sound event detection in multichannel audio using spatial and harmonic features. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2016 Workshop (DCASE2016), pp 6\u201310"},{"key":"11817_CR3","doi-asserted-by":"crossref","unstructured":"Adavanne S, Pertil\u00e4 P, Virtanen T (2017) Sound event detection using spatial features and convolutional recurrent neural network. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 771\u2013775","DOI":"10.1109\/ICASSP.2017.7952260"},{"key":"11817_CR4","unstructured":"Adavanne S, Virtanen T (2020) A report on sound event detection with different binaural features. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2017 Workshop (DCASE2017). Munich, Germany"},{"key":"11817_CR5","doi-asserted-by":"crossref","unstructured":"Ahn JW, Grueneberg K, Ko BJ, Lee WH, Morales E, Wang S, Wang X, Wood D (2019) Acoustic anomaly detection system: demo abstract. In: Proceedings of the 17th Conference on Embedded Networked Sensor Systems, pp 378\u2013379","DOI":"10.1145\/3356250.3361963"},{"issue":"11","key":"11817_CR6","doi-asserted-by":"crossref","first-page":"2480","DOI":"10.3390\/s19112480","volume":"19","author":"RM Alsina-Pag\u00e8s","year":"2019","unstructured":"Alsina-Pag\u00e8s RM, Orga F, Al\u00edas F, Socor\u00f3 JC (2019) A wasn-based suburban dataset for anomalous noise event detection on dynamic road-traffic noise mapping. Sensors 19(11):2480","journal-title":"Sensors"},{"key":"11817_CR7","doi-asserted-by":"crossref","unstructured":"Arora V, Sun M, Wang C (2019) Deep embeddings for rare audio event detection with imbalanced data. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 3297\u20133301","DOI":"10.1109\/ICASSP.2019.8682395"},{"key":"11817_CR8","doi-asserted-by":"crossref","unstructured":"Atrey PK, Maddage NC, Kankanhalli MS (2006) Audio based event detection for multimedia surveillance. In: 2006 IEEE International Conference on Acoustics Speech and Signal Processing Proceedings, vol 5. IEEE, pp 813\u2013816","DOI":"10.1109\/ICASSP.2006.1661400"},{"key":"11817_CR9","doi-asserted-by":"crossref","unstructured":"Aurino F, Folla M, Gargiulo F, Moscato V, Picariello A, Sansone C (2014) One-class svm based approach for detecting anomalous audio events. In: 2014 International Conference on Intelligent Networking and Collaborative Systems. IEEE, pp 145\u2013151","DOI":"10.1109\/INCoS.2014.59"},{"issue":"9\u201310","key":"11817_CR10","doi-asserted-by":"crossref","first-page":"661","DOI":"10.1080\/08839514.2018.1430469","volume":"31","author":"E Babaee","year":"2017","unstructured":"Babaee E, Anuar NB, Abdul Wahab AW, Shamshirband S, Chronopoulos AT (2017) An overview of audio event detection methods from feature extraction to classification. Applied Artificial Intelligence 31(9\u201310):661\u2013714","journal-title":"Applied Artificial Intelligence"},{"key":"11817_CR11","doi-asserted-by":"crossref","unstructured":"Baumann J, Lohrenz T, Roy A, Fingscheidt T (2020) Beyond the dcase 2017 challenge on rare sound event detection: A proposal for a more realistic training and test framework. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 611\u2013615","DOI":"10.1109\/ICASSP40776.2020.9052950"},{"issue":"1","key":"11817_CR12","doi-asserted-by":"crossref","first-page":"e12564","DOI":"10.1111\/exsy.12564","volume":"38","author":"B Bayram","year":"2021","unstructured":"Bayram B, Duman TB, Ince G (2021) Real time detection of acoustic anomalies in industrial processes using sequential autoencoders. Expert Systems 38(1):e12564","journal-title":"Expert Systems"},{"key":"11817_CR13","doi-asserted-by":"crossref","unstructured":"Becker P, Roth C, Roennau A, Dillmann R (2020) Acoustic anomaly detection in additive manufacturing with long short-term memory neural networks. In: 2020 IEEE 7th International Conference on Industrial Engineering and Applications (ICIEA). IEEE, pp 921\u2013926","DOI":"10.1109\/ICIEA49774.2020.9102002"},{"issue":"3","key":"11817_CR14","doi-asserted-by":"crossref","first-page":"1727","DOI":"10.1121\/1.4790351","volume":"133","author":"E Benetos","year":"2013","unstructured":"Benetos E, Dixon S (2013) Multiple-instrument polyphonic music transcription using a temporally constrained shift-invariant model. The Journal of the Acoustical Society of America 133(3):1727\u20131741","journal-title":"The Journal of the Acoustical Society of America"},{"key":"11817_CR15","doi-asserted-by":"crossref","unstructured":"Borges N, Meyer GG (2008) Unsupervised distributional anomaly detection for a self-diagnostic speech activity detector. In: 2008 42nd Annual Conference on Information Sciences and Systems. IEEE, pp 950\u2013955","DOI":"10.1109\/CISS.2008.4558655"},{"key":"11817_CR16","unstructured":"Butko T (2011) Feature selection for multimodal: acoustic Event detection. Universitat Polit\u00e8cnica de Catalunya"},{"issue":"6","key":"11817_CR17","doi-asserted-by":"crossref","first-page":"1291","DOI":"10.1109\/TASLP.2017.2690575","volume":"25","author":"E Cak\u0131r","year":"2017","unstructured":"Cak\u0131r E, Parascandolo G, Heittola T, Huttunen H, Virtanen T (2017) Convolutional recurrent neural networks for polyphonic sound event detection. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 25(6):1291\u20131303","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"11817_CR18","doi-asserted-by":"crossref","unstructured":"Chakrabarty D, Elhilali M (2016) Abnormal sound event detection using temporal trajectories mixtures. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 216\u2013220","DOI":"10.1109\/ICASSP.2016.7471668"},{"key":"11817_CR19","doi-asserted-by":"crossref","first-page":"103339","DOI":"10.1109\/ACCESS.2020.2999388","volume":"8","author":"T Chan","year":"2020","unstructured":"Chan T, Chin CS (2020) A comprehensive review of polyphonic sound event detection. IEEE Access 8:103339\u2013103373","journal-title":"IEEE Access"},{"key":"11817_CR20","first-page":"15","volume":"14","author":"V Chandola","year":"2007","unstructured":"Chandola V, Banerjee A, Kumar V (2007) Outlier detection: A survey. ACM Computing Surveys 14:15","journal-title":"ACM Computing Surveys"},{"issue":"3","key":"11817_CR21","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3322240","volume":"52","author":"S Chandrakala","year":"2019","unstructured":"Chandrakala S, Jayalakshmi S (2019) Environmental audio scene and sound event recognition for autonomous surveillance: A survey and comparative studies. ACM Computing Surveys (CSUR) 52(3):1\u201334","journal-title":"ACM Computing Surveys (CSUR)"},{"issue":"1","key":"11817_CR22","doi-asserted-by":"crossref","first-page":"16","DOI":"10.1016\/j.compeleceng.2013.11.024","volume":"40","author":"G Chandrashekar","year":"2014","unstructured":"Chandrashekar G, Sahin F (2014) A survey on feature selection methods. Computers & Electrical Engineering 40(1):16\u201328","journal-title":"Computers & Electrical Engineering"},{"key":"11817_CR23","unstructured":"Chen C, Chen P, Yang L, Mo J, Song H, Xie Y, Ma L (2020) Acoustic anomaly detection via latent regularized gaussian mixture generative adversarial networks. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2020). Tokyo, Japan http:\/\/dcase.community\/challenge2020\/index. Preprint: arxiv: 2002.01107"},{"key":"11817_CR24","doi-asserted-by":"crossref","unstructured":"Chen Y, Jin H (2019) Rare sound event detection using deep learning and data augmentation. In: INTERSPEECH, pp 619\u2013623","DOI":"10.21437\/Interspeech.2019-1985"},{"key":"11817_CR25","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1016\/j.comcom.2021.01.005","volume":"168","author":"Z Chen","year":"2021","unstructured":"Chen Z, Chen Q, Zhang Y, Zhou L, Jiang J, Wu C, Huang Z (2021) Clustering-based feature subset selection with analysis on the redundancy-complementarity dimension. Computer Communications 168:65\u201374. https:\/\/doi.org\/10.1016\/j.comcom.2021.01.005","journal-title":"Computer Communications"},{"key":"11817_CR26","doi-asserted-by":"crossref","unstructured":"Colangelo F, Battisti F, Carli M, Neri A, Calabr\u00f3 F (2017) Enhancing audio surveillance with hierarchical recurrent neural networks. In: 2017 14th IEEE International Conference on Advanced Video and Signal Based Surveillance (AVSS). IEEE, pp 1\u20136","DOI":"10.1109\/AVSS.2017.8078496"},{"key":"11817_CR27","doi-asserted-by":"crossref","first-page":"372","DOI":"10.1016\/j.promfg.2020.05.059","volume":"48","author":"C Cooper","year":"2020","unstructured":"Cooper C, Zhang J, Gao RX, Wang P, Ragai I (2020) Anomaly detection in milling tools using acoustic signals and generative adversarial networks. Procedia Manufacturing 48:372\u2013378","journal-title":"Procedia Manufacturing"},{"key":"11817_CR28","doi-asserted-by":"crossref","unstructured":"Cotton CV, Ellis DP (2011) Spectral vs. spectro-temporal features for acoustic event detection. In: 2011 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA). IEEE, pp 69\u201372","DOI":"10.1109\/ASPAA.2011.6082331"},{"key":"11817_CR29","doi-asserted-by":"crossref","unstructured":"Criminisi A, Shotton J (2013) Semi-supervised classification forests. In: Decision Forests for Computer Vision and Medical Image Analysis. Springer, pp 95\u2013107","DOI":"10.1007\/978-1-4471-4929-3_8"},{"key":"11817_CR30","unstructured":"Dang A, Vu TH, Wang JC (2017) Deep learning for dcase2017 challenge. In: Workshop on DCASE2017 Challenge, Tech. Rep"},{"key":"11817_CR31","unstructured":"Dee HM, Hogg DC (2005) On the feasibility of using a cognitive model to filter surveillance data. In: IEEE Conference on Advanced Video and Signal Based Surveillance, 2005. IEEE, pp 34\u201339"},{"key":"11817_CR32","unstructured":"Dekkers G, Lauwereins S, Thoen B, Adhana MW, Brouckxon H, van Waterschoot T, Vanrumste B, Verhelst M, Karsmakers P (2017) The SINS database for detection of daily activities in a home environment using an acoustic sensor network. In: Proceedings of the Detection and Classication of Acoustic Scenes and Events 2017 Workshop (DCASE2017), pp 32\u201336"},{"key":"11817_CR33","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: A large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition. Ieee, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"6","key":"11817_CR34","doi-asserted-by":"publisher","first-page":"1978","DOI":"10.1109\/TBME.2020.3045720","volume":"68","author":"T Dissanayake","year":"2021","unstructured":"Dissanayake T, Fernando T, Denman S, Ghaemmaghami H, Sridharan S, Fookes C (2021) Domain generalization in biosignal classification. IEEE Transactions on Biomedical Engineering 68(6):1978\u20131989. https:\/\/doi.org\/10.1109\/TBME.2020.3045720","journal-title":"IEEE Transactions on Biomedical Engineering"},{"key":"11817_CR35","doi-asserted-by":"crossref","unstructured":"Duman TB, Bayram B, \u0130nce G (2019) Acoustic anomaly detection using convolutional autoencoders in industrial processes. In: International Workshop on Soft Computing Models in Industrial and Environmental Applications. Springer, pp 432\u2013442","DOI":"10.1007\/978-3-030-20055-8_41"},{"key":"11817_CR36","doi-asserted-by":"crossref","first-page":"121","DOI":"10.1016\/j.patcog.2016.03.028","volume":"58","author":"SM Erfani","year":"2016","unstructured":"Erfani SM, Rajasegarar S, Karunasekera S, Leckie C (2016) High-dimensional and large-scale anomaly detection using a linear one-class svm with deep learning. Pattern Recognition 58:121\u2013134","journal-title":"Pattern Recognition"},{"issue":"1","key":"11817_CR37","doi-asserted-by":"crossref","first-page":"279","DOI":"10.1109\/TITS.2015.2470216","volume":"17","author":"P Foggia","year":"2015","unstructured":"Foggia P, Petkov N, Saggese A, Strisciuglio N, Vento M (2015) Audio surveillance of roads: A system for detecting anomalous sounds. IEEE transactions on intelligent transportation systems 17(1):279\u2013288","journal-title":"IEEE transactions on intelligent transportation systems"},{"key":"11817_CR38","unstructured":"Fonseca E, Pons J, Favory X, Font F, Bogdanov D, Ferraro A, Oramas S, Porter A, Serra X (2017) Freesound datasets: a platform for the creation of open audio datasets. In: Proceedings of the 18th International Society for Music Information Retrieval Conference (ISMIR 2017). Suzhou, China, pp 486\u2013493"},{"issue":"1","key":"11817_CR39","doi-asserted-by":"crossref","first-page":"49","DOI":"10.1145\/1882471.1882479","volume":"12","author":"G Forman","year":"2010","unstructured":"Forman G, Scholz M (2010) Apples-to-apples in cross-validation studies: pitfalls in classifier performance measurement. Acm Sigkdd Explorations Newsletter 12(1):49\u201357","journal-title":"Acm Sigkdd Explorations Newsletter"},{"key":"11817_CR40","doi-asserted-by":"crossref","unstructured":"Gemmeke JF, Ellis DP, Freedman D, Jansen A, Lawrence W, Moore RC, Plakal M, Ritter M (2017) Audio set: An ontology and human-labeled dataset for audio events. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 776\u2013780","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"11817_CR41","unstructured":"Giri R, Cheng F, Helwani K, Tenneti SV, Isik U, Krishnaswamy A (2020) Group masked autoencoder based density estimator for audio anomaly detection. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2020).Tokyo, Japan, pp 51\u201355.\u00a0http:\/\/dcase.community\/documents\/workshop2020\/proceedings\/DCASE2020Workshop_Giri_66.pdf"},{"key":"11817_CR42","doi-asserted-by":"crossref","unstructured":"Hayashi T, Komatsu T, Kondo R, Toda T, Takeda K (2018) Anomalous sound event detection based on wavenet. In: 2018 26th European Signal Processing Conference (EUSIPCO). IEEE , pp 2494\u20132498","DOI":"10.23919\/EUSIPCO.2018.8553423"},{"key":"11817_CR43","doi-asserted-by":"crossref","unstructured":"Hayashi T, Watanabe S, Toda T, Hori T, Le Roux J, Takeda K (2017) Blstm-hmm hybrid system combined with sound activity detection network for polyphonic sound event detection. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 766\u2013770","DOI":"10.1109\/ICASSP.2017.7952259"},{"issue":"11","key":"11817_CR44","doi-asserted-by":"crossref","first-page":"2059","DOI":"10.1109\/TASLP.2017.2740002","volume":"25","author":"T Hayashi","year":"2017","unstructured":"Hayashi T, Watanabe S, Toda T, Hori T, Le Roux J, Takeda K (2017) Duration-controlled lstm for polyphonic sound event detection. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 25(11):2059\u20132070","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"11817_CR45","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"11817_CR46","doi-asserted-by":"crossref","unstructured":"He KX, Zhang WQ, Liu J, Liu Y (2019) Dilated-gated convolutional neural network with a new loss function on sound event detection. In: 2019 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). IEEE, pp 1491\u20131495","DOI":"10.1109\/APSIPAASC47483.2019.9023308"},{"issue":"1","key":"11817_CR47","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/1687-4722-2013-1","volume":"2013","author":"T Heittola","year":"2013","unstructured":"Heittola T, Mesaros A, Eronen A, Virtanen T (2013) Context-dependent sound event detection. EURASIP Journal on Audio, Speech, and Music Processing 2013(1):1\u201313","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"issue":"1","key":"11817_CR48","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1186\/1687-4722-2013-1","volume":"2013","author":"T Heittola","year":"2013","unstructured":"Heittola T, Mesaros A, Eronen A, Virtanen T (2013) Context-dependent sound event detection. EURASIP Journal on Audio, Speech, and Music Processing 2013(1):1","journal-title":"EURASIP Journal on Audio, Speech, and Music Processing"},{"key":"11817_CR49","unstructured":"Holmes A (2012) Hadoop in practice. Manning Publications Co"},{"key":"11817_CR50","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2017) Squeezenet: Alexnet-level accuracy with 50x fewer parameters and< 0.5 mb model size. In: 5th International Conference on Learning Representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings"},{"key":"11817_CR51","doi-asserted-by":"crossref","unstructured":"Imoto K, Tonami N, Koizumi Y, Yasuda M, Yamanishi R, Yamashita Y (2020) Sound event detection by multitask learning of sound events and scenes with soft scene labels. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 621\u2013625","DOI":"10.1109\/ICASSP40776.2020.9053912"},{"key":"11817_CR52","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1016\/j.engappai.2019.05.011","volume":"84","author":"ZH Janjua","year":"2019","unstructured":"Janjua ZH, Vecchio M, Antonini M, Antonelli F (2019) Irese: An intelligent rare-event detection system using unsupervised learning on the iot edge. Engineering Applications of Artificial Intelligence 84:41\u201350","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"11817_CR53","doi-asserted-by":"crossref","unstructured":"Kao CC, Sun M, Wang W, Wang C (2020) A comparison of pooling methods on lstm models for rare acoustic event classication. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 316\u2013320","DOI":"10.1109\/ICASSP40776.2020.9053150"},{"key":"11817_CR54","first-page":"1358","volume":"2018","author":"CC Kao","year":"2018","unstructured":"Kao CC, Wang W, Sun M, Wang C (2018) R-crnn: Region-based convolutional recurrent neural network for audio event detection. Proc. Interspeech 2018:1358\u20131362","journal-title":"Proc. Interspeech"},{"key":"11817_CR55","doi-asserted-by":"crossref","unstructured":"Kawachi Y, Koizumi Y, Harada N (2018) Complementary set variational autoencoder for supervised anomaly detection. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 2366\u20132370","DOI":"10.1109\/ICASSP.2018.8462181"},{"key":"11817_CR56","doi-asserted-by":"crossref","unstructured":"Kawaguchi Y (2018) Anomaly detection based on feature reconstruction from subsampled audio signals. In: 2018 26th European Signal Processing Conference (EUSIPCO). IEEE, pp 2524\u20132528","DOI":"10.23919\/EUSIPCO.2018.8553480"},{"key":"11817_CR57","doi-asserted-by":"crossref","unstructured":"Kim HG, Moreau N, Sikora T (2006) MPEG-7 audio and beyond: Audio content indexing and retrieval. John Wiley & Sons","DOI":"10.1002\/0470093366"},{"key":"11817_CR58","doi-asserted-by":"crossref","unstructured":"Ko BJ, Ortiz J, Salonidis T, Touma M, Verma D, Wang S, Wang X, Wood D (2016) Demo abstract: acoustic signal processing for anomaly detection in machine room environments. In: Proc. of ACM BuildSys","DOI":"10.1145\/2993422.2996401"},{"key":"11817_CR59","unstructured":"Koizumi Y, Kawaguchi Y, Imoto K, Nakamura T, Nikaido Y, Tanabe R, Purohit H, Suefusa K, Endo T, Yasuda M, Harada N (2020) Description and discussion on dcase2020 challenge task2: Unsupervised anomalous sound detection for machine condition monitoring. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2020). Tokyo, Japan. http:\/\/dcase.community\/challenge2020\/index. Preprint: arxiv: 2006.05822"},{"key":"11817_CR60","doi-asserted-by":"crossref","unstructured":"Koizumi Y, Murata S, Harada N, Saito S, Uematsu H (2019) Sniper: Few-shot learning for anomaly detection to minimize false-negative rate with ensured true-positive rate. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 915\u2013919","DOI":"10.1109\/ICASSP.2019.8683667"},{"key":"11817_CR61","doi-asserted-by":"crossref","unstructured":"Koizumi Y, Saito S, Uematsu H, Harada N (2017) Optimizing acoustic feature extractor for anomalous sound detection based on neyman-pearson lemma. In: 2017 25th European Signal Processing Conference (EUSIPCO). IEEE, pp 698\u2013702","DOI":"10.23919\/EUSIPCO.2017.8081297"},{"key":"11817_CR62","doi-asserted-by":"crossref","unstructured":"Koizumi Y, Saito S, Uematsu H, Harada N, Imoto K (2019) Toyadmos: A dataset of miniature-machine operating sounds for anomalous sound detection. In: 2019 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA). IEEE, pp 313\u2013317","DOI":"10.1109\/WASPAA.2019.8937164"},{"key":"11817_CR63","doi-asserted-by":"crossref","unstructured":"Koizumi Y, Yasuda M, Murata S, Saito S, Uematsu H, Harada N (2020) Spidernet: Attention network for one-shot anomaly detection in sounds. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 281\u2013285","DOI":"10.1109\/ICASSP40776.2020.9053620"},{"issue":"3","key":"11817_CR64","first-page":"231","volume":"1","author":"HP Kriegel","year":"2011","unstructured":"Kriegel HP, Kr\u00f6ger P, Sander J, Zimek A (2011) Density-based clustering. Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery 1(3):231\u2013240","journal-title":"Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery"},{"issue":"6","key":"11817_CR65","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Communications of the ACM 60(6):84\u201390","journal-title":"Communications of the ACM"},{"key":"11817_CR66","doi-asserted-by":"crossref","unstructured":"Latif S, Rana R, Qadir J, Epps J (2018) Variational autoencoders for learning latent representations of speech emotion: a preliminary study. In: Interspeech 2018: Proceedings, pp. 3107\u20133111. International Speech Communication Association (ISCA)","DOI":"10.21437\/Interspeech.2018-1568"},{"issue":"4","key":"11817_CR67","doi-asserted-by":"crossref","first-page":"1640","DOI":"10.1109\/TITS.2011.2163154","volume":"12","author":"J Lee","year":"2011","unstructured":"Lee J, Rakotonirainy A (2011) Acoustic hazard detection for pedestrians with obscured hearing. IEEE Transactions on Intelligent Transportation Systems 12(4):1640\u20131649","journal-title":"IEEE Transactions on Intelligent Transportation Systems"},{"key":"11817_CR68","unstructured":"Li Y, Li X (2017) The seie-scut systems for ieee aasp challenge on dcase 2017: Deep learning techniques for audio representation and classication. In: Proc. Detection Classication Acoustic Scenes Events 2018 Workshop"},{"key":"11817_CR69","unstructured":"Lim H, Park J, Han Y (2017) Rare sound event detection using 1d convolutional recurrent neural networks. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2017 Workshop, pp 80\u201384"},{"key":"11817_CR70","doi-asserted-by":"crossref","unstructured":"Lin L, Wang X, Liu H, Qian Y (2020) Guided learning for weakly-labeled semi-supervised sound event detection. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 626\u2013630","DOI":"10.1109\/ICASSP40776.2020.9053584"},{"issue":"12","key":"11817_CR71","doi-asserted-by":"crossref","first-page":"2181","DOI":"10.1088\/0967-3334\/37\/12\/2181","volume":"37","author":"C Liu","year":"2016","unstructured":"Liu C, Springer D, Li Q, Moody B, Juan RA, Chorro FJ, Castells F, Roig JM, Silva I, Johnson AE et al (2016) An open access database for the evaluation of heart sound algorithms. Physiological Measurement 37(12):2181","journal-title":"Physiological Measurement"},{"key":"11817_CR72","doi-asserted-by":"crossref","unstructured":"Liu Y, Tang J, Song Y, Dai L (2018) A capsule based approach for polyphonic sound event detection. In: 2018 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC). IEEE, pp 1853\u20131857","DOI":"10.23919\/APSIPA.2018.8659533"},{"key":"11817_CR73","doi-asserted-by":"crossref","unstructured":"Lu YC, Wu CW, Lu CT, Lerch A (2016) An unsupervised approach to anomaly detection in music datasets. In: Proceedings of the 39th International ACM SIGIR conference on Research and Development in Information Retrieval, pp 749\u2013752","DOI":"10.1145\/2911451.2914700"},{"key":"11817_CR74","unstructured":"Mandel M, Salamon J, Ellis DPW (2019) Proceedings of the Detection and Classification of Acoustic Scenes and Events 2019 Workshop (DCASE2019). New York University, NY, USA"},{"key":"11817_CR75","doi-asserted-by":"crossref","unstructured":"Marchi E, Vesperini F, Squartini S, Schuller B (2017) Deep recurrent neural network-based autoencoders for acoustic novelty detection. Computational intelligence and neuroscience 2017","DOI":"10.1155\/2017\/4694860"},{"issue":"6","key":"11817_CR76","doi-asserted-by":"crossref","first-page":"992","DOI":"10.1109\/TASLP.2019.2907016","volume":"27","author":"A Mesaros","year":"2019","unstructured":"Mesaros A, Diment A, Elizalde B, Heittola T, Vincent E, Raj B, Virtanen T (2019) Sound event detection in the dcase 2017 challenge. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 27(6):992\u20131006","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"issue":"2","key":"11817_CR77","doi-asserted-by":"crossref","first-page":"379","DOI":"10.1109\/TASLP.2017.2778423","volume":"26","author":"A Mesaros","year":"2017","unstructured":"Mesaros A, Heittola T, Benetos E, Foster P, Lagrange M, Virtanen T, Plumbley MD (2017) Detection and classification of acoustic scenes and events: Outcome of the dcase 2016 challenge. IEEE\/ACM Transactions on Audio, Speech, and Language Processing 26(2):379\u2013393","journal-title":"IEEE\/ACM Transactions on Audio, Speech, and Language Processing"},{"key":"11817_CR78","unstructured":"Mesaros A, Heittola T, Klapuri A (2011) Latent semantic analysis in sound event detection. In: 2011 19th European Signal Processing Conference. IEEE, pp 1307\u20131311"},{"issue":"6","key":"11817_CR79","doi-asserted-by":"crossref","first-page":"162","DOI":"10.3390\/app6060162","volume":"6","author":"A Mesaros","year":"2016","unstructured":"Mesaros A, Heittola T, Virtanen T (2016) Metrics for polyphonic sound event detection. Applied Sciences 6(6):162","journal-title":"Applied Sciences"},{"key":"11817_CR80","doi-asserted-by":"crossref","unstructured":"Mesaros A, Heittola T, Virtanen T (2016) Tut database for acoustic scene classification and sound event detection. In: 2016 24th European Signal Processing Conference (EUSIPCO). IEEE, pp 1128\u20131132","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"issue":"7","key":"11817_CR81","doi-asserted-by":"crossref","first-page":"1877","DOI":"10.1587\/transinf.2015EDP7457","volume":"99","author":"M Morise","year":"2016","unstructured":"Morise M, Yokomori F, Ozawa K (2016) World: a vocoder-based high-quality speech synthesis system for real-time applications. IEICE TRANSACTIONS on Information and Systems 99(7):1877\u20131884","journal-title":"IEICE TRANSACTIONS on Information and Systems"},{"key":"11817_CR82","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.dsp.2019.01.001","volume":"87","author":"M Mulimani","year":"2019","unstructured":"Mulimani M, Koolagudi SG (2019) Extraction of mapreduce-based features from spectrograms for audio-based surveillance. Digital Signal Processing 87:1\u20139","journal-title":"Digital Signal Processing"},{"key":"11817_CR83","doi-asserted-by":"crossref","unstructured":"M\u00fcller R, Ritz F, Illium S, Linnhoff-Popien C (2020) Acoustic anomaly detection for machine sounds based on image transfer learning. arXiv:2006.03429","DOI":"10.5220\/0010185800490056"},{"issue":"7","key":"11817_CR84","doi-asserted-by":"crossref","first-page":"075042","DOI":"10.1103\/PhysRevD.101.075042","volume":"101","author":"B Nachman","year":"2020","unstructured":"Nachman B, Shih D (2020) Anomaly detection with density estimation. Physical Review D 101(7):075042","journal-title":"Physical Review D"},{"key":"11817_CR85","unstructured":"Ng A, et al (2011) Sparse autoencoder. CS294A Lecture notes 72(2011), 1\u201319"},{"key":"11817_CR86","doi-asserted-by":"crossref","first-page":"69","DOI":"10.1016\/j.dsp.2014.05.003","volume":"31","author":"S Ntalampiras","year":"2014","unstructured":"Ntalampiras S (2014) Universal background modeling for acoustic surveillance of urban traffic. Digital Signal Processing 31:69\u201378","journal-title":"Digital Signal Processing"},{"issue":"4","key":"11817_CR87","doi-asserted-by":"crossref","first-page":"713","DOI":"10.1109\/TMM.2011.2122247","volume":"13","author":"S Ntalampiras","year":"2011","unstructured":"Ntalampiras S, Potamitis I, Fakotakis N (2011) Probabilistic novelty detection for acoustic surveillance under real-world conditions. IEEE Transactions on Multimedia 13(4):713\u2013719","journal-title":"IEEE Transactions on Multimedia"},{"key":"11817_CR88","unstructured":"Nunes, E.C.: Anomalous sound detection with machine learning: A systematic review. arXiv preprint arXiv:2102.07820(2021)"},{"issue":"5","key":"11817_CR89","doi-asserted-by":"crossref","first-page":"1308","DOI":"10.3390\/s18051308","volume":"18","author":"DY Oh","year":"2018","unstructured":"Oh DY, Yun ID (2018) Residual error based anomaly detection using auto-encoder in smd machine sound. Sensors 18(5):1308","journal-title":"Sensors"},{"key":"11817_CR90","doi-asserted-by":"crossref","unstructured":"Omar MK, Chaudhari U, Ramaswamy G (2005) Blind change detection for audio segmentation. In: Proceedings.(ICASSP\u201905). IEEE International Conference on Acoustics, Speech, and Signal Processing, 2005., vol. 1. IEEE, pp I\u2013501","DOI":"10.1109\/ICASSP.2005.1415160"},{"key":"11817_CR91","doi-asserted-by":"crossref","unstructured":"Ono Y, Onishi Y, Koshinaka T, Takata S, Hoshuyama O (2013) Anomaly detection of motors with feature emphasis using only normal sounds. In: 2013 IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, pp 2800\u20132804","DOI":"10.1109\/ICASSP.2013.6638167"},{"key":"11817_CR92","unstructured":"Van den Oord A, Dieleman S, Zen H, Simonyan K, Vinyals O, Graves A, Kalchbrenner N, Senior A, Kavukcuoglu K (2016) Wavenet: A generative model for raw audio. In: 9th ISCA Speech Synthesis Workshop, pp 125\u2013125"},{"key":"11817_CR93","unstructured":"Oord A, Li Y, Babuschkin I, Simonyan K, Vinyals O, Kavukcuoglu K, Driessche G, Lockhart E, Cobo L, Stimberg F, et al (2018) Parallel wavenet: Fast high-fidelity speech synthesis. In: International conference on machine learning. PMLR, pp 3918\u20133926"},{"issue":"4","key":"11817_CR94","doi-asserted-by":"crossref","first-page":"1138","DOI":"10.1109\/JBHI.2013.2294399","volume":"18","author":"CD Papadaniil","year":"2013","unstructured":"Papadaniil CD, Hadjileontiadis LJ (2013) Efficient heart sound segmentation and extraction using ensemble empirical mode decomposition and kurtosis features. IEEE journal of biomedical and health informatics 18(4):1138\u20131152","journal-title":"IEEE journal of biomedical and health informatics"},{"key":"11817_CR95","doi-asserted-by":"crossref","unstructured":"Parascandolo G, Huttunen H, Virtanen T (2016) Recurrent neural networks for polyphonic sound event detection in real life recordings. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 6440\u20136444","DOI":"10.1109\/ICASSP.2016.7472917"},{"issue":"3","key":"11817_CR96","doi-asserted-by":"crossref","first-page":"611","DOI":"10.1007\/s10514-018-9733-6","volume":"43","author":"D Park","year":"2019","unstructured":"Park D, Kim H, Kemp CC (2019) Multimodal anomaly detection for assistive robots. Autonomous Robots 43(3):611\u2013629","journal-title":"Autonomous Robots"},{"key":"11817_CR97","unstructured":"Perez-Castanos S, Naranjo-Alcazar J, Zuccarello P, Cobos M (2020) Anomalous sound detection using unsupervised and semi-supervised autoencoders and gammatone audio representation. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2020). Tokyo, Japan http:\/\/dcase.community\/challenge2020\/index. Preprint: arxiv: 2006.15321"},{"key":"11817_CR98","doi-asserted-by":"crossref","unstructured":"Petitjean F, Forestier G, Webb GI, Nicholson AE, Chen Y, Keogh E (2014) Dynamic time warping averaging of time series allows faster and more accurate classification. In: 2014 IEEE international conference on data mining. IEEE, pp 470\u2013479","DOI":"10.1109\/ICDM.2014.27"},{"key":"11817_CR99","doi-asserted-by":"crossref","unstructured":"Pham LD, Phan H, Palaniappan R, Mertins A, McLoughlin I (2021) Cnn-moe based framework for classification of respiratory anomalies and lung disease detection. IEEE Journal of Biomedical and Health Informatics","DOI":"10.1109\/JBHI.2021.3064237"},{"key":"11817_CR100","doi-asserted-by":"crossref","unstructured":"Phan H, Ch\u00e9n OY, Koch P, Pham L, McLoughlin I, Mertins A, De Vos M (2019) Unifying isolated and overlapping audio event detection with multi-label multi-task convolutional recurrent neural networks. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE , pp 51\u201355","DOI":"10.1109\/ICASSP.2019.8683064"},{"key":"11817_CR101","doi-asserted-by":"crossref","unstructured":"Phan H, Krawczyk-Becker M, Gerkmann T, Mertins A (2017) Dnn and cnn with weighted and multi-task loss functions for audio event detection. In: Proc. DCASE 2017-Workshop Detect. Classification Acoust. Scenes Events","DOI":"10.1109\/ICASSP.2018.8461353"},{"key":"11817_CR102","doi-asserted-by":"crossref","unstructured":"Phan H.,Krawczyk-Becker M, Gerkmann T, Mertins A (2018) Weighted and multi-task loss for rare audio event detection. In: 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 336\u2013340","DOI":"10.1109\/ICASSP.2018.8461353"},{"key":"11817_CR103","doi-asserted-by":"crossref","unstructured":"Plinge A, Grzeszick R, Fink GA (2014) A bag-of-features approach to acoustic event detection. In: 2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 3704\u20133708","DOI":"10.1109\/ICASSP.2014.6854293"},{"key":"11817_CR104","unstructured":"Plumbley MD, Kroos C, Bello JP, Richard G, Ellis DP, Mesaros A (2018) Proceedings of the Detection and Classification of Acoustic Scenes and Events 2018 Workshop (DCASE2018). Tampere University of Technology. Laboratory of Signal Processing"},{"key":"11817_CR105","doi-asserted-by":"crossref","unstructured":"Prego TDM, de Lima AA, Netto SL, da Silva EA (2016) Audio anomaly detection on rotating machinery using image signal processing. In: 2016 IEEE 7th Latin American Symposium on Circuits & Systems (LASCAS). IEEE, pp 207\u2013210","DOI":"10.1109\/LASCAS.2016.7451046"},{"key":"11817_CR106","unstructured":"Purohit H, Tanabe R, Endo T, Suefusa K, Nikaido Y, Kawaguchi Y (2020) Deep autoencoding gmm-based unsupervised anomaly detection in acoustic signals and its hyper-parameter optimization. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2020 Workshop (DCASE2020). Tokyo, Japan. http:\/\/dcase.community\/challenge2020\/index. Preprint: arxiv: 2009.12042"},{"key":"11817_CR107","doi-asserted-by":"publisher","unstructured":"Purohit H, Tanabe R, Ichige K, Endo T, Nikaido Y, Suefusa K, Kawaguchi Y (2019) Mimii dataset: Sound dataset for malfunctioning industrial machine investigation and inspection. In: Proceedings of the Detection and Classification of Acoustic Scenes and Events 2019 Workshop (DCASE2019), pp 209\u2013213. Tokyo, Japan. https:\/\/doi.org\/10.33682\/m76f-d61","DOI":"10.33682\/m76f-d61"},{"issue":"2","key":"11817_CR108","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/5.18626","volume":"77","author":"LR Rabiner","year":"1989","unstructured":"Rabiner LR (1989) A tutorial on hidden markov models and selected applications in speech recognition. Proceedings of the IEEE 77(2):257\u2013286. https:\/\/doi.org\/10.1109\/5.18626","journal-title":"Proceedings of the IEEE"},{"key":"11817_CR109","doi-asserted-by":"crossref","unstructured":"Rachburee N, Punlumjeak W (2015) A comparison of feature selection approach between greedy, ig-ratio, chi-square, and mrmr in educational mining. In: 2015 7th International Conference on Information Technology and Electrical Engineering (ICITEE). IEEE , pp 420\u2013424","DOI":"10.1109\/ICITEED.2015.7408983"},{"issue":"1\u20133","key":"11817_CR110","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1006\/dspr.1999.0361","volume":"10","author":"DA Reynolds","year":"2000","unstructured":"Reynolds DA, Quatieri TF, Dunn RB (2000) Speaker verification using adapted gaussian mixture models. Digital signal processing 10(1\u20133):19\u201341","journal-title":"Digital signal processing"},{"issue":"4","key":"11817_CR111","doi-asserted-by":"crossref","first-page":"504","DOI":"10.1109\/TSA.2005.848882","volume":"13","author":"G Riccardi","year":"2005","unstructured":"Riccardi G, Hakkani-Tur D (2005) Active learning: Theory and applications to automatic speech recognition. IEEE transactions on speech and audio processing 13(4):504\u2013511","journal-title":"IEEE transactions on speech and audio processing"},{"key":"11817_CR112","doi-asserted-by":"crossref","unstructured":"Rocha B, Filos D, Mendes L, Vogiatzis I, Perantoni E, Kaimakamis E, Natsiavas P, Oliveira A, J\u00e1come C, Marques A, et al (2017) A respiratory sound database for the development of automated classification. In: International Conference on Biomedical and Health Informatics. Springer, pp 33\u201337","DOI":"10.1007\/978-981-10-7419-6_6"},{"key":"11817_CR113","doi-asserted-by":"crossref","unstructured":"Rossi A, Montefoschi F, Rizzo A, Diligenti M, Festucci C (2017) Auto-associative recurrent neural networks and long term dependencies in novelty detection for audio surveillance applications. In: IOP Conference Series: Materials Science and Engineering","DOI":"10.1088\/1757-899X\/261\/1\/012009"},{"key":"11817_CR114","doi-asserted-by":"crossref","unstructured":"Rovetta S, Mnasri Z, Masulli F (2020) Detection of hazardous road events from audio streams: An ensemble outlier detection approach. In: 2020 IEEE Conference on Evolving and Adaptive Intelligent Systems (EAIS). IEEE, pp 1\u20136","DOI":"10.1109\/EAIS48028.2020.9122704"},{"key":"11817_CR115","doi-asserted-by":"crossref","unstructured":"Rovetta S, Mnasri Z, Masulli F, Cabri A (2021) Audio surveillance of road traffic: An approahc based on interval comparison and type 2 fuzzy sets. In: The 12th Conference of the European Society for Fuzzy Logic and Technology. EUSFLAT","DOI":"10.2991\/asum.k.210827.059"},{"key":"11817_CR116","doi-asserted-by":"crossref","unstructured":"Rushe E, Mac Namee B (2019) Anomaly detection in raw audio using deep autoregressive networks. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE , pp 3597\u20133601","DOI":"10.1109\/ICASSP.2019.8683414"},{"key":"11817_CR117","doi-asserted-by":"crossref","unstructured":"Salamon J, Jacoby C, Bello JP (2014) A dataset and taxonomy for urban sound research. In: Proceedings of the 22nd ACM international conference on Multimedia, pp 1041\u20131044","DOI":"10.1145\/2647868.2655045"},{"key":"11817_CR118","doi-asserted-by":"crossref","unstructured":"Sammarco M, Detyniecki M (2018) Crashzam: Sound-based car crash detection. In: VEHITS, pp 27\u201335","DOI":"10.5220\/0006629200270035"},{"issue":"4","key":"11817_CR119","doi-asserted-by":"crossref","first-page":"513","DOI":"10.1088\/0967-3334\/31\/4\/004","volume":"31","author":"SE Schmidt","year":"2010","unstructured":"Schmidt SE, Holst-Hansen C, Graff C, Toft E, Struijk JJ (2010) Segmentation of heart sound recordings by a duration-dependent hidden markov model. Physiological measurement 31(4):513","journal-title":"Physiological measurement"},{"key":"11817_CR120","first-page":"582","volume":"12","author":"B Sch\u00f6lkopf","year":"1999","unstructured":"Sch\u00f6lkopf B, Williamson RC, Smola A, Shawe-Taylor J, Platt J (1999) Support vector method for novelty detection. Advances in neural information processing systems 12:582\u2013588","journal-title":"Advances in neural information processing systems"},{"key":"11817_CR121","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809682","author":"J Shawe-Taylor","year":"2004","unstructured":"Shawe-Taylor J, Cristianini N (2004) Kernel Methods for Pattern Analysis. Cambridge University Press. https:\/\/doi.org\/10.1017\/CBO9780511809682","journal-title":"Cambridge University Press"},{"key":"11817_CR122","doi-asserted-by":"crossref","unstructured":"Shimada K, Koyama Y, Inoue A (2020) Metric learning with background noise class for few-shot detection of rare sound events. In: ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE , pp 616\u2013620","DOI":"10.1109\/ICASSP40776.2020.9054712"},{"key":"11817_CR123","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: 3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Workshop Track Proceedings"},{"issue":"6","key":"11817_CR124","doi-asserted-by":"crossref","first-page":"1257","DOI":"10.1109\/TSMCC.2012.2215319","volume":"42","author":"AA Sodemann","year":"2012","unstructured":"Sodemann AA, Ross MP, Borghetti BJ (2012) A review of anomaly detection in automated surveillance. IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews) 42(6):1257\u20131272","journal-title":"IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)"},{"issue":"3","key":"11817_CR125","doi-asserted-by":"crossref","first-page":"185","DOI":"10.1121\/1.1915893","volume":"8","author":"SS Stevens","year":"1937","unstructured":"Stevens SS, Volkmann J, Newman EB (1937) A scale for the measurement of the psychological magnitude pitch. The Journal of the Acoustical Society of America 8(3):185\u2013190","journal-title":"The Journal of the Acoustical Society of America"},{"issue":"10","key":"11817_CR126","doi-asserted-by":"crossref","first-page":"1733","DOI":"10.1109\/TMM.2015.2428998","volume":"17","author":"D Stowell","year":"2015","unstructured":"Stowell D, Giannoulis D, Benetos E, Lagrange M, Plumbley MD (2015) Detection and classification of acoustic scenes and events. IEEE Transactions on Multimedia 17(10):1733\u20131746","journal-title":"IEEE Transactions on Multimedia"},{"issue":"1","key":"11817_CR127","first-page":"2213","volume":"14","author":"D Stowell","year":"2013","unstructured":"Stowell D, Plumbley MD (2013) Segregating event streams and noise with a markov renewal process model. The Journal of Machine Learning Research 14(1):2213\u20132238","journal-title":"The Journal of Machine Learning Research"},{"key":"11817_CR128","doi-asserted-by":"crossref","unstructured":"Su TW, Liu JY, Yang YH (2017) Weakly-supervised audio event detection using event-specific gaussian filters and fully convolutional networks. In: 2017 IEEE international conference on acoustics, speech and signal processing (ICASSP). IEEE , pp 791\u2013795","DOI":"10.1109\/ICASSP.2017.7952264"},{"issue":"4","key":"11817_CR129","doi-asserted-by":"crossref","first-page":"651","DOI":"10.1109\/TBME.2006.889189","volume":"54","author":"Z Syed","year":"2007","unstructured":"Syed Z, Leeds D, Curtis D, Nesta F, Levine RA, Guttag J (2007) A framework for the analysis of acoustical cardiac signals. IEEE Transactions on Biomedical Engineering 54(4):651\u2013662","journal-title":"IEEE Transactions on Biomedical Engineering"},{"issue":"5","key":"11817_CR130","doi-asserted-by":"crossref","first-page":"1557","DOI":"10.1109\/TASL.2006.878256","volume":"14","author":"SE Tranter","year":"2006","unstructured":"Tranter SE, Reynolds DA (2006) An overview of automatic speaker diarization systems. IEEE Transactions on audio, speech, and language processing 14(5):1557\u20131565","journal-title":"IEEE Transactions on audio, speech, and language processing"},{"key":"11817_CR131","doi-asserted-by":"crossref","unstructured":"Turpault, N., Serizel, R., Parag Shah, A., Salamon, J.: Sound event detection in domestic environments with weakly labeled data and soundscape synthesis (2019). Preprint: https:\/\/hal.inria.fr\/hal-02160855","DOI":"10.33682\/006b-jx26"},{"key":"11817_CR132","doi-asserted-by":"crossref","unstructured":"Uematsu H, Koizumi Y, Saito S, Nakagawa A, Harada N (2017) Anomaly detection technique in sound to detect faulty equipment. NTT Technical Review 15(8)","DOI":"10.53829\/ntr201708fa5"},{"key":"11817_CR133","doi-asserted-by":"crossref","unstructured":"Valenzise G, Gerosa L, Tagliasacchi M, Antonacci F, Sarti A (2007) Scream and gunshot detection and localization for audio-surveillance systems. In: 2007 IEEE Conference on Advanced Video and Signal Based Surveillance. IEEE, pp 21\u201326","DOI":"10.1109\/AVSS.2007.4425280"},{"issue":"1\u20132","key":"11817_CR134","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1007\/s13748-015-0063-z","volume":"4","author":"RM Vallim","year":"2015","unstructured":"Vallim RM, de Mello RF (2015) Unsupervised change detection in data streams: an application in music analysis. Progress in Artificial Intelligence 4(1\u20132):1\u201310","journal-title":"Progress in Artificial Intelligence"},{"key":"11817_CR135","doi-asserted-by":"crossref","unstructured":"Vesperini F, Droghini D, Ferretti D, Principi E, Gabrielli L, Squartini S, Piazza F (2017) A hierarchic multi-scaled approach for rare sound event detection. In: Proc. DCASE 2017-Workshop Detect. Classification Acoust. Scenes Events","DOI":"10.23919\/EUSIPCO.2018.8553089"},{"key":"11817_CR136","doi-asserted-by":"crossref","unstructured":"Vincent E, Barker J, Watanabe S, Le Roux J, Nesta F, Matassoni M (2013) The second \u2018chime\u2019speech separation and recognition challenge: An overview of challenge systems and outcomes. In: 2013 IEEE Workshop on Automatic Speech Recognition and Understanding. IEEE , pp 162\u2013167","DOI":"10.1109\/ASRU.2013.6707723"},{"key":"11817_CR137","first-page":"3371","volume":"11","author":"P Vincent","year":"2010","unstructured":"Vincent P, Larochelle H, Lajoie I, Bengio Y, Manzagol PA (2010) Stacked denoising autoencoders: Learning useful representations in a deep network with a local denoising criterion. Journal of Machine Learning Research 11:3371\u20133408","journal-title":"Journal of Machine Learning Research"},{"key":"11817_CR138","unstructured":"Virtanen T, Mesaros A, Heittola T, Diment A, Vincent E, Benetos E, Elizalde BM (2017)Proceedings of the Detection and Classification of Acoustic Scenes and Events 2017 Workshop (DCASE2017). Tampere University of Technology. Laboratory of Signal Processing"},{"key":"11817_CR139","unstructured":"Virtanen T, Mesaros A, Heittola T, Plumbley M, Foster P, Benetos E, Lagrange M (2016)Proceedings of the Detection and Classification of Acoustic Scenes and Events 2016 Workshop (DCASE2016). Tampere University of Technology. Department of Signal Processing"},{"key":"11817_CR140","unstructured":"WEI, Q., LIU, Y.: Auto-encoder and metric-learning for anomalous sound detection task(2020). http:\/\/dcase.community\/challenge2020\/index. Preprint: http:\/\/dcase.community\/documents\/challenge2020\/technical_reports\/DCASE2020_Wei_49_t2.pdf"},{"issue":"3","key":"11817_CR141","doi-asserted-by":"crossref","first-page":"569","DOI":"10.1109\/TMM.2019.2933330","volume":"22","author":"X Xia","year":"2019","unstructured":"Xia X, Togneri R, Sohel F, Zhao Y, Huang D (2019) Multi-task learning for acoustic event detection using event and frame position information. IEEE Transactions on Multimedia 22(3):569\u2013578","journal-title":"IEEE Transactions on Multimedia"},{"issue":"8","key":"11817_CR142","doi-asserted-by":"crossref","first-page":"3433","DOI":"10.1007\/s00034-019-01094-1","volume":"38","author":"X Xia","year":"2019","unstructured":"Xia X, Togneri R, Sohel F, Zhao Y, Huang D (2019) A survey: neural network-based deep learning for acoustic event detection. Circuits, Systems, and Signal Processing 38(8):3433\u20133453","journal-title":"Circuits, Systems, and Signal Processing"},{"issue":"1","key":"11817_CR143","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1016\/j.cviu.2007.06.004","volume":"111","author":"T Xiang","year":"2008","unstructured":"Xiang T, Gong S (2008) Incremental and adaptive abnormal behaviour detection. Computer Vision and Image Understanding 111(1):59\u201373","journal-title":"Computer Vision and Image Understanding"},{"key":"11817_CR144","doi-asserted-by":"crossref","unstructured":"Yamaguchi M, Koizumi Y, Harada N (2019) Adaflow: Domain-adaptive density estimator with application to anomaly detection and unpaired cross-domain translation. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE , pp 3647\u20133651","DOI":"10.1109\/ICASSP.2019.8683072"},{"key":"11817_CR145","doi-asserted-by":"crossref","first-page":"317","DOI":"10.2197\/ipsjjip.25.317","volume":"25","author":"Y Yamato","year":"2017","unstructured":"Yamato Y, Fukumoto Y, Kumazaki H (2017) Predictive maintenance platform with sound stream analysis in edges. Journal of Information processing 25:317\u2013320","journal-title":"Journal of Information processing"},{"key":"11817_CR146","doi-asserted-by":"crossref","unstructured":"Yan J, Song Y, Guo W, Dai LR, McLoughlin I, Chen L (2019) A region based attention method for weakly supervised sound event detection and classication. In: ICASSP 2019-2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 755\u2013759","DOI":"10.1109\/ICASSP.2019.8682376"},{"key":"11817_CR147","doi-asserted-by":"crossref","unstructured":"Ye J, Kobayashi T, Higuchi T (2012) Smart audio sensor on anomaly respiration detection using flac features. In: 2012 IEEE Sensors Applications Symposium Proceedings. IEEE, pp 1\u20135","DOI":"10.1109\/SAS.2012.6166323"},{"key":"11817_CR148","doi-asserted-by":"crossref","unstructured":"Zabihi M, Rad AB, Kiranyaz S, Gabbouj M, Katsaggelos AK (2016) Heart sound anomaly and quality detection using ensemble of neural networks without segmentation. In: 2016 Computing in Cardiology Conference (CinC). IEEE , pp 613\u2013616","DOI":"10.22489\/CinC.2016.180-213"},{"issue":"1","key":"11817_CR149","doi-asserted-by":"publisher","first-page":"235","DOI":"10.1016\/j.ejor.2020.09.028","volume":"290","author":"Y Zhang","year":"2021","unstructured":"Zhang Y, Zhu R, Chen Z, Gao J, Xia D (2021) Evaluating and selecting features via information theoretic lower bounds of feature inner correlations for high-dimensional data. European Journal of Operational Research 290(1):235\u2013247. https:\/\/doi.org\/10.1016\/j.ejor.2020.09.028","journal-title":"European Journal of Operational Research"},{"key":"11817_CR150","doi-asserted-by":"crossref","unstructured":"Zhang Z, Schuller B (2012) Semi-supervised learning helps in sound event classification. In: 2012 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 333\u2013336","DOI":"10.1109\/ICASSP.2012.6287884"},{"issue":"12","key":"11817_CR151","doi-asserted-by":"crossref","first-page":"1543","DOI":"10.1016\/j.patrec.2010.02.005","volume":"31","author":"X Zhuang","year":"2010","unstructured":"Zhuang X, Zhou X, Hasegawa-Johnson MA, Huang TS (2010) Real-world acoustic event detection. Pattern Recognition Letters 31(12):1543\u20131551","journal-title":"Pattern Recognition Letters"},{"key":"11817_CR152","doi-asserted-by":"crossref","unstructured":"Zhuang X, Zhou X, Huang TS, Hasegawa-Johnson M (2008) Feature analysis and selection for acoustic event detection. In: 2008 IEEE International Conference on Acoustics, Speech and Signal Processing. IEEE, pp 17\u201320","DOI":"10.1109\/ICASSP.2008.4517535"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11817-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-021-11817-9\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-021-11817-9.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,14]],"date-time":"2023-11-14T22:15:50Z","timestamp":1700000150000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-021-11817-9"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,12,27]]},"references-count":152,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["11817"],"URL":"https:\/\/doi.org\/10.1007\/s11042-021-11817-9","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,12,27]]},"assertion":[{"value":"26 April 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2021","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 December 2021","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 December 2021","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}