{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T22:34:44Z","timestamp":1765233284061},"reference-count":41,"publisher":"Springer Science and Business Media LLC","issue":"21","license":[{"start":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T00:00:00Z","timestamp":1649289600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T00:00:00Z","timestamp":1649289600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1007\/s11042-022-12873-5","type":"journal-article","created":{"date-parts":[[2022,4,7]],"date-time":"2022-04-07T18:03:51Z","timestamp":1649354631000},"page":"30911-30930","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Hand-crafted versus learned representations for audio event detection"],"prefix":"10.1007","volume":"81","author":[{"given":"Selver Ezgi","family":"K\u00fc\u00e7\u00fckbay","sequence":"first","affiliation":[]},{"given":"Adnan","family":"Yaz\u0131c\u0131","sequence":"additional","affiliation":[]},{"given":"Sinan","family":"Kalkan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,4,7]]},"reference":[{"key":"12873_CR1","doi-asserted-by":"crossref","unstructured":"Aytar Y, Vondrick C, Torralba A (2016) Soundnet: Learning sound representations from unlabeled video. In: Advances in neural information processing systems","DOI":"10.1109\/CVPR.2016.18"},{"key":"12873_CR2","unstructured":"Becker S, Ackermann M, Lapuschkin S, M\u00fcller K-R, Samek W (2018) Interpreting and explaining deep neural networks for classification of audio signals. CoRR"},{"key":"12873_CR3","doi-asserted-by":"crossref","unstructured":"\u00c7akir E, Parascandolo G, Heittola T, Huttunen H, Virtanen T (2017) Convolutional recurrent neural networks for polyphonic sound event detection. CoRR","DOI":"10.1109\/ICASSP.2016.7472917"},{"key":"12873_CR4","doi-asserted-by":"crossref","unstructured":"\u00c7akir E, Virtanen T (2018) End-to-end polyphonic sound event detection using convolutional recurrent neural networks with learned time-frequency representation input. CoRR","DOI":"10.1109\/IJCNN.2018.8489470"},{"key":"12873_CR5","doi-asserted-by":"crossref","unstructured":"Dai W, Dai C, Qu S, Li J, Das S (2016) Very deep convolutional neural networks for raw waveforms","DOI":"10.1109\/ICASSP.2017.7952190"},{"key":"12873_CR6","unstructured":"Dinkel H, Qian Y, Yu K P (2018) A hybrid asr model approach on weakly labeled scene classification"},{"key":"12873_CR7","doi-asserted-by":"crossref","unstructured":"Eutizi C, Benedetto F (2021) On the performance improvements of deep learning methods for audio event detection and classification. In: 2021 44th International Conference on Telecommunications and Signal Processing (TSP), pp 141\u2013145","DOI":"10.1109\/TSP52935.2021.9522625"},{"key":"12873_CR8","doi-asserted-by":"crossref","unstructured":"Fonseca E, Ortego D, McGuinness K, O\u2019Connor N E, Serra X (2020) Unsupervised contrastive learning of sound event representations","DOI":"10.1109\/ICASSP39728.2021.9415009"},{"key":"12873_CR9","doi-asserted-by":"crossref","unstructured":"Gemmeke J F, Ellis D P W, Freedman D, Jansen A, Lawrence W, Moore R C, Plakal M, Ritter M (2017) Audio set: An ontology and human-labeled dataset for audio events. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp 776\u2013780","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"12873_CR10","doi-asserted-by":"crossref","unstructured":"Giannakopoulos T, Spyrou E, Perantonis S (2019) Recognition of urban sound events using deep context-aware feature extractors and handcrafted features. Int. Conf. on Artificial Intelligence Applications and Innovations","DOI":"10.20944\/preprints201811.0509.v1"},{"key":"12873_CR11","doi-asserted-by":"crossref","unstructured":"Hershey S, Chaudhuri S, Ellis D P W, Gemmeke J F, Jansen A, Moore R C, Plakal M, Platt D, Saurous R A, Seybold B, Slaney M, Weiss R J, Wilson K W (2016) CNN architectures for large-scale audio classification. CoRR","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"12873_CR12","unstructured":"Kayser M, Zhong V (2015) Denoising convolutional autoencoders for noisy speech recognition. Technical Report, CS231 Standford Reports"},{"key":"12873_CR13","doi-asserted-by":"crossref","unstructured":"Kong Q, Cao Y, Iqbal T, Wang Y, Wang W, Plumbley M D (2020) Panns: Large-scale pretrained audio neural networks for audio pattern recognition","DOI":"10.1109\/TASLP.2020.3030497"},{"key":"12873_CR14","unstructured":"Kothinti S, Sell G, Watanabe S, Elhilali M (2019) Integrated bottom-up and top-down inference for sound event detection. Technical Report, Department of Electrical and Computer Engineering. Johns Hopkins University, Baltimore"},{"key":"12873_CR15","first-page":"1097","volume":"25","author":"A Krizhevsky","year":"2012","unstructured":"Krizhevsky A, Sutskever I, Hinton G E (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inf Process Syst 25:1097\u20131105","journal-title":"Adv Neural Inf Process Syst"},{"key":"12873_CR16","doi-asserted-by":"publisher","first-page":"4911","DOI":"10.3390\/app10144911","volume":"10","author":"J-Y Kwak","year":"2020","unstructured":"Kwak J-Y, Chung Y-J (2020) Sound event detection using derivative features in deep neural networks. Appl Sci 10:4911. https:\/\/doi.org\/10.3390\/app10144911","journal-title":"Appl Sci"},{"key":"12873_CR17","unstructured":"Lee J, Kim T, Park J, Nam J (2017) Raw waveform-based audio classification using sample-level cnn architectures. 31st Conf. on Neural Information Processing Systems (NIPS)"},{"issue":"2","key":"12873_CR18","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1016\/j.dsp.2010.07.003","volume":"21","author":"S Lef\u00e8vre","year":"2011","unstructured":"Lef\u00e8vre S, Vincent N (2011) A two level strategy for audio segmentation. Digit Signal Process 21(2):270\u2013277","journal-title":"Digit Signal Process"},{"key":"12873_CR19","doi-asserted-by":"crossref","unstructured":"Li J, Dai W, Metze F, Qu S, Das S (2017) A comparison of deep learning methods for environmental sound. CoRR","DOI":"10.1109\/ICASSP.2017.7952131"},{"issue":"5","key":"12873_CR20","doi-asserted-by":"publisher","first-page":"1067","DOI":"10.1016\/j.jss.2011.12.019","volume":"85","author":"H Liu","year":"2012","unstructured":"Liu H, Zhang S (2012) Noisy data elimination using mutual k-nearest neighbor for classification mining. J Syst Softw 85(5):1067\u20131074","journal-title":"J Syst Softw"},{"key":"12873_CR21","doi-asserted-by":"crossref","unstructured":"Maas A, Le Q, neil T, Vinyals O, Nguyen P, Ng A (2012) Recurrent neural networks for noise reduction in robust asr. 13th Annual Conference of the International Speech Communication Association 2012, INTERSPEECH 2012 1","DOI":"10.21437\/Interspeech.2012-6"},{"key":"12873_CR22","doi-asserted-by":"crossref","unstructured":"Maria A, Jeyaseelan A S (2021) Development of optimal feature selection and deep learning toward hungry stomach detection using audio signals. J Control Autom Electr Syst 32","DOI":"10.1007\/s40313-021-00727-8"},{"key":"12873_CR23","doi-asserted-by":"crossref","unstructured":"Mesaros A, Heittola T, Virtanen T (2016) Tut database for acoustic scene classification and sound event detection. In: 2016 24th European Signal Processing Conference (EUSIPCO), pp 1128\u20131132","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"issue":"5","key":"12873_CR24","doi-asserted-by":"publisher","first-page":"670","DOI":"10.1109\/MSP.2021.3090678","volume":"38","author":"A Mesaros","year":"2021","unstructured":"Mesaros A, Heittola T, Virtanen T, Plumbley M D (2021) Sound event detection: a tutorial. IEEE Signal Proc Mag 38(5):670\u201383. https:\/\/doi.org\/10.1109\/msp.2021.3090678","journal-title":"IEEE Signal Proc Mag"},{"key":"12873_CR25","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.bspc.2014.02.001","volume":"11","author":"G Muhammad","year":"2014","unstructured":"Muhammad G, Melhem M (2014) Pathological voice detection and binary classification using mpeg-7 audio features. Biomed Signal Process Control 11:1\u20139","journal-title":"Biomed Signal Process Control"},{"key":"12873_CR26","doi-asserted-by":"crossref","unstructured":"Ntalampiras S, Potamitis I, Fakotakis N (2009) On acoustic surveillance of hazardous situations. In: IEEE international conference on acoustics, speech and signal processing, pp 165\u2013168","DOI":"10.1109\/ICASSP.2009.4959546"},{"key":"12873_CR27","unstructured":"Ntalampiras S, Potamitis I, Fakotakis N (2009) A portable system for robust acoustic detection of atypical situations. In: 17th European signal processing conference, pp 1121\u20131125"},{"key":"12873_CR28","doi-asserted-by":"crossref","unstructured":"Piczak K J (2015) Environmental sound classification with convolutional neural networks. In: 2015 IEEE 25th International Workshop on Machine Learning for Signal Processing (MLSP), pp 1\u20136","DOI":"10.1109\/MLSP.2015.7324337"},{"key":"12873_CR29","unstructured":"Piczak K J (2016) Recognizing bird species in audio recordings using deep convolutional neural networks. In: CLEF"},{"key":"12873_CR30","first-page":"91","volume":"28","author":"S Ren","year":"2015","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: Towards real-time object detection with region proposal networks. Adv Neural Inf Process Syst 28:91\u201399","journal-title":"Adv Neural Inf Process Syst"},{"key":"12873_CR31","doi-asserted-by":"crossref","unstructured":"Saeed A, Grangier D, Zeghidour N (2020) Contrastive learning of general-purpose audio representations","DOI":"10.1109\/ICASSP39728.2021.9413528"},{"key":"12873_CR32","unstructured":"Shah A, Kumar A, Hauptmann A G, Raj B (2018) A closer look at weak label learning for audio events. arXiv:1804.09288"},{"key":"12873_CR33","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1121\/1.1915893","volume":"8","author":"SS Stevens","year":"1937","unstructured":"Stevens S S, Volkmann J, Newman E B (1937) A scale for the measurement of the psychological magnitude pitch. J Acoust Soc Am 8:185\u2013190","journal-title":"J Acoust Soc Am"},{"issue":"10","key":"12873_CR34","doi-asserted-by":"publisher","first-page":"1733","DOI":"10.1109\/TMM.2015.2428998","volume":"17","author":"D Stowell","year":"2015","unstructured":"Stowell D, Giannoulis D, Benetos E, Lagrange M, Plumbley M D (2015) Detection and classification of acoustic scenes and events. IEEE Trans Multimed 17(10):1733\u20131746","journal-title":"IEEE Trans Multimed"},{"key":"12873_CR35","doi-asserted-by":"crossref","unstructured":"Sun Y, Ghaffarzadegan S (2020) An ontology-aware framework for audio event classification. In: IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP 2020","DOI":"10.1109\/ICASSP40776.2020.9053389"},{"key":"12873_CR36","unstructured":"Turpault N, Serizel R (2020) Training sound event detection on a heterogeneous dataset"},{"issue":"3","key":"12873_CR37","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1016\/j.bspc.2009.02.001","volume":"4","author":"M Vasilakis","year":"2009","unstructured":"Vasilakis M, Stylianou Y (2009) Spectral jitter modeling and estimation. Biomed Signal Process Control 4(3):183\u2013193","journal-title":"Biomed Signal Process Control"},{"key":"12873_CR38","doi-asserted-by":"crossref","unstructured":"Wang Z, Casebeer J, Clemmitt A, Tzinis E, Smaragdis P (2021) Sound event detection with adaptive frequency selection. arXiv:2105.07596","DOI":"10.1109\/WASPAA52581.2021.9632798"},{"key":"12873_CR39","unstructured":"Xu Y, Huang Q, Wang W, Foster P, Sigtia S, Jackson P J B, Plumbley M D (2016) Fully deep neural networks incorporating unsupervised feature learning for audio tagging. CoRR, arXiv:1607.03681"},{"key":"12873_CR40","unstructured":"Zang Z, Yang M, Liu L (2019) An improved system for dcase 2019 challenge task4. Technical Report, University of Electronic Science and Technology of China School of Information and Communication Engineering"},{"key":"12873_CR41","doi-asserted-by":"crossref","unstructured":"Zhuang X, Zhou X, Huang T S, Hasegawa-Johnson M (2008) Feature analysis and selection for acoustic event detection. In: 2008 IEEE international conference on acoustics, speech and signal processing, pp 17\u201320","DOI":"10.1109\/ICASSP.2008.4517535"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12873-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-12873-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-12873-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,17]],"date-time":"2022-08-17T05:44:20Z","timestamp":1660715060000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-12873-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,4,7]]},"references-count":41,"journal-issue":{"issue":"21","published-print":{"date-parts":[[2022,9]]}},"alternative-id":["12873"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-12873-5","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,4,7]]},"assertion":[{"value":"5 July 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 March 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 March 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 April 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Selver Ezgi K\u00fc\u00e7\u00fckbay, Adnan Yaz\u0131c\u0131 and Sinan Kalkan declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}]}}