{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,17]],"date-time":"2025-10-17T13:55:30Z","timestamp":1760709330462},"reference-count":63,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2017,6,1]],"date-time":"2017-06-01T00:00:00Z","timestamp":1496275200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2017,6]]},"DOI":"10.1109\/taslp.2017.2690569","type":"journal-article","created":{"date-parts":[[2017,5,23]],"date-time":"2017-05-23T21:03:22Z","timestamp":1495573402000},"page":"1304-1314","source":"Crossref","is-referenced-by-count":20,"title":["Classifier Architectures for Acoustic Scenes and Events: Implications for DNNs, TDNNs, and Perceptual Features from DCASE 2016"],"prefix":"10.1109","volume":"25","author":[{"given":"Jens","family":"Schroder","sequence":"first","affiliation":[{"name":"Project Group Hearing, Speech and Audio Technology, Fraunhofer Institute for Digital Media Technology, Oldenburg, Germany"}]},{"given":"Niko","family":"Moritz","sequence":"additional","affiliation":[{"name":"Project Group Hearing, Speech and Audio Technology, Fraunhofer Institute for Digital Media Technology, Oldenburg, Germany"}]},{"given":"Jorn","family":"Anemuller","sequence":"additional","affiliation":[{"name":"Department of Medical Physics and Acoustics and the Cluster of Excellence Hearing4all, University of Oldenburg, Oldenburg, Germany"}]},{"given":"Stefan","family":"Goetze","sequence":"additional","affiliation":[{"name":"Project Group Hearing, Speech and Audio Technology, Fraunhofer Institute for Digital Media Technology, Oldenburg, Germany"}]},{"given":"Birger","family":"Kollmeier","sequence":"additional","affiliation":[{"name":"Department of Medical Physics and Acoustics and the Cluster of Excellence Hearing4all, University of Oldenburg, Oldenburg, Germany"}]}],"member":"263","reference":[{"key":"ref39","first-page":"18","author":"t\u00fcske","year":"2012","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2015.7362479"},{"key":"ref33","first-page":"1267","article-title":"Acoustic event detection in real-life recordings","author":"mesaros","year":"2010","journal-title":"Proc 18th Eur Signal Process Conf"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.854103"},{"key":"ref31","first-page":"437","article-title":"Conversational speech transcription using context-dependent deep neural networks","author":"seide","year":"2011","journal-title":"Proc 12th Annu Conf Int Speech Commun Assoc"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2015.7362845"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7177950"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2013.6701861"},{"key":"ref35","first-page":"1525","article-title":"Automatic detection of relevant acoustic events in kindergarten noisy environments","author":"schr\u00f6der","year":"2015","journal-title":"Proc Deutsche Jahrestagung f&#x00FC;r Akustik"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1186\/1687-4722-2013-1"},{"key":"ref60","article-title":"Car-forest: Joint classification-regression decision forests for overlapping audio event detection","author":"phan","year":"2016"},{"key":"ref62","article-title":"Sound event detection in real-life audio","author":"ubskii","year":"2016"},{"key":"ref61","article-title":"DCASE 2016 sound event detection system based on convolutional neural network","author":"gorin","year":"2016"},{"key":"ref63","article-title":"Random system performance in task 3","author":"kroos","year":"2016"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2205597"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472918"},{"key":"ref29","first-page":"506","article-title":"Recognition of acoustic events using deep neural networks","author":"gencoglu","year":"2014","journal-title":"Proc 22nd Eur Signal Process Conf"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2015.2428998"},{"key":"ref1","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-540-68585-2","volume":"4625","author":"stiefelhagen","year":"2008","journal-title":"Multimodal Technologies for Perception of Humans"},{"key":"ref20","article-title":"Recurrence quantification analysis features for auditory scene classification","author":"roma","year":"2013"},{"key":"ref22","article-title":"Auditory scene classification using machine learning techniques","author":"li","year":"2013"},{"key":"ref21","article-title":"Recognising acoustic scenes with large-scale audio feature extraction and SVM","author":"geiger","year":"2013"},{"key":"ref24","article-title":"IEEE AASP scene classification challenge using hidden Markov models and frame based classification","author":"chuma","year":"2013"},{"key":"ref23","article-title":"A tone-fit feature representation for scene classification","author":"krijnders","year":"2013"},{"key":"ref26","first-page":"3549","article-title":"Sparse representations for modeling environmental acoustic scenes, application to train stations soundscapes","author":"cauchi","year":"2012","journal-title":"Proc Congr&#x00E8;s Fran&#x00E7;ais d&#x2019;Acoustique"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2015.7362477"},{"key":"ref50","first-page":"3214","article-title":"A time delay neural network architecture for efficient modeling of long temporal contexts","author":"peddinti","year":"2015","journal-title":"Proc Annu Conf Int Speech Commun Assoc"},{"key":"ref51","first-page":"153","article-title":"Greedy layer-wise training of deep networks","volume":"19","author":"bengio","year":"2006","journal-title":"Adv Neural Inf Process Syst"},{"key":"ref59","article-title":"Experimentation on the DCASE challenge 2016: Task 1&#x2014;Acoustic scene classification and task 3&#x2014;Sound event detection in real life audio","author":"elizalde","year":"2016"},{"key":"ref58","article-title":"Sound event detection for real life audio DCASE challenge","author":"dai","year":"2016"},{"key":"ref57","article-title":"Deep neural network baseline for DCASE challenge 2016","author":"kong","year":"2016"},{"key":"ref56","article-title":"DCASE report for task 3: Sound event detection in real life audio","author":"lai","year":"2016"},{"key":"ref55","article-title":"Acoustic scene and event recognition using recurrent neural networks","author":"vu","year":"2016"},{"key":"ref54","article-title":"Gated recurrent networks applied to acoustic scene classification and acoustic event detection","author":"z\u00f6hrer","year":"2016"},{"key":"ref53","article-title":"Sound event detection in multichannel audio using spatial and harmonic features","author":"adavanne","year":"2016"},{"key":"ref52","author":"young","year":"2015","journal-title":"The HTK Book (for HTK Version 3 5alpha)"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2005.1521669"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6637696"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2456420"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472921"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TBME.2006.873548"},{"key":"ref14","doi-asserted-by":"crossref","first-page":"278","DOI":"10.1109\/JBHI.2013.2268663","article-title":"Food intake monitoring: Automated chew event detection in chewing sounds","volume":"18","author":"p\u00e4\u00dfler","year":"2014","journal-title":"IEEE J Biomed Health Informat"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472920"},{"key":"ref16","first-page":"1266","article-title":"Foreign object detection in tires by acoustic event detection","author":"schr\u00f6der","year":"2015","journal-title":"Proc Deutsche Jahrestagung f&#x00FC;r Akustik"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TR.2015.2459684"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/29.21701"},{"key":"ref19","article-title":"Automatic event classification using front end single channel noise reduction, MFCC features and a support vector machine classifier","author":"nogueira","year":"2013"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-60960-177-5.ch008"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2587218"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2013.6701819"},{"key":"ref5","first-page":"28","article-title":"Reduction of non-stationary noise for a robotic living assistant using sparse non-negative matrix factorization","author":"cauchi","year":"2012","journal-title":"Proceedings of the 1st Workshop on Speech and Multimodal Interaction in Assistive Environments"},{"key":"ref8","first-page":"142","article-title":"Histogram of gradients of time-frequency representations for audio scene classification","volume":"23","author":"rakotomamonjy","year":"2015","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2016.7760424"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2467964"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1121\/1.2750160"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1016\/0165-1684(95)00049-J"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1152\/jn.00851.2002"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/EUSIPCO.2015.7362476"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/WASPAA.2013.6701868"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2615239"},{"key":"ref41","first-page":"468","article-title":"Long-term acoustic features for noise robust automatic speech recognition","author":"moritz","year":"2015","journal-title":"Proc IEEE Workshop on Automatic Speech Recognition and Understanding"},{"key":"ref44","first-page":"1297","article-title":"Discriminative learning of receptive fields from responses to non-Gaussian stimulus ensembles","volume":"70","author":"marcelja","year":"1980","journal-title":"J Opt Soc Amer"},{"key":"ref43","first-page":"429","article-title":"Theory of communication","volume":"93","author":"gabor","year":"1946","journal-title":"J Inst Elect Eng"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7933016\/07933048.pdf?arnumber=7933048","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:15:00Z","timestamp":1642004100000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/7933048\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,6]]},"references-count":63,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2017.2690569","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2017,6]]}}}