{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T00:18:11Z","timestamp":1773965891384,"version":"3.50.1"},"reference-count":70,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"funder":[{"name":"European Union as part of the ESF-Program","award":["K-7531.20\/434-11"],"award-info":[{"award-number":["K-7531.20\/434-11"]}]},{"name":"European Union as part of the ESF-Program","award":["SAB-Nr. 100316843"],"award-info":[{"award-number":["SAB-Nr. 100316843"]}]},{"DOI":"10.13039\/501100004061","name":"Oesterreichische Nationalbank","doi-asserted-by":"publisher","award":["OeNB"],"award-info":[{"award-number":["OeNB"]}],"id":[{"id":"10.13039\/501100004061","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004061","name":"Oesterreichische Nationalbank","doi-asserted-by":"publisher","award":["P16430"],"award-info":[{"award-number":["P16430"]}],"id":[{"id":"10.13039\/501100004061","id-type":"DOI","asserted-by":"publisher"}]},{"name":"BioTechMed-Graz, Austria"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2021]]},"DOI":"10.1109\/taslp.2020.3037414","type":"journal-article","created":{"date-parts":[[2020,11,11]],"date-time":"2020-11-11T21:38:07Z","timestamp":1605130687000},"page":"54-67","source":"Crossref","is-referenced-by-count":8,"title":["Comparison of Artificial Neural Network Types for Infant Vocalization Classification"],"prefix":"10.1109","volume":"29","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7149-0586","authenticated-orcid":false,"given":"Franz","family":"Anders","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2364-0986","authenticated-orcid":false,"given":"Mario","family":"Hlawitschka","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7553-1517","authenticated-orcid":false,"given":"Mirco","family":"Fuchs","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.4300\/JGME-D-12-00156.1"},{"key":"ref39","first-page":"818","article-title":"Visualizing and understanding convolutional networks","author":"zeiler","year":"2014","journal-title":"Proc"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1016\/S0892-1997(02)00127-3"},{"key":"ref33","article-title":"Auditory toolbox","volume":"10","author":"slaney","year":"1998","journal-title":"Tech Rep Interval Research Corporation"},{"key":"ref32","first-page":"6340","article-title":"audeep: Unsupervised learning of representations from audio with deep recurrent neural networks","volume":"18","author":"freitag","year":"2017","journal-title":"J Mach Learn Res"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1002\/1097-0355(198122)2:2<118::AID-IMHJ2280020208>3.0.CO;2-5"},{"key":"ref37","article-title":"An acoustic phonetic catalog of prespeech vocalizations from a developmental perspective","author":"buder","year":"2013","journal-title":"Comprehensive perspectives on child speech development and disorders Pathways from linguistic theory to clinical practice"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2006.12.009"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2690575"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"ref60","first-page":"5024","article-title":"Cp-jku submissions for dcase-2016: A hybrid approach using binaural i-vectors and deep convolutional neural networks","volume":"6","author":"eghbal-zadeh","year":"2016","journal-title":"IEEE AASP Challenge Detection and Classification of Acoustic Scenes and Events"},{"key":"ref62","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv 1412 6980"},{"key":"ref61","article-title":"Acoustic scene classification and audio tagging with receptive-field-regularized CNNs","author":"koutini","year":"2019"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1093\/oxfordjournals.pan.a004868"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178838"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2010.764"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.33682\/57xx-t679"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-7138-7"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-05318-5_1"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2020.03.003"},{"key":"ref67","article-title":"Do we need harmless Bayesian optimization and first-order Bayesian optimization","author":"ahmed","year":"2016","journal-title":"NIPS BayesOpt"},{"key":"ref68","article-title":"Efficient hyperparameter optimization and infinitely many armed bandits","author":"rostamizadeh","year":"2017"},{"key":"ref69","article-title":"An introduction to recursive partitioning using the rpart routines","author":"therneau","year":"1997"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/s11910-017-0748-8"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1093\/med\/9780199642656.001.0001"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TAFFC.2015.2457417"},{"key":"ref22","first-page":"9","article-title":"A multi-device dataset for urban acoustic scene classification","author":"mesaros","year":"2018","journal-title":"Proc Workshop Detection Classif Acoust Scenes Events"},{"key":"ref21","article-title":"Dcase 2017 challenge setup: Tasks, datasets and baseline system","author":"mesaros","year":"2017","journal-title":"Proceedings of the Detection and Classification of Acoustic Scenes and Events 2017 Workshop (DCASE2017)"},{"key":"ref24","article-title":"Integrating the data augmentation scheme with various classifiers for acoustic scene modeling","year":"2019"},{"key":"ref23","author":"goodfellow","year":"2016","journal-title":"Deep Learning"},{"key":"ref26","first-page":"25","article-title":"Acoustic scene classification using various pre-processed features and convolutional neural networks","author":"seo","year":"2019","journal-title":"Proc Workshop Detection Classif Acoust Scenes Events"},{"key":"ref25","article-title":"CP-JKU submissions to dcase19: Acoustic scene classification and audio tagging with receptive-field-regularized cnns","author":"koutini","year":"0","journal-title":"Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)"},{"key":"ref50","first-page":"80","article-title":"Rare sound event detection using 1D convolutional recurrent neural networks","author":"lim","year":"0","journal-title":"Proc DCAS"},{"key":"ref51","first-page":"3","article-title":"Rectifier nonlinearities improve neural network acoustic models","volume":"30","author":"maas","year":"0","journal-title":"Proc ICML"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952552"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2149"},{"key":"ref56","article-title":"Recurrent batch normalization","author":"cooijmans","year":"2016","journal-title":"arXiv 1603 09025"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2261"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1610"},{"key":"ref53","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"chung","year":"2014","journal-title":"arXiv 1412 3555"},{"key":"ref52","article-title":"Fast and accurate deep network learning by exponential linear units (elus)","author":"clevert","year":"0","journal-title":"arXiv 1511 07289"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.3758\/BF03195385"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1080\/02699200500211451"},{"key":"ref40","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"2014"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502224"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2013.00292"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1076"},{"key":"ref15","article-title":"End2you&#x2013;the imperial toolkit for multimodal profiling by end-to-end learning","author":"tzirakis","year":"0"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2019"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2187"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1914"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1959"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1136\/adc.63.4.380"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1007"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1002\/icd.344"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.1996.506929"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-51"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1300337110"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1832"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1238"},{"key":"ref46","article-title":"Knowledge distillation with specialist models in acoustic scene classification","author":"jung","year":"2019"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.33682\/8rd2-g787"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2581"},{"key":"ref47","article-title":"Ciaic-ASC system for dcase 2019 challenge task1","author":"wan","year":"2019"},{"key":"ref42","article-title":"Multitask learning and semisupervised learning with noisy data for audio tagging","author":"akiyama","year":"2019"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref44","article-title":"Acoustic scene classification using CNN ensembles and primary ambient extraction","author":"yang","year":"2019"},{"key":"ref43","article-title":"Thuee system for dcase 2019 challenge task 2","author":"he","year":"2019"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9289074\/09257020.pdf?arnumber=9257020","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T15:58:25Z","timestamp":1642003105000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9257020\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"references-count":70,"URL":"https:\/\/doi.org\/10.1109\/taslp.2020.3037414","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021]]}}}