{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T23:13:32Z","timestamp":1768432412807,"version":"3.49.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,7,18]],"date-time":"2021-07-18T00:00:00Z","timestamp":1626566400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,7,18]]},"DOI":"10.1109\/ijcnn52387.2021.9533654","type":"proceedings-article","created":{"date-parts":[[2021,9,20]],"date-time":"2021-09-20T17:27:41Z","timestamp":1632158861000},"page":"1-8","source":"Crossref","is-referenced-by-count":30,"title":["ESResNe(X)t-fbsp: Learning Robust Time-Frequency Transformation of Audio"],"prefix":"10.1109","author":[{"given":"Andrey","family":"Guzhov","sequence":"first","affiliation":[{"name":"DFKI GmbH"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Federico","family":"Raue","sequence":"additional","affiliation":[{"name":"DFKI GmbH"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jorn","family":"Hees","sequence":"additional","affiliation":[{"name":"DFKI GmbH"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andreas","family":"Dengel","sequence":"additional","affiliation":[{"name":"DFKI GmbH"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref31","first-page":"536","article-title":"On the theory of filter amplifiers","volume":"7","author":"butterworth","year":"1930","journal-title":"Wireless Engineer"},{"key":"ref30","article-title":"A method of solving a convex programming problem with convergence rate o (1\/k&#x2018; 2) o (1\/k2)","volume":"27","author":"nesterov","year":"1983","journal-title":"Sov Math Dokl"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2019.06.040"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-4142-3"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/MLSP.2015.7324337"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1121\/1.1901999"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1977.1162950"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2017.2657381"},{"key":"ref16","author":"arnault","year":"2020","journal-title":"Urban sound classification striving towards a fair comparison"},{"key":"ref17","first-page":"5447","article-title":"A sequential self teaching approach for improving generalization in sound event recognition","author":"kumar","year":"0","journal-title":"International Conference on Machine Learning"},{"key":"ref18","author":"palanisamy","year":"2020","journal-title":"Rethinking cnn models for audio classification"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-03335-4_31"},{"key":"ref28","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"ArXiv Preprint"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-831"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref3","first-page":"4933","article-title":"Esresnet: Environmental sound classification based on visual domain models","author":"guzhov","year":"0","journal-title":"International Conference on Pattern Recognition (ICPR)"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952651"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1137\/0330046"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-00767-6_49"},{"key":"ref7","author":"tokozume","year":"2017","journal-title":"Learning from between-class examples for deep sound recognition"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/2647868.2655045"},{"key":"ref9","article-title":"An efficient implementation of the patterson-holdsworth auditory filter bank","volume":"35","author":"slaney","year":"1993","journal-title":"Apple Computer Perception Group Tech Rep"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2733373.2806390"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2939495"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.634"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-69900-4_40"},{"key":"ref24","article-title":"Siamese neural networks for one-shot image recognition","volume":"2","author":"koch","year":"0","journal-title":"ICML Deep Learning Workshop"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref26","author":"oppenheim","year":"1999","journal-title":"Discrete-Time Signal Processing"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.195"}],"event":{"name":"2021 International Joint Conference on Neural Networks (IJCNN)","location":"Shenzhen, China","start":{"date-parts":[[2021,7,18]]},"end":{"date-parts":[[2021,7,22]]}},"container-title":["2021 International Joint Conference on Neural Networks (IJCNN)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9533266\/9533267\/09533654.pdf?arnumber=9533654","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,8,2]],"date-time":"2022-08-02T19:32:32Z","timestamp":1659468752000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9533654\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,7,18]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/ijcnn52387.2021.9533654","relation":{},"subject":[],"published":{"date-parts":[[2021,7,18]]}}}