{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T22:00:26Z","timestamp":1769205626431,"version":"3.49.0"},"reference-count":60,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100003725","name":"National Research Foundation of Korea","doi-asserted-by":"publisher","award":["2015R1C1A1A02036962"],"award-info":[{"award-number":["2015R1C1A1A02036962"]}],"id":[{"id":"10.13039\/501100003725","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Sel. Top. Signal Process."],"published-print":{"date-parts":[[2019,5]]},"DOI":"10.1109\/jstsp.2019.2909479","type":"journal-article","created":{"date-parts":[[2019,4,4]],"date-time":"2019-04-04T19:42:13Z","timestamp":1554406933000},"page":"285-297","source":"Crossref","is-referenced-by-count":71,"title":["Comparison and Analysis of SampleCNN Architectures for Audio Classification"],"prefix":"10.1109","volume":"13","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9312-535X","authenticated-orcid":false,"given":"Taejun","family":"Kim","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1126-0081","authenticated-orcid":false,"given":"Jongpil","family":"Lee","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2664-2119","authenticated-orcid":false,"given":"Juhan","family":"Nam","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178781"},{"key":"ref38","first-page":"11","article-title":"Analysis of CNN-based speech recognition system using raw speech as input","author":"palaz","year":"0","journal-title":"Proc Int Conf Acoust Speech Signal Process"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682912"},{"key":"ref32","first-page":"1089","article-title":"On random weights and unsupervised feature learning","author":"saxe","year":"0","journal-title":"Proc Int Conf Mach Learn"},{"key":"ref31","first-page":"637","article-title":"End-to-end learning for music audio tagging at scale","author":"pons","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3390\/app8010150"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952651"},{"key":"ref36","article-title":"SampleRNN: An unconditional end-to-end neural audio generation model","author":"mehri","year":"0","journal-title":"Proc Int Conf Learn Represent"},{"key":"ref35","article-title":"WaveNet: A generative model for raw audio","author":"van den oord","year":"2016"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462046"},{"key":"ref60","first-page":"550","article-title":"Residual networks behave like ensembles of relatively shallow networks","author":"veit","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1495"},{"key":"ref27","article-title":"Wav2letter: an end-to-end convnet-based speech recognition system","author":"collobert","year":"2016"},{"key":"ref29","first-page":"220","article-title":"Sample-level deep convolutional neural networks for music auto-tagging using raw waveforms","author":"lee","year":"0","journal-title":"Proc Sound and Music Computing Conf"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"ref20","first-page":"603","article-title":"On the use of sparse time-relative auditory codes for music","author":"manzagol","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854950"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947700"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952190"},{"key":"ref23","first-page":"1","article-title":"Learning the speech front-end with raw waveform CLDNNS","author":"sainath","year":"0","journal-title":"Proc Int Speech Commun Assoc"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-256"},{"key":"ref25","article-title":"Learning features of music from scratch","author":"thickstun","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref50","article-title":"Speech commands: A dataset for limited-vocabulary speech recognition","author":"warden","year":"2018"},{"key":"ref51","first-page":"85","article-title":"DCASE 2017 challenge setup: Tasks, datasets and baseline system","author":"mesaros","year":"0","journal-title":"Proc Workshop Detection Classif Acoust Scenes Events"},{"key":"ref59","first-page":"6389","article-title":"Visualizing the loss landscape of neural nets","author":"li","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00007"},{"key":"ref57","article-title":"Visualizing higher-layer features of a deep network","author":"erhan","year":"2009"},{"key":"ref56","article-title":"Surrey-CVSSP system for DCASE2017 challenge task4","author":"xu","year":"0","journal-title":"Detection Classification Acoust Scenes Events Challenge"},{"key":"ref55","article-title":"A neural attention model for speech command recognition","author":"de andrade","year":"2018"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2017.2713830"},{"key":"ref53","article-title":"Multi-level and multi-scale feature aggregation using sample-level deep convolutional neural networks for music classification","author":"lee","year":"0","journal-title":"Proc Mach Learning Music Discovery Workshop\/Int Conf Mach Learn"},{"key":"ref52","article-title":"The million song dataset","volume":"2","author":"bertin-mahieux","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2017.2657381"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952132"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1370"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461975"},{"key":"ref13","article-title":"Acoustic scene classification by ensemble of spectrograms based on adaptive temporal divisions","author":"sakashita","year":"0","journal-title":"Proc Workshop Detection Classif Acoust Scenes Events"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2014.2339736"},{"key":"ref15","first-page":"577","article-title":"Attention-based models for speech recognition","author":"chorowski","year":"0","journal-title":"Proc Adv Neural Inf Process Syst"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8461870"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-440"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1038\/nn831"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1038\/nature04485"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.243"},{"key":"ref6","first-page":"29","article-title":"Transfer learning by supervised pre-training for audio-based music classification","author":"van den oord","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref5","first-page":"116","article-title":"Multiscale approaches to music audio feature learning","author":"dieleman","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref8","first-page":"805","article-title":"Automatic tagging using deep convolutional neural networks","author":"choi","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CBMI.2016.7500246"},{"key":"ref49","first-page":"387","article-title":"Evaluation of algorithms using games: The case of music tagging","author":"law","year":"0","journal-title":"Proc Int Soc Music Inf Retrieval Conf"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952585"},{"key":"ref46","first-page":"730","article-title":"Very deep convolutional networks for large-scale image recognition","author":"simonyan","year":"0","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.87"},{"key":"ref47","first-page":"630","article-title":"Identity mappings in deep residual networks","author":"he","year":"0","journal-title":"Proc Eur Conf Comput Vision"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2018.2874383"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s10844-013-0248-5"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref43","article-title":"Raw waveform-based audio classification using sample-level CNN architectures","author":"lee","year":"0","journal-title":"Proc Workshop Mach Learn Audio Signal Process NIPS"}],"container-title":["IEEE Journal of Selected Topics in Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/4200690\/8717740\/08681654.pdf?arnumber=8681654","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,13]],"date-time":"2022-07-13T21:08:15Z","timestamp":1657746495000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8681654\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5]]},"references-count":60,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/jstsp.2019.2909479","relation":{},"ISSN":["1932-4553","1941-0484"],"issn-type":[{"value":"1932-4553","type":"print"},{"value":"1941-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,5]]}}}