{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,1]],"date-time":"2025-12-01T11:16:56Z","timestamp":1764587816463},"reference-count":46,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2016,12,1]],"date-time":"2016-12-01T00:00:00Z","timestamp":1480550400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"}],"funder":[{"name":"Department of Electronics and Information Technology, Government of India"},{"name":"TTS"},{"name":"ASR"},{"name":"DA-IICT, Gandhinagar, India"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2016,12]]},"DOI":"10.1109\/taslp.2016.2607341","type":"journal-article","created":{"date-parts":[[2016,9,8]],"date-time":"2016-09-08T18:36:45Z","timestamp":1473359805000},"page":"2341-2353","source":"Crossref","is-referenced-by-count":42,"title":["Novel Unsupervised Auditory Filterbank Learning Using Convolutional RBM for Speech Recognition"],"prefix":"10.1109","volume":"24","author":[{"given":"Hardik B.","family":"Sailor","sequence":"first","affiliation":[]},{"given":"Hemant A.","family":"Patil","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2011.03.001"},{"key":"ref38","article-title":"The KALDI speech recognition toolkit","author":"povey","year":"0","journal-title":"Proc IEEE Workshop Autom Speech Recog Underst"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/18.119739"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178847"},{"key":"ref31","first-page":"305","article-title":"Auditory features based on gammatone filters for robust speech recognition","author":"qi","year":"0","journal-title":"Proc IEEE Int Symp Circuits Syst"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1162\/089976602760128018"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/29.46546"},{"key":"ref36","article-title":"AURORA working group: DSR front end LVCSR evaluation","author":"parihar","year":"2002","journal-title":"Inst Signal Inf Process"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.3115\/1075527.1075614"},{"key":"ref34","first-page":"27403","article-title":"DARPA TIMIT acoustic-phonetic continuous speech corpus CD-ROM. NIST speech disc 1&#x2013;1.1","author":"garofolo","year":"1993","journal-title":"NASA STI\/Recon Hampton VA USA Tech Rep No 93"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178781"},{"key":"ref11","first-page":"890","article-title":"Acoustic modeling with deep neural networks using raw time signal for LVCSR","author":"t\u00fcske","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2009.5459469"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1038\/nature14539"},{"key":"ref13","first-page":"1631","article-title":"Speech feature extraction using independent component analysis","volume":"3","author":"lee","year":"0","journal-title":"IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1038\/nn831"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1162\/089976600300015826"},{"key":"ref16","first-page":"4713","article-title":"Unsupervised learning of auditory filter banks using non-negative matrix factorization","author":"bertrand","year":"0","journal-title":"Proc IEEE Int Conf Acoust Speech Signal Process"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495666"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2010.2101597"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2011.5947700"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1111\/cogs.12049"},{"key":"ref28","first-page":"807","article-title":"Rectified linear units improve restricted Boltzmann machines","author":"nair","year":"0","journal-title":"Proc 27th Int Conf Mach Learn"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/89.326615"},{"key":"ref6","article-title":"Deep speech 2: End-to-end speech recognition in English and Mandarin","author":"amodei","year":"2015","journal-title":"Proc Int Conf Mach Learn (ICML)"},{"key":"ref5","first-page":"1764","article-title":"Towards end-to-end speech recognition with recurrent neural networks","author":"graves","year":"2014","journal-title":"Proc 31st Int Conf Mach Learn"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-33275-3_2"},{"key":"ref8","first-page":"1","article-title":"Learning the speech front-end with raw waveform CLDNNs","author":"sainath","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404790"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/MSP.2012.2207989"},{"key":"ref9","first-page":"26","article-title":"Convolutional neural networks for acoustic modeling of raw time signal in LVCSR","author":"golik","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/9781118392683.ch8"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1162\/0899766052530839"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-812"},{"key":"ref45","first-page":"905","article-title":"Robust CNN-based speech recognition with gabor filter kernels","author":"chang","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2014.2326991"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1038\/nature04485"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553453"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178920"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472808"},{"key":"ref41","first-page":"1766","article-title":"Estimating phoneme class conditional probabilities from raw speech signal using convolutional neural networks","author":"palaz","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref26","first-page":"873","article-title":"Unsupervised learning of temporal receptive fields using convolutional RBM for ASR task","author":"sailor","year":"0","journal-title":"Proc Eur Signal Process Conf"},{"key":"ref44","first-page":"895","article-title":"Evaluating robust features on deep neural networks for speech recognition in noisy and channel mismatched conditions","author":"mitra","year":"0","journal-title":"Proc INTERSPEECH"},{"key":"ref25","first-page":"1096","article-title":"Unsupervised feature learning for audio classification using convolutional deep belief networks","author":"lee","year":"0","journal-title":"Proc 23rd Annu Conf Neural Inf Process Syst"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2014.7078567"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/7571201\/07563327.pdf?arnumber=7563327","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,12]],"date-time":"2022-01-12T16:39:07Z","timestamp":1642005547000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7563327\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,12]]},"references-count":46,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/taslp.2016.2607341","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2016,12]]}}}