{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T15:23:01Z","timestamp":1780586581786,"version":"3.54.1"},"reference-count":28,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/icassp.2018.8462165","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:24:48Z","timestamp":1537568688000},"page":"4884-4888","source":"Crossref","is-referenced-by-count":80,"title":["Towards Directly Modeling Raw Speech Signal for Speaker Verification Using CNNS"],"prefix":"10.1109","author":[{"given":"Hannah","family":"Muckenhirn","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mathew","family":"Magimai.-Doss","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sebastien","family":"Marcell","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref10","article-title":"Acoustic Modeling with Deep Neural Networks Using Raw Time Signal for LVCSR","author":"zolt\u00e1n","year":"2014","journal-title":"Proc of Interspeech"},{"key":"ref11","article-title":"Learning the speech front-end with raw waveform CLDNNs","author":"tara","year":"2015","journal-title":"Proc of Interspeech"},{"key":"ref12","article-title":"Adieu features? End-to-end speech emotion recognition using a deep convolutional recurrent network","author":"george","year":"2016","journal-title":"Proc of ICASSP"},{"key":"ref13","article-title":"Feature learning with raw-waveform CLDNNs for voice activity detection","author":"rub\u00e9n","year":"2016","journal-title":"Proc of Interspeech"},{"key":"ref14","article-title":"End-to-end spoofing detection with raw waveform CLDNNS","author":"heinrich","year":"2017","journal-title":"Proc of ICASSP"},{"key":"ref15","article-title":"End-to-end convolutional neural network-based voice presentation attack detection","author":"hannah","year":"2017","journal-title":"Intl Joint Conference on Biometrics"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1121\/1.1913065"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1975.1162664"},{"key":"ref18","article-title":"End-to-end acoustic modeling using convolutional neural networks for automatic speech recognition","author":"dimitri","year":"2016","journal-title":"Idiap-RR Idiap-RR-18&#x2013;2016 Idiap 6"},{"key":"ref19","article-title":"Spear: An open source toolbox for speaker recognition based on bob","author":"khoury","year":"2014","journal-title":"Proc of ICASSP"},{"key":"ref28","article-title":"Bi-modal person recognition on a mobile phone: using mobile phone data","author":"mccool","year":"2012","journal-title":"Proc of Workshop on Hot Topics in Mobile Multimedia"},{"key":"ref4","article-title":"Probabilistic linear discriminant analysis for inferences about identity","author":"simon","year":"2007","journal-title":"Proc of International Conference on Computer Vision"},{"key":"ref27","article-title":"A pitch extraction reference database","author":"fabrice","year":"1995","journal-title":"Proc of Eurospeech"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"788","DOI":"10.1109\/TASL.2010.2064307","article-title":"Front-end factor analysis for speaker verification","volume":"19","author":"najim","year":"2011","journal-title":"IEEE Transactions on Audio Speech and Language Processing"},{"key":"ref6","article-title":"End-to-end text-dependent speaker verification","author":"heigold","year":"2016","journal-title":"Proc of ICASSP"},{"key":"ref5","article-title":"Deep neural networks for small footprint text-dependent speaker verification","author":"ehsan","year":"2014","journal-title":"Proc of International Conference on Acoustics speech and signal Processing (ICASSP)"},{"key":"ref8","article-title":"Voxceleb: a large-scale speaker identification dataset","author":"arsha","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref7","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1608","article-title":"End-to-end text-independent speaker verification with triplet loss on short utterances","author":"zhang","year":"2017","journal-title":"Proc of Interspeech"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2006.870086"},{"key":"ref9","article-title":"Estimating phoneme class conditional probabilities from raw speech signal using convolutional neural networks","author":"dimitri","year":"2013","journal-title":"Proc of Interspeech"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1006\/dspr.1999.0361"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2007.05.003"},{"key":"ref22","article-title":"Torch7: A Matlab-like Environment for Machine Learning","author":"collobert","year":"2011","journal-title":"BigLearn NIPS Workshop"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2006.881693"},{"key":"ref24","first-page":"1","article-title":"Convolutional neural networks analyzed via convolutional sparse coding","volume":"18","author":"papyan","year":"2017","journal-title":"Journal of Machine Learning Research"},{"key":"ref23","article-title":"Analysis of CNN-based speech recognition system using raw speech as input","author":"palaz","year":"2015","journal-title":"Proc of Interspeech"},{"key":"ref26","article-title":"Wavesurfer-an open source speech tool","author":"k\u00e5re","year":"2000","journal-title":"Proc Int l Conf Spoken Language Processing"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1098\/rsta.2015.0203"}],"event":{"name":"ICASSP 2018 - 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","location":"Calgary, AB","start":{"date-parts":[[2018,4,15]]},"end":{"date-parts":[[2018,4,20]]}},"container-title":["2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8450881\/8461260\/08462165.pdf?arnumber=8462165","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T00:51:39Z","timestamp":1598230299000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8462165\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/icassp.2018.8462165","relation":{},"subject":[],"published":{"date-parts":[[2018,4]]}}}