{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T13:25:30Z","timestamp":1725801930463},"reference-count":61,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,12,1]],"date-time":"2019-12-01T00:00:00Z","timestamp":1575158400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,12]]},"DOI":"10.1109\/bigdata47090.2019.9006601","type":"proceedings-article","created":{"date-parts":[[2020,2,25]],"date-time":"2020-02-25T06:05:34Z","timestamp":1582610734000},"page":"2447-2456","source":"Crossref","is-referenced-by-count":4,"title":["Eliminating Data Collection Bottleneck for Wake Word Engine Training Using Found and Synthetic Data"],"prefix":"10.1109","author":[{"given":"Buvaneswari","family":"Ramanan","sequence":"first","affiliation":[]},{"given":"Lawrence","family":"Drabeck","sequence":"additional","affiliation":[]},{"given":"Thomas","family":"Woo","sequence":"additional","affiliation":[]},{"given":"Troy","family":"Cauble","sequence":"additional","affiliation":[]},{"given":"Anil","family":"Rana","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"journal-title":"Youglish","year":"2019","key":"ref39"},{"journal-title":"DeepSpeech Implementation","year":"2019","key":"ref38"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639589"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1883"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683343"},{"key":"ref30","article-title":"Data Augmentation for Spoken Language Understanding via Joint Variational Generation","author":"yoo","year":"2018","journal-title":"arXiv preprint arXiv 1809 02305"},{"key":"ref37","first-page":"3935","article-title":"Enhancing the TED-LIUM corpus with selected data for language modeling and more TED talks","author":"rousseau","year":"2014","journal-title":"Proc Int Conference on Language Resources and Evaluation (LREC)"},{"journal-title":"GENTLE","year":"2019","author":"ochshorn","key":"ref36"},{"journal-title":"Learning to recognize speech from chaotically synthesized data","year":"2018","author":"bonab","key":"ref35"},{"key":"ref34","article-title":"Training neural speech recognition systems with synthetic speech augmentation","author":"li","year":"2018","journal-title":"arXiv 1811 00707v1[cs CL]"},{"year":"2019","key":"ref60"},{"journal-title":"CMU Phonetic definition","year":"2019","key":"ref61"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.292"},{"key":"ref27","article-title":"UnrealStereo: Controlling Hazardous Factors to Analyze Stereo Vision","author":"zhang","year":"2016","journal-title":"arXiv preprint arXiv 1612 04647"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/GCCE.2018.8574624"},{"key":"ref2","article-title":"Convolutional neural networksfor smallfootprint keyword spotting","author":"sainath","year":"2015","journal-title":"Sixteenth Annual Conference of the International Speech Communication Association"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854370"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-2016"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"2616","DOI":"10.21437\/Interspeech.2017-950","article-title":"VoxCeleb: A Large-Scale Speaker Identification Dataset","author":"nagrani","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1085"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683120"},{"key":"ref23","article-title":"VoxCeleb2: Deep Speaker Recognition","author":"nagrani","year":"0","journal-title":"arXiv preprint arXiv 1806 05622"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.352"},{"key":"ref25","first-page":"240","article-title":"Can we steal your vocal identity from the Internet?: Initial investigation of cloning Obama&#x2019;s voice using GAN, WaveNet and low-quality found data","author":"lorenzo-trueba","year":"2018","journal-title":"Proc Odyssey 2018 The Speaker and Language Recognition Workshop"},{"key":"ref50","article-title":"WAVENET: A Generative Model for Raw Audio","author":"van den oord","year":"2016","journal-title":"SSW"},{"journal-title":"Public Domain Sounds Backup","year":"2019","key":"ref51"},{"key":"ref59","article-title":"Deep Speaker: an end-to-end neural speaker embedding system","author":"li","year":"2017","journal-title":"unpublished arXiv 1705 02304v1 [cs CL]"},{"key":"ref58","article-title":"WaveCycleGAN: Synthetic-to-natural speech waveform conversion using cycle-consistent adversarial networks","author":"tanaka","year":"2018","journal-title":"Unpublished arXiv 1809 10288v2"},{"key":"ref57","article-title":"ClariNet: Parallel wave generation in end-to-end text-to-speech","author":"ping","year":"2019","journal-title":"unpublished arXiv 1807 07281v3 [cs CL]"},{"journal-title":"Picovoice Wakeword Benchmark","year":"2018","key":"ref56"},{"year":"2019","key":"ref55"},{"journal-title":"DEMAND Dataset A database of mutichannel environmental noise recordings","year":"2019","author":"anhari","key":"ref54"},{"journal-title":"Tensorflow Speech Commands Example","year":"2019","key":"ref53"},{"journal-title":"Keyword spotting for Microcontroller","year":"2019","key":"ref52"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462688"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683474"},{"journal-title":"PyRoomAcustics","year":"2019","key":"ref40"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2891838"},{"article-title":"Speech commands: A public dataset for single-word speech recognition","year":"2017","author":"warden","key":"ref13"},{"key":"ref14","article-title":"Siamese neural networks for one-shot image recognition","author":"koch","year":"2015","journal-title":"ICML Deep Learning Workshop"},{"key":"ref15","first-page":"3630","article-title":"Matching networks for one shot learning","author":"vinyals","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref16","first-page":"4077","article-title":"Prototypical networks for few-shot learning","author":"snell","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref17","first-page":"1126","article-title":"Model-agnostic meta-learning for fast adaptation of deep networks","author":"chelsea","year":"2017","journal-title":"Proceedings of the 34th International Conference on Machine Learning-Volume 70"},{"key":"ref18","article-title":"Meta Learning for few-shot keyword spotting","author":"yangbin","year":"2018","journal-title":"unpublished arXiv 1812 10233v1 [cs CL]"},{"key":"ref19","article-title":"Prototypical metric transfer learning for continuous speech keyword spotting with limited training data","author":"seth","year":"2019","journal-title":"unpublished arXiv 1901 03860v1 [cs SD]"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/29.103088"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1990.115555"},{"key":"ref6","first-page":"5236","article-title":"Query-byexample keyword spotting using long short-term memory networks","author":"chen","year":"2015","journal-title":"IEEE International Conference on Acoustics Speech and Signal Processing (ICASSP)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1991.150338"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1737","article-title":"Convolutional recurrent neural networks for small-footprint keyword spotting","author":"arik","year":"2017","journal-title":"InterSpeech"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846306"},{"key":"ref49","first-page":"2962","article-title":"Deep voice 2: Multi-speaker neural text-to-speech","author":"gibiansky","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1777"},{"key":"ref46","first-page":"266","article-title":"StarGAN-VC: non-parallel many-to-many voice conversion using star generative adversarial networks","author":"kameoka","year":"2018","journal-title":"Proc IEEE\/ACL Workshop Spoken Lang Technol (SLT)"},{"journal-title":"Chainer VQ-VAE","year":"2018","key":"ref45"},{"key":"ref48","first-page":"4480","article-title":"Transfer learning from speaker verification to multispeaker text-to-speech synthesis","author":"jia","year":"2018","journal-title":"Advances in neural information processing systems"},{"journal-title":"SarGAN Voice Conversion","year":"2018","author":"songxiang","key":"ref47"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref41","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","author":"wang","year":"2018","journal-title":"Proceedings of the 35th International Conference on Machine Learning PMLR 80"},{"key":"ref44","first-page":"6306","article-title":"Neural discrete representation learning","author":"van den oord","year":"2017","journal-title":"Advances in neural information processing systems"},{"journal-title":"GST-Tacotron","year":"2018","author":"yang","key":"ref43"}],"event":{"name":"2019 IEEE International Conference on Big Data (Big Data)","start":{"date-parts":[[2019,12,9]]},"location":"Los Angeles, CA, USA","end":{"date-parts":[[2019,12,12]]}},"container-title":["2019 IEEE International Conference on Big Data (Big Data)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8986695\/9005444\/09006601.pdf?arnumber=9006601","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,17]],"date-time":"2022-07-17T21:48:26Z","timestamp":1658094506000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9006601\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,12]]},"references-count":61,"URL":"https:\/\/doi.org\/10.1109\/bigdata47090.2019.9006601","relation":{},"subject":[],"published":{"date-parts":[[2019,12]]}}}