{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T11:30:06Z","timestamp":1778585406602,"version":"3.51.4"},"reference-count":22,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2017,12]]},"DOI":"10.1109\/asru.2017.8269012","type":"proceedings-article","created":{"date-parts":[[2018,1,25]],"date-time":"2018-01-25T21:43:53Z","timestamp":1516916633000},"page":"747-753","source":"Crossref","is-referenced-by-count":3,"title":["Composite embedding systems for ZeroSpeech2017 Track1"],"prefix":"10.1109","author":[{"given":"Hayato","family":"Shibata","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Taku","family":"Kato","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Takahiro","family":"Shinozaki","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shinji","family":"Watanabe","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2010.5495637"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1980.1163420"},{"key":"ref12","first-page":"3174","article-title":"Discovering discrete subword units with binarized autoencoders and hidden-Markov-model encoders","author":"badino","year":"2015","journal-title":"InterSpeech"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7179087"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846292"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178918"},{"key":"ref16","first-page":"3239","article-title":"Multi-softmax deep neural network for semi-supervised training","author":"su","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-741"},{"key":"ref18","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1296","article-title":"Advances in joint CTC-attention based end-to-end speech recognition with a deep CNN encoder and RNN-LM","author":"hori","year":"2017","journal-title":"InterSpeech"},{"key":"ref19","author":"simonyan","year":"2014","journal-title":"Very Deep Convolutional Networks for Large-scale Image Recognition"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.909282"},{"key":"ref3","first-page":"16","article-title":"Language independent and unsupervised acoustic models for speech recognition and keyword spotting","author":"knill","year":"2014","journal-title":"Proc INTERSPEECH"},{"key":"ref6","first-page":"3189","article-title":"Parallel inference of dirichlet process Gaussian mixture models for unsupervised acoustic modeling: A feasibility study","author":"chen","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref5","article-title":"Fully unsupervised small-vocabulary speech recognition using a segmental bayesian model","author":"kamper","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref8","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1081","article-title":"Semi-supervised learning of a pronunciation dictionary from disjoint phonemic transcripts and text","author":"shinozaki","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1587\/transinf.E95.D.614"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/S0167-6393(00)00094-7"},{"key":"ref1","first-page":"3169","article-title":"The zero resource speech challenge 2015","author":"versteegh","year":"2015","journal-title":"Proc INTERSPEECH"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8268953"},{"key":"ref20","first-page":"244","article-title":"A Japanese national project on spontaneous speech corpus and processing technology","author":"furui","year":"2000","journal-title":"Proc ASR'00"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854049"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2015.7404852"}],"event":{"name":"2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Okinawa","start":{"date-parts":[[2017,12,16]]},"end":{"date-parts":[[2017,12,20]]}},"container-title":["2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8260578\/8268903\/08269012.pdf?arnumber=8269012","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,1,25]],"date-time":"2022-01-25T21:35:00Z","timestamp":1643146500000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/8269012\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2017,12]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/asru.2017.8269012","relation":{},"subject":[],"published":{"date-parts":[[2017,12]]}}}