{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T02:27:21Z","timestamp":1771468041216,"version":"3.50.1"},"reference-count":46,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,12]]},"DOI":"10.1109\/slt.2018.8639553","type":"proceedings-article","created":{"date-parts":[[2019,2,14]],"date-time":"2019-02-14T18:36:34Z","timestamp":1550169394000},"page":"941-948","source":"Crossref","is-referenced-by-count":19,"title":["Phonetic-and-Semantic Embedding of Spoken words with Applications in Spoken Content Retrieval"],"prefix":"10.1109","author":[{"given":"Yi-Chen","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sung-Feng","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chia-Hao","family":"Shen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hung-yi","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lin-shan","family":"Lee","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2012.2208628"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2438543"},{"key":"ref33","article-title":"beta-vae: Learning basic visual concepts with a constrained variational framework","author":"higgins","year":"2016","journal-title":"CoRR"},{"key":"ref32","first-page":"2172","article-title":"Infogan: Interpretable representation learning by information maximizing generative adversarial nets","author":"chen","year":"2016","journal-title":"Advances in Neural IInformation Processing Systems"},{"key":"ref31","article-title":"Unsupervised learning of disentangled and interpretable representations from sequential data","author":"hsu","year":"2017","journal-title":"CoRR"},{"key":"ref30","article-title":"Deep convolutional inverse graphics network","author":"kulkarni","year":"2015","journal-title":"CoRR"},{"key":"ref37","first-page":"7","article-title":"Towards learning semantic audio representations from unlabeled data","volume":"2","author":"jansen","year":"2017","journal-title":"Signal"},{"key":"ref36","article-title":"Unsupervised learning of semantic audio representations","author":"jansen","year":"2017"},{"key":"ref35","article-title":"Unsupervised adaptation with domain separation networks for robust speech recognition","author":"meng","year":"2017","journal-title":"CoRR"},{"key":"ref34","article-title":"Domain separation networks","author":"bousmalis","year":"2016","journal-title":"CoRR"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1166"},{"key":"ref40","article-title":"Leveraging relevance cues for improved spoken document retrieval","author":"chen","year":"2011","journal-title":"Twelfth Annual Conference of the International Speech Communication Association"},{"key":"ref11","article-title":"Neural machine translation by jointly learning to align and translate","author":"bahdanau","year":"2014","journal-title":"CoRR"},{"key":"ref12","first-page":"3104","article-title":"Sequence to sequence learning with neural networks","author":"sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.515"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1014"},{"key":"ref15","article-title":"Multiview recurrent neural acoustic word embeddings","author":"he","year":"2016"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846310"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-82"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472619"},{"key":"ref19","article-title":"Word embeddings for speech recognition","author":"bengio","year":"2014","journal-title":"Fifteenth Annual Conference of the International Speech Communication Association"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2017.8269008"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-2632"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2752462"},{"key":"ref3","article-title":"Enriching word vectors with subword information","author":"bojanowski","year":"2016"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N16-1030"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2017.04.008"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-2106"},{"key":"ref8","first-page":"2741","article-title":"Character-aware neural language models","author":"kim","year":"2016","journal-title":"AAAI"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-2067"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1041"},{"key":"ref1","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","author":"mikolov","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707765"},{"key":"ref45","first-page":"5769","article-title":"Improved training of wasserstein gans","author":"gulrajani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1592"},{"key":"ref21","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1118","article-title":"Multitask learning with low-level auxiliary tasks for encoder-decoder based speech recognition","author":"toshniwal","year":"2017"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639326"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2341"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639283"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref44","article-title":"An iterative closest point method for unsupervised word translation","author":"hoshen","year":"2018"},{"key":"ref26","article-title":"Parsing speech: A neural approach to integrating lexical and acoustic-prosodic information","author":"tran","year":"2017"},{"key":"ref43","article-title":"Word translation without parallel data","author":"conneau","year":"2017"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462002"}],"event":{"name":"2018 IEEE Spoken Language Technology Workshop (SLT)","location":"Athens, Greece","start":{"date-parts":[[2018,12,18]]},"end":{"date-parts":[[2018,12,21]]}},"container-title":["2018 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8632666\/8639030\/08639553.pdf?arnumber=8639553","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,3,11]],"date-time":"2019-03-11T19:45:29Z","timestamp":1552333529000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8639553\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,12]]},"references-count":46,"URL":"https:\/\/doi.org\/10.1109\/slt.2018.8639553","relation":{},"subject":[],"published":{"date-parts":[[2018,12]]}}}