{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,17]],"date-time":"2026-02-17T14:17:54Z","timestamp":1771337874913,"version":"3.50.1"},"reference-count":44,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383545","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T20:46:54Z","timestamp":1616705214000},"page":"935-942","source":"Crossref","is-referenced-by-count":6,"title":["Acoustic Span Embeddings for Multilingual Query-by-Example Search"],"prefix":"10.1109","author":[{"given":"Yushi","family":"Hu","sequence":"first","affiliation":[]},{"given":"Shane","family":"Settle","sequence":"additional","affiliation":[]},{"given":"Karen","family":"Livescu","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.1992.225858"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298682"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682903"},{"key":"ref32","article-title":"Word embeddings for speech recognition","author":"bengio","year":"2014","journal-title":"Proc IEEE Int Conf Acoustics Speech and Signal Processing (ICASSP)"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-304","article-title":"Rapid evaluation of speech representations for spoken term discovery","author":"carlin","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2017.2759726"},{"key":"ref37","article-title":"Query by example search on speech at MediaEval 2014","author":"anguera","year":"2014","journal-title":"MediaEval"},{"key":"ref36","article-title":"MediaEval 2013 spoken web search task: system performance measures","author":"rodriguez-fuentes","year":"2013","journal-title":"n TR-2013-1 Department of Electricity and Electronics University of the Basque Country"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-317"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3119"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-313"},{"key":"ref40","year":"0","journal-title":"IARPA Babel language pack IARPA-babel101b-v0 4c IARPA-babel102b-v0 5a IARPA-babel103b-v0 4b IARPA-babel104b-v0 4by IARPA-babel105b-v0 5 IARPA-babel106-v0 2g IARPA-babel204b-v1 1b IARPA-babel206b-v0 1e IARPA-babel304b-v1 0b IARPA-babel305b-v1 0c IARPA-babel306b-v2 0c"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-1276"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707765"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472619"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-82"},{"key":"ref15","article-title":"Multi-view recurrent neural acoustic word embeddings","author":"he","year":"2017","journal-title":"Proc Int Conf on Learning Representations (ICLR)"},{"key":"ref16","article-title":"Truly unsupervised acoustic word embeddings using weak top-down constraints in encoder-decoder models","author":"kamper","year":"2018","journal-title":"Proc IEEE Int Conf Acoustics Speech and Signal Processing (ICASSP)"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1010"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-2364"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7179089"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2020.2988788"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2009.5372931"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472835"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2009.5372889"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639327"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2016.7846310"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2012-566","article-title":"Indexing raw acoustic features for scalable zero resource search","author":"jansen","year":"2012","journal-title":"Proc INTERSPEECH"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/BF01074755"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-691"},{"key":"ref2","article-title":"Query by example search on speech at MediaEval 2015","author":"mir\u00f3","year":"2015","journal-title":"MediaEval"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1978.1163055"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2009.5373341"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2017-1592","article-title":"Query-by-example search with discriminative neural acoustic word embeddings","author":"settle","year":"2017","journal-title":"Proc INTERSPEECH"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2828"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054202"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref24","article-title":"ELiRF at MediaEval 2015: Query by example search on speech task (QUESST)","author":"lafarga","year":"2015","journal-title":"MediaEval"},{"key":"ref41","article-title":"The Kaldi speech recognition toolkit","author":"povey","year":"2011","journal-title":"Proc IEEE Workshop Automatic Speech Recognition and Understanding (ASRU)"},{"key":"ref23","doi-asserted-by":"crossref","DOI":"10.21437\/Interspeech.2011-355","article-title":"A piecewise aggregate approximation lower-bound estimate for posteriorgram-based dynamic time warping","author":"zhang","year":"2011","journal-title":"Proc INTERSPEECH"},{"key":"ref44","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2015","journal-title":"Proc Int Conf on Learning Representations (ICLR)"},{"key":"ref26","article-title":"The NNI query-by-example system for MediaEval 2015","author":"hou","year":"2015","journal-title":"Working Notes Proceedings of the MediaEval 2015 Workshop"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref25","article-title":"GTM-UVigo systems for the query-by-example search on speech task at MediaEval 2015","author":"lopez-otero","year":"2015","journal-title":"MediaEval"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","location":"Shenzhen, China","start":{"date-parts":[[2021,1,19]]},"end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383545.pdf?arnumber=9383545","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,22]],"date-time":"2022-12-22T13:16:50Z","timestamp":1671715010000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383545\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":44,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383545","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}