{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,29]],"date-time":"2024-10-29T19:27:09Z","timestamp":1730230029083,"version":"3.28.0"},"reference-count":38,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2018,4]]},"DOI":"10.1109\/icassp.2018.8461761","type":"proceedings-article","created":{"date-parts":[[2018,9,21]],"date-time":"2018-09-21T22:24:48Z","timestamp":1537568688000},"page":"4979-4983","source":"Crossref","is-referenced-by-count":15,"title":["Linguistic Unit Discovery from Multi-Modal Inputs in Unwritten Languages: Summary of the \u201cSpeaking Rosetta\u201d JSALT 2017 Workshop"],"prefix":"10.1109","author":[{"given":"Odette","family":"Scharenborg","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Laurent","family":"Besacier","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alan","family":"Black","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark","family":"Hasegawa-Johnson","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Florian","family":"Metze","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Graham","family":"Neubig","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sebastian","family":"Stuker","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Pierre","family":"Godard","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Markus","family":"Muller","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lucas","family":"Ondel","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shruti","family":"Palaskar","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philip","family":"Arthur","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francesco","family":"Ciannella","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingxing","family":"Du","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Elin","family":"Larsen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Danny","family":"Merkx","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rachid","family":"Riad","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Liming","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Emmanuel","family":"Dupoux","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref38","first-page":"451","article-title":"Is tectorial membrane filtering required to explain two tone suppression and the upward spread of masking?'","author":"allen","year":"1999","journal-title":"Mechanics of Hearing"},{"journal-title":"Listen attend and spell","year":"0","author":"chan","key":"ref33"},{"key":"ref32","article-title":"Image2speech: Automatically generating audio descriptions of images","author":"hasegawa-johnson","year":"2017","journal-title":"Proceedings of ICNLSSP"},{"key":"ref31","first-page":"31","article-title":"Mapping from articulatory movements to vocal tract spectrum with gaussian mixture model for articulatory speech synthesis","author":"toda","year":"2004","journal-title":"Proceedings of SSW 5 Pittsburgh PA"},{"key":"ref30","article-title":"Building an asr system for a low-resource language through the adaptation of a high-resource language asr system: Preliminary results","author":"scharenborg","year":"2017","journal-title":"Proceedings of ICNLSSP"},{"key":"ref37","first-page":"401","article-title":"Efficient spoken term discovery using randomized algorithms","author":"jansen","year":"0","journal-title":"Proceedings of ASRU 2011"},{"key":"ref36","article-title":"Unwritten languages demand attention too! word discovery with encoder-decoder models","author":"zanon boito","year":"2017","journal-title":"Proceedings of ASRU"},{"journal-title":"Sequence-to-sequence models can directly transcribe foreign speech","year":"0","author":"weiss","key":"ref35"},{"key":"ref34","article-title":"Listen and translate: A proof of concept for end-to-end speech-to-text translation","author":"b\u00e9rard","year":"2016","journal-title":"Proc NIPS workshop on End-to-end Learning for Speech and Audio Processing"},{"key":"ref10","first-page":"2132","article-title":"A comparative study of bnf and dnn multilingual training on cross-lingual low-resource speech recognition","author":"xu","year":"2015","journal-title":"Proceedings of Interspeech"},{"key":"ref11","first-page":"237","article-title":"Deep multimodal semantic embeddings for speech and images","author":"harwarth","year":"2015","journal-title":"proceedings ASRU"},{"key":"ref12","article-title":"Representations of language in a model of visually grounded speech signal","author":"chrupala","year":"2017","journal-title":"Proceedings of ASRU"},{"key":"ref13","first-page":"1858","article-title":"Unsupervised learning of spoken language with visual context","author":"harwath","year":"2016","journal-title":"Advances in Neural Information Processing System"},{"key":"ref14","article-title":"Parallel speech collection for under-resourced language studies using the LIG-Aikuma mobile device app","author":"blachon","year":"2016","journal-title":"Proceedings of the SLT"},{"journal-title":"A very low resource language speech corpus for computational language documentation experiments","year":"2017","author":"godard","key":"ref15"},{"key":"ref16","first-page":"237","article-title":"Deep multimodal semantic embeddings for speech and images","author":"harwath","year":"2015","journal-title":"Proceedings of ASRU"},{"journal-title":"Speech-coco","year":"0","author":"besacier","key":"ref17"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/GLU.2017-9"},{"key":"ref19","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"2014","journal-title":"European Conference on Computer Vision (ECCV)"},{"key":"ref28","article-title":"Composing graphical models with neural networks for structured representations and fast inference","author":"johnson","year":"0","journal-title":"Neural Inform Process Syst 2016"},{"key":"ref4","first-page":"165","article-title":"Unsupervised learning of acoustic sub-word units","author":"varadarajan","year":"2008","journal-title":"Proc of ACL Human Language Technologies Short Papers"},{"key":"ref27","article-title":"Collapsed variational Dirichlet process mixture models","volume":"20","author":"kurihara","year":"2007","journal-title":"Proceedings of the International Joint Conference on Artificial Intelligence"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"80","DOI":"10.1016\/j.procs.2016.04.033","article-title":"Variational inference for acoustic unit discovery","author":"ondel","year":"2016","journal-title":"Procedia Computer Science"},{"key":"ref6","first-page":"4366","article-title":"Towards multi-speaker unsupervised speech pattern discovery","author":"zhang","year":"2010","journal-title":"Proceeding of ICASSP"},{"key":"ref29","article-title":"Improving phoneme set discovery for documenting unwritten languages","author":"m\u00fcller","year":"2017","journal-title":"Elektronische Sprachsignalverarbeitung (ESSV) 2017"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.909282"},{"key":"ref8","article-title":"Cross-language bootstrapping for unsupervised acoustic model training: rapid development of a polish speech recognition system","author":"l\u00f6\u00f6f","year":"2009","journal-title":"Proceedings of Interspeech"},{"key":"ref7","article-title":"Experiments on cross-language acoustic modelling","author":"tanja schultz","year":"2001","journal-title":"Proceedings of Interspeech"},{"key":"ref2","article-title":"A summary of the 2012 JH CLSP Workshop on zero resource speech technologies and models of early language acquisition","author":"jansen","year":"2013","journal-title":"Proceedings of ICASSP"},{"key":"ref9","article-title":"The language-independent bottleneck features","author":"vesely","year":"2012","journal-title":"Proceedings of SLT"},{"key":"ref1","article-title":"Breaking the unwritten kanguage barrier: The Bulb project","author":"adda","year":"2016","journal-title":"Proceedings of the SLT"},{"key":"ref20","first-page":"340","article-title":"Experiences from the spoken dutch corpus project","author":"oostdijk","year":"2002","journal-title":"Proc LREC Las Palmas de Gran Canaria"},{"key":"ref22","article-title":"Evaluating speech features with the Minimal-Pair ABX task (I): Analysis of the classical MFC\/PLP pipeline","author":"schatz","year":"2013","journal-title":"Proceedings of Interspeech"},{"key":"ref21","first-page":"1762","article-title":"CLUSTERGEN: A statistical parametric speech synthesizer using trajectory modeling","author":"black","year":"2006","journal-title":"Proceedings of ICSLP"},{"key":"ref24","article-title":"Bridging the gap between speech technology and natural language processing: an evaluation toolbox for term discovery systems","author":"ludusan","year":"2014","journal-title":"Proceedings of LREC"},{"key":"ref23","article-title":"Evaluating speech features with the Minimal-Pair ABX task (II): Resistance to noise","author":"schatz","year":"2014","journal-title":"Proceedings of Interspeech"},{"journal-title":"Xnmt","year":"0","author":"neubig","key":"ref26"},{"key":"ref25","article-title":"The zero resource speech challenge 2017","author":"dunbar","year":"2017","journal-title":"Proceedings of ASRU"}],"event":{"name":"ICASSP 2018 - 2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2018,4,15]]},"location":"Calgary, AB","end":{"date-parts":[[2018,4,20]]}},"container-title":["2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8450881\/8461260\/08461761.pdf?arnumber=8461761","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,8,24]],"date-time":"2020-08-24T00:07:09Z","timestamp":1598227629000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8461761\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,4]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icassp.2018.8461761","relation":{},"subject":[],"published":{"date-parts":[[2018,4]]}}}