{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,13]],"date-time":"2026-03-13T20:57:05Z","timestamp":1773435425488,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,1,19]]},"DOI":"10.1109\/slt48900.2021.9383605","type":"proceedings-article","created":{"date-parts":[[2021,3,25]],"date-time":"2021-03-25T20:46:54Z","timestamp":1616705214000},"page":"215-222","source":"Crossref","is-referenced-by-count":50,"title":["Data Augmenting Contrastive Learning of Speech Representations in the Time Domain"],"prefix":"10.1109","author":[{"given":"Eugene","family":"Kharitonov","sequence":"first","affiliation":[]},{"given":"Morgane","family":"Riviere","sequence":"additional","affiliation":[]},{"given":"Gabriel","family":"Synnaeve","sequence":"additional","affiliation":[]},{"given":"Lior","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"Pierre-Emmanuel","family":"Mazare","sequence":"additional","affiliation":[]},{"given":"Matthijs","family":"Douze","sequence":"additional","affiliation":[]},{"given":"Emmanuel","family":"Dupoux","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref38","article-title":"Population based augmentation: Efficient learning of augmentation policy schedules","author":"ho","year":"2019"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref32","article-title":"A simple framework for contrastive learning of visual representations","author":"chen","year":"2020"},{"key":"ref31","first-page":"766","article-title":"Discriminative unsupervised feature learning with convolutional neural networks","author":"dosovitskiy","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"ref37","article-title":"MUSAN: A Music, Speech, and Noise Corpus","author":"snyder","year":"2015"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9052974"},{"key":"ref35","year":"2019","journal-title":"Magicdata mandarin chinese read speech corpus"},{"key":"ref34","article-title":"Pytorch: An imperative style, high-performance deep learning library","author":"paszke","year":"2019","journal-title":"NeurIPS"},{"key":"ref10","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2020.findings-emnlp.106","article-title":"Learning robust and multilingual speech representations","author":"kawakami","year":"2020"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053541"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2016-988"},{"key":"ref13","article-title":"Feature optimized dpgmm clustering for un-supervised subword modeling: A contribution to zerospeech 2017","author":"heck","year":"2017","journal-title":"ASRU"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2938863"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2904"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6855085"},{"key":"ref17","doi-asserted-by":"crossref","DOI":"10.1109\/ICASSP40776.2020.9053569","article-title":"Multi-task self-supervised learning for robust speech recognition","author":"ravanelli","year":"2020"},{"key":"ref18","first-page":"165","article-title":"Unsu-pervised learning of acoustic sub-word units","author":"varadarajan","year":"2008","journal-title":"Proceedings of ACL-08 HLT Short Papers"},{"key":"ref19","first-page":"40","article-title":"A nonparametric bayesian approach to acoustic model discovery","author":"lee","year":"2012","journal-title":"Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 1 Long Papers)"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7953152"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.04.031"},{"key":"ref27","article-title":"Audio augmentation for speech recognition","author":"ko","year":"2015","journal-title":"Sixteenth Annual Conference of the International Speech Communication Association"},{"key":"ref3","author":"schatz","year":"2016","journal-title":"Ph D Dissertation"},{"key":"ref6","article-title":"Representation learning with contrastive predictive coding","author":"oord","year":"2018"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2680"},{"key":"ref5","article-title":"The zero resource speech challenge 2017","author":"dunbar","year":"0"},{"key":"ref8","article-title":"Libri-light: A bench-mark for asr with limited or no supervision","author":"kahn","year":"2020","journal-title":"ICASSP"},{"key":"ref7","article-title":"wav2vec: Unsupervised pre-training for speech recog-nition","author":"schneider","year":"2019"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2743"},{"key":"ref9","article-title":"Unsupervised pretraining transfers well across lan-guages","author":"rivi\u00e8re","year":"2020"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ISSPA.2012.6310546"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.04.033"},{"key":"ref22","article-title":"Contrastive predictive coding based feature for automatic speaker verification","author":"lai","year":"2019"},{"key":"ref21","first-page":"3033","article-title":"Putting an end to end-to-end: Gradient-isolated learning of representations","author":"l\u00f6we","year":"2019","journal-title":"NIPS"},{"key":"ref24","article-title":"Generative pre-training for speech with autoregressive predictive coding","author":"chung","year":"2019"},{"key":"ref23","article-title":"An un-supervised autoregressive model for speech representation learning","author":"chung","year":"2019"},{"key":"ref26","article-title":"Data augmentation for low resource languages","author":"ragni","year":"2014","journal-title":"InterSpeech"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU.2013.6707748"}],"event":{"name":"2021 IEEE Spoken Language Technology Workshop (SLT)","location":"Shenzhen, China","start":{"date-parts":[[2021,1,19]]},"end":{"date-parts":[[2021,1,22]]}},"container-title":["2021 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9383468\/9383452\/09383605.pdf?arnumber=9383605","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,24]],"date-time":"2023-10-24T00:22:00Z","timestamp":1698106920000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9383605\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,1,19]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/slt48900.2021.9383605","relation":{},"subject":[],"published":{"date-parts":[[2021,1,19]]}}}