{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,25]],"date-time":"2025-07-25T10:47:34Z","timestamp":1753440454139},"reference-count":29,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,6,6]],"date-time":"2021-06-06T00:00:00Z","timestamp":1622937600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,6,6]]},"DOI":"10.1109\/icassp39728.2021.9413645","type":"proceedings-article","created":{"date-parts":[[2021,5,13]],"date-time":"2021-05-13T15:53:45Z","timestamp":1620921225000},"page":"3880-3884","source":"Crossref","is-referenced-by-count":3,"title":["SEQ-CPC : Sequential Contrastive Predictive Coding for Automatic Speech Recognition"],"prefix":"10.1109","author":[{"given":"Yulong","family":"Chen","sequence":"first","affiliation":[]},{"given":"Jianping","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Weiqi","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Ming","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Haimei","family":"Kang","sequence":"additional","affiliation":[]},{"given":"Lu","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Tao","family":"Wei","sequence":"additional","affiliation":[]},{"given":"Jun","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Shaojun","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Xiao","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"ref11","article-title":"vqwav2vec: Self-supervised learning of discrete speech representations","author":"baevski","year":"2020","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref12","first-page":"297","article-title":"Noise-contrastive estimation: A new estimation principle for unnormalized statistical models","author":"gutmann","year":"2010","journal-title":"Thirteenth International Conference on Artificial Intelligence and Statistics"},{"key":"ref13","article-title":"Learning deep representations by mutual information estimation and maximization","author":"hjelm","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref14","article-title":"Learning discrete structured representations by adversarially maximizing mutual information","author":"stratos","year":"2020","journal-title":"International Conference on Machine Learning"},{"key":"ref15","article-title":"Representation learning with contrastive predictive coding","author":"den oord","year":"2018","journal-title":"arXiv preprint arXiv 1807 03748"},{"key":"ref16","article-title":"Data-efficient image recognition with contrastive predictive coding","author":"h\u00e9naff","year":"2020","journal-title":"Thirty-seventh International Conference on Machine Learning"},{"key":"ref17","article-title":"Improving speech emotion recognition via transformer-based predictive coding through transfer learning","author":"lian","year":"2018","journal-title":"arXiv preprint arXiv 1811 07691"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2380"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2938863"},{"key":"ref28","article-title":"Large vocabulary continuous speech recognition using htk","author":"valtchev","year":"1994","journal-title":"Proc of ICASSP"},{"key":"ref4","article-title":"Efficient estimation of word representations in vector space","author":"mikolov","year":"2013","journal-title":"International Conference on Learning Representations"},{"key":"ref27","article-title":"CSR-I (WSJ0) complete LDC93S6A. web download","author":"paul","year":"1993","journal-title":"Linguistic Data Consortium"},{"key":"ref3","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref6","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"NAACL"},{"key":"ref29","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"International Conference on Learning Representations (ICLR)"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"ref7","article-title":"Albert: A lite BERT for self-supervised learning of language representations","author":"lan","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref2","first-page":"6015","article-title":"Efficientnet: Rethinking model scaling for convolutional neural networks","author":"tan","year":"2019","journal-title":"36th International Conference on Machine Learning"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.167"},{"key":"ref1","first-page":"173","article-title":"Deep speech 2: End-to-end speech recognition in english and mandarin","author":"amodei","year":"2016","journal-title":"International Conference on Machine Learning"},{"key":"ref20","article-title":"Contrastive predictive coding based feature for automatic speaker verification","author":"lai","year":"2018","journal-title":"Bachelor-Thesis"},{"key":"ref22","article-title":"Formal limitations on the measurement of mutual information","author":"mcallester","year":"2020","journal-title":"International Conference on Artificial Intelligence and Statistics (AISTATS)"},{"key":"ref21","article-title":"On variational bounds of mutual information","author":"poole","year":"2019","journal-title":"International Conference on Machine Learning"},{"key":"ref24","article-title":"Mutual information gradient estimation for representation learning","author":"wen","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref23","article-title":"Understanding the limitations of variational mutual information estimators","author":"song","year":"2020","journal-title":"International Conference on Learning Representations"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"journal-title":"Elements of Information Theory","year":"2006","author":"cover","key":"ref25"}],"event":{"name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2021,6,6]]},"location":"Toronto, ON, Canada","end":{"date-parts":[[2021,6,11]]}},"container-title":["ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9413349\/9413350\/09413645.pdf?arnumber=9413645","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T11:40:47Z","timestamp":1652182847000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9413645\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,6,6]]},"references-count":29,"URL":"https:\/\/doi.org\/10.1109\/icassp39728.2021.9413645","relation":{},"subject":[],"published":{"date-parts":[[2021,6,6]]}}}