{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,2]],"date-time":"2026-05-02T15:03:15Z","timestamp":1777734195075,"version":"3.51.4"},"reference-count":39,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,8,21]],"date-time":"2022-08-21T00:00:00Z","timestamp":1661040000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,8,21]]},"DOI":"10.1109\/icpr56361.2022.9956631","type":"proceedings-article","created":{"date-parts":[[2022,11,29]],"date-time":"2022-11-29T19:34:13Z","timestamp":1669750453000},"page":"1514-1521","source":"Crossref","is-referenced-by-count":3,"title":["Scene Text Recognition with Self-supervised Contrastive Predictive Coding"],"prefix":"10.1109","author":[{"given":"Xinzhe","family":"Jiang","sequence":"first","affiliation":[{"name":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianshu","family":"Zhang","sequence":"additional","affiliation":[{"name":"iFLYTEK Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jun","family":"Du","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenrong","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China,National Engineering Research Center of Speech and Language Information Processing,Hefei,P. R. China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiajia","family":"Wu","sequence":"additional","affiliation":[{"name":"iFLYTEK Research"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1117\/12.2520589"},{"key":"ref38","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"0"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2015.7333942"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2013.221"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.5244\/C.26.127"},{"key":"ref30","first-page":"1457","article-title":"End-to-end scene text recognition","author":"wang","year":"0","journal-title":"IEEE International Conference on Computer Vision ICCV 2011 Barcelona Spain November 6-13 2011"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/34.24792"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2848939"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2014.07.008"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.76"},{"key":"ref10","article-title":"Learning deep representations by mutual information estimation and maximization","author":"hjelm","year":"0"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref12","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"chen","year":"2020"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00313"},{"key":"ref14","article-title":"Unsupervised representation learning by predicting image rotations","author":"gidaris","year":"0"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01505"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"ref17","article-title":"Representation learning with contrastive predictive coding","volume":"abs 1807 3748","author":"van den oord","year":"2018","journal-title":"CoRR"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v30i1.10465"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00851"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00481"},{"key":"ref27","article-title":"Judging a book by its cover","volume":"abs 1610 9204","author":"iwana","year":"2016","journal-title":"CoRR"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107692"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2013.2277662"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00439"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2018.8486456"},{"key":"ref8","article-title":"Synthetic data and artificial neural networks for natural scene text recognition","volume":"abs 1406 2227","author":"jaderberg","year":"2014","journal-title":"CoRR"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-020-01369-0"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240571"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.254"},{"key":"ref1","doi-asserted-by":"crossref","first-page":"2298","DOI":"10.1109\/TPAMI.2016.2646371","article-title":"An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition","volume":"39","author":"shi","year":"2017","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN52387.2021.9533713"},{"key":"ref22","article-title":"Predictive coding: a theoretical and experimental review","volume":"abs 2107 12979","author":"millidge","year":"2021","journal-title":"CoRR"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3293353.3293386"},{"key":"ref24","first-page":"4182","article-title":"Data-efficient image recognition with contrastive predictive coding","author":"h\u00e9naff","year":"0"},{"key":"ref23","first-page":"297","article-title":"Noise-contrastive estimation: A new estimation principle for unnormalized statistical models","author":"gutmann","year":"0"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00021"},{"key":"ref25","first-page":"125","article-title":"Wavenet: A generative model for raw audio","author":"van den oord","year":"2016","journal-title":"The 9th ISCA Speech Synthesis Workshop Sunnyvale CA USA 13-15 September 2016"}],"event":{"name":"2022 26th International Conference on Pattern Recognition (ICPR)","location":"Montreal, QC, Canada","start":{"date-parts":[[2022,8,21]]},"end":{"date-parts":[[2022,8,25]]}},"container-title":["2022 26th International Conference on Pattern Recognition (ICPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9956007\/9955631\/09956631.pdf?arnumber=9956631","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,30]],"date-time":"2023-01-30T20:05:20Z","timestamp":1675109120000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9956631\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,21]]},"references-count":39,"URL":"https:\/\/doi.org\/10.1109\/icpr56361.2022.9956631","relation":{},"subject":[],"published":{"date-parts":[[2022,8,21]]}}}