{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T16:51:12Z","timestamp":1779382272463,"version":"3.53.1"},"reference-count":550,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,12]]},"DOI":"10.1007\/s13042-024-02443-6","type":"journal-article","created":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T14:21:18Z","timestamp":1732458078000},"page":"9851-9915","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":264,"title":["A comprehensive survey on pretrained foundation models: a history from BERT to 
ChatGPT"],"prefix":"10.1007","volume":"16","author":[{"given":"Ce","family":"Zhou","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qian","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chen","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jun","family":"Yu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yixin","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guangjing","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kai","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Cheng","family":"Ji","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Qiben","family":"Yan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lifang","family":"He","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hao","family":"Peng","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jianxin","family":"Li","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jia","family":"Wu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ziwei","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pengtao","family":"Xie","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Caiming","family":"Xiong","sequence":"additional","affiliation":[],"role"
:[{"vocabulary":"crossref","role":"author"}]},{"given":"Jian","family":"Pei","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Philip S.","family":"Yu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Lichao","family":"Sun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"2443_CR1","unstructured":"Bommasani R, Hudson DA, Adeli E, Altman R, Arora S, Arx S, Bernstein MS, Bohg J, Bosselut A, Brunskill E et\u00a0al (2021) On the opportunities and risks of foundation models. arXiv preprint arXiv:2108.07258"},{"key":"2443_CR2","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1002\/aris.1440370103","volume":"37","author":"GG Chowdhury","year":"2003","unstructured":"Chowdhury GG (2003) Natural language processing. Annu Rev Inf Sci Technol 37:51\u201389","journal-title":"Annu Rev Inf Sci Technol"},{"key":"2443_CR3","volume-title":"Computer vision: a modern approach","author":"D Forsyth","year":"2011","unstructured":"Forsyth D, Ponce J (2011) Computer vision: a modern approach. University of Illinois at Urbana-Champaign, USA"},{"key":"2443_CR4","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-349-03521-2","volume-title":"Graph theory with applications","author":"JA Bondy","year":"1976","unstructured":"Bondy JA, Murty USR et al (1976) Graph theory with applications. Macmillan, London"},{"key":"2443_CR5","doi-asserted-by":"crossref","unstructured":"Qiu X, Sun T, Xu Y, Shao Y, Dai N, Huang X (2020) Pre-trained models for natural language processing: a survey. 
Sci China Technol Sci 63(10):1872\u20131897","DOI":"10.1007\/s11431-020-1647-3"},{"key":"2443_CR6","doi-asserted-by":"crossref","unstructured":"Li J, Tang T, Zhao WX, Wen J-R (2021) Pretrained language models for text generation: a survey","DOI":"10.24963\/ijcai.2021\/612"},{"key":"2443_CR7","unstructured":"Han K, Wang Y, Chen H, Chen X, Guo J, Liu Z, Tang Y, Xiao A, Xu C, Xu Y et\u00a0al (2020) A survey on visual transformer. arXiv"},{"key":"2443_CR8","doi-asserted-by":"crossref","unstructured":"Sanchez S, Romero H, Morales A (2020) A review: comparison of performance metrics of pretrained models for object detection using the tensorflow framework. In: IOP conference series: materials science and engineering","DOI":"10.1088\/1757-899X\/844\/1\/012024"},{"key":"2443_CR9","unstructured":"Hu W, Liu B, Gomes J, Zitnik M, Liang P, Pande V, Leskovec J (2019) Pre-training graph neural networks. arXiv"},{"key":"2443_CR10","doi-asserted-by":"crossref","unstructured":"Zhuang F, Qi Z, Duan K, Xi D, Zhu Y, Zhu H, Xiong H, He Q (2020) A comprehensive survey on transfer learning. In: Proceedings of the IEEE","DOI":"10.1109\/JPROC.2020.3004555"},{"key":"2443_CR11","unstructured":"Bengio Y, Ducharme R, Vincent P, Janvin C (2000) A neural probabilistic language model. Adv Neural Inf Proc Syst"},{"key":"2443_CR12","unstructured":"Mikolov T, Chen K, Corrado G, Dean J (2013) Efficient estimation of word representations in vector space. In: 1st international conference on learning representations, ICLR 2013. Workshop track proceedings, Scottsdale, Arizona, USA, May 2\u20134, 2013"},{"key":"2443_CR13","unstructured":"Devlin J, Chang M, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. In: NAACL-HLT"},{"key":"2443_CR14","unstructured":"Yang Z, Dai Z, Yang Y, Carbonell JG, Salakhutdinov R, Le QV (2019) Xlnet: generalized autoregressive pretraining for language understanding. 
In: NeurIPS"},{"key":"2443_CR15","unstructured":"Chen M, Tworek J, Jun H, Yuan Q, Pinto HPdO, Kaplan J, Edwards H, Burda Y, Joseph N, Brockman G et\u00a0al (2021) Evaluating large language models trained on code. arXiv preprint arXiv:2107.03374"},{"key":"2443_CR16","unstructured":"Neelakantan A, Xu T, Puri R, Radford A, Han JM, Tworek J, Yuan Q, Tezak N, Kim JW, Hallacy C et\u00a0al (2022) Text and code embeddings by contrastive pre-training. arXiv preprint arXiv:2201.10005"},{"key":"2443_CR17","unstructured":"Christiano PF, Leike J, Brown T, Martic M, Legg S, Amodei D (2017) Deep reinforcement learning from human preferences. Adv Neural Inf Process Syst"},{"key":"2443_CR18","first-page":"3008","volume":"33","author":"N Stiennon","year":"2020","unstructured":"Stiennon N, Ouyang L, Wu J, Ziegler D, Lowe R, Voss C, Radford A, Amodei D, Christiano PF (2020) Learning to summarize with human feedback. Adv Neural Inf Process Syst 33:3008\u20133021","journal-title":"Adv Neural Inf Process Syst"},{"key":"2443_CR19","unstructured":"Ouyang L, Wu J, Jiang X, Almeida D, Wainwright CL, Mishkin P, Zhang C, Agarwal S, Slama K, Ray A., et\u00a0al. (2022) Training language models to follow instructions with human feedback. arXiv preprint arXiv:2203.02155"},{"key":"2443_CR20","unstructured":"Brown TB, Mann B, Ryder N, Subbiah M, Kaplan J, Dhariwal P, Neelakantan A, Shyam P, Sastry G, Askell A et\u00a0al (2020) Language models are few-shot learners. arXiv"},{"key":"2443_CR21","doi-asserted-by":"crossref","unstructured":"Lester B, Al-Rfou R, Constant N (2021) The power of scale for parameter-efficient prompt tuning. In: Proceedings of the 2021 conference on empirical methods in natural language processing, pp 3045\u20133059","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"2443_CR22","doi-asserted-by":"crossref","unstructured":"Schick T, Sch\u00fctze H (2021) Exploiting cloze-questions for few-shot text classification and natural language inference. 
In: Proceedings of the 16th conference of the European chapter of the association for computational linguistics: main volume, pp 255\u2013269","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"key":"2443_CR23","unstructured":"Zhang Z, Zhang A, Li M, Smola A (2023) Automatic chain of thought prompting in large language models. In: International conference on learning representations"},{"key":"2443_CR24","unstructured":"Wei J, Wang X, Schuurmans D, Bosma M, Xia F, Chi EH, Le QV, Zhou D et al (2022) Chain-of-thought prompting elicits reasoning in large language models. Adv Neu Inform Process Syst 35:24824\u201324837"},{"key":"2443_CR25","unstructured":"OpenAI (2023) GPT-4 technical report"},{"key":"2443_CR26","unstructured":"Wang P, Yang A, Men R, Lin J, Bai S, Li Z, Ma J, Zhou C, Zhou J, Yang H (2022) Unifying architectures, tasks, and modalities through a simple sequence-to-sequence learning framework. arXiv preprint arXiv:2202.03052"},{"key":"2443_CR27","unstructured":"Lu J, Clark C, Zellers R, Mottaghi R, Kembhavi A (2022) Unified-io: A unified model for vision, language, and multi-modal tasks. arXiv preprint arXiv:2206.08916"},{"key":"2443_CR28","doi-asserted-by":"crossref","unstructured":"Singh A, Hu R, Goswami V, Couairon G, Galuba W, Rohrbach M, Kiela D (2022) Flava: a foundational language and vision alignment model. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 15638\u201315650","DOI":"10.1109\/CVPR52688.2022.01519"},{"key":"2443_CR29","doi-asserted-by":"crossref","unstructured":"Wang W, Bao H, Dong L, Bjorck J, Peng Z, Liu Q, Aggarwal K, Mohammed OK, Singhal S, Som S et\u00a0al (2022) Image as a foreign language: Beit pretraining for all vision and vision-language tasks. arXiv preprint arXiv:2208.10442","DOI":"10.1109\/CVPR52729.2023.01838"},{"key":"2443_CR30","unstructured":"Clark K, Luong M, Le QV, Manning CD (2020) ELECTRA: pre-training text encoders as discriminators rather than generators. 
In: ICLR"},{"key":"2443_CR31","doi-asserted-by":"crossref","unstructured":"Wallace E, Rodriguez P, Feng S, Yamada I, Boyd-Graber J (2019) Trick me if you can: human-in-the-loop generation of adversarial examples for question answering. Trans Assoc Comput Linguist 7:387\u2013401","DOI":"10.1162\/tacl_a_00279"},{"key":"2443_CR32","doi-asserted-by":"crossref","unstructured":"Nie Y, Williams A, Dinan E, Bansal M, Weston J, Kiela D (2020) Adversarial NLI: a new benchmark for natural language understanding. In: ACL","DOI":"10.18653\/v1\/2020.acl-main.441"},{"key":"2443_CR33","doi-asserted-by":"crossref","unstructured":"Niven T, Kao H (2019) Probing neural network comprehension of natural language arguments. In: ACL","DOI":"10.18653\/v1\/P19-1459"},{"key":"2443_CR34","doi-asserted-by":"crossref","unstructured":"Wang G, Ivanov N, Chen B, Wang Q, Yan Q (2023) Graph learning for interactive threat detection in heterogeneous smart home rule data. In: 2023 ACM SIGMOD international conference on management of data. ACM","DOI":"10.1145\/3588956"},{"key":"2443_CR35","doi-asserted-by":"publisher","unstructured":"Gordon MA, Duh K, Andrews N (2020) Compressing BERT: studying the effects of weight pruning on transfer learning. In: RepL4NLP@ACL . https:\/\/doi.org\/10.18653\/v1\/2020.repl4nlp-1.18","DOI":"10.18653\/v1\/2020.repl4nlp-1.18"},{"key":"2443_CR36","unstructured":"Lan Z, Chen M, Goodman S, Gimpel K, Sharma P, Soricut R (2020) ALBERT: a lite BERT for self-supervised learning of language representations. In: ICLR"},{"key":"2443_CR37","doi-asserted-by":"crossref","unstructured":"Han X, Zhang Z, Ding N, Gu Y, Liu X, Huo Y, Qiu J, Zhang L, Han W, Huang M et al (2021) Pre-trained models: past, present and future. AI Open 2:225\u2013250","DOI":"10.1016\/j.aiopen.2021.08.002"},{"key":"2443_CR38","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. 
arXiv"},{"issue":"3","key":"2443_CR39","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/s41095-022-0271-y","volume":"8","author":"M-H Guo","year":"2022","unstructured":"Guo M-H, Xu T-X, Liu J-J, Liu Z-N, Jiang P-T, Mu T-J, Zhang S-H, Martin RR, Cheng M-M, Hu S-M (2022) Attention mechanisms in computer vision: a survey. Comput Visual Media 8(3):331\u2013368","journal-title":"Comput Visual Media"},{"key":"2443_CR40","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et\u00a0al (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929"},{"key":"2443_CR41","unstructured":"Yun S, Jeong M, Kim R, Kang J, Kim HJ (2019) Graph transformer networks. Adv Neural Inf Process Syst 32"},{"key":"2443_CR42","unstructured":"Dehghani M, Djolonga J, Mustafa B, Padlewski P, Heek J, Gilmer J, Steiner AP, Caron M, Geirhos R, Alabdulmohsin I et\u00a0al (2023) Scaling vision transformers to 22 billion parameters. In: International conference on machine learning. PMLR, pp 7480\u20137512"},{"key":"2443_CR43","unstructured":"Chowdhery A, Narang S, Devlin J, Bosma M, Mishra G, Roberts A, Barham P, Chung HW, Sutton C, Gehrmann S et\u00a0al (2022) Palm: scaling language modeling with pathways. arXiv preprint arXiv:2204.02311"},{"key":"2443_CR44","doi-asserted-by":"crossref","unstructured":"Joshi M, Chen D, Liu Y, Weld DS, Zettlemoyer L, Levy O (2020) Spanbert: improving pre-training by representing and predicting spans. Trans Assoc Comput Linguist 8:64\u201377","DOI":"10.1162\/tacl_a_00300"},{"key":"2443_CR45","doi-asserted-by":"publisher","unstructured":"Lewis M, Liu Y, Goyal N, Ghazvininejad M, Mohamed A, Levy O, Stoyanov V, Zettlemoyer L (2020) BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: ACL. 
https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.703","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"2443_CR46","unstructured":"Clark K, Luong M-T, Le QV, Manning CD (2020) Electra: pre-training text encoders as discriminators rather than generators. arXiv"},{"key":"2443_CR47","unstructured":"Lan Z, Chen M, Goodman S, Gimpel K, Sharma P, Soricut R (2019) Albert: a lite bert for self-supervised learning of language representations. arXiv"},{"key":"2443_CR48","unstructured":"Donahue J, Kr\u00e4henb\u00fchl P, Darrell T (2016) Adversarial feature learning. arXiv"},{"key":"2443_CR49","doi-asserted-by":"crossref","unstructured":"Caron M, Bojanowski P, Joulin A, Douze M (2018) Deep clustering for unsupervised learning of visual features. In: ECCV","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"2443_CR50","unstructured":"Radford A, Narasimhan K, Salimans T, Sutskever I (2018) Improving language understanding by generative pre-training"},{"key":"2443_CR51","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I (2019) Language models are unsupervised multitask learners. OpenAI blog"},{"key":"2443_CR52","doi-asserted-by":"crossref","unstructured":"Caruana R (1997) Multitask learning. Mach Learn 28:41\u201375","DOI":"10.1023\/A:1007379606734"},{"key":"2443_CR53","doi-asserted-by":"crossref","unstructured":"Peters ME, Neumann M, Iyyer M, Gardner M, Clark C, Lee K, Zettlemoyer L (2018) Deep contextualized word representations. arXiv","DOI":"10.18653\/v1\/N18-1202"},{"key":"2443_CR54","unstructured":"Wu Y, Schuster M, Chen Z, Le QV, Norouzi M, Macherey W, Krikun M, Cao Y, Gao Q, Macherey K et\u00a0al (2016) Google\u2019s neural machine translation system: Bridging the gap between human and machine translation. arXiv"},{"key":"2443_CR55","unstructured":"Liu Y, Ott M, Goyal N, Du J, Joshi M, Chen D, Levy O, Lewis M, Zettlemoyer L, Stoyanov V (2019) Roberta: A robustly optimized BERT pretraining approach. 
CoRR arXiv:1907.11692"},{"key":"2443_CR56","doi-asserted-by":"crossref","unstructured":"Sennrich R, Haddow B, Birch A (2015) Neural machine translation of rare words with subword units. arXiv","DOI":"10.18653\/v1\/P16-1162"},{"key":"2443_CR57","unstructured":"Song K, Tan X, Qin T, Lu J, Liu T (2020) Mpnet: masked and permuted pre-training for language understanding. In: NeurIPS"},{"issue":"2","key":"2443_CR58","first-page":"1","volume":"13","author":"Q Li","year":"2022","unstructured":"Li Q, Peng H, Li J, Xia C, Yang R, Sun L, Yu PS, He L (2022) A survey on text classification: from traditional to deep learning. ACM Trans Intell Syst Technol (TIST) 13(2):1\u201341","journal-title":"ACM Trans Intell Syst Technol (TIST)"},{"key":"2443_CR59","unstructured":"Song K, Tan X, Qin T, Lu J, Liu T-Y (2019) Mass: masked sequence to sequence pre-training for language generation. arXiv"},{"key":"2443_CR60","unstructured":"Dong L, Yang N, Wang W, Wei F, Liu X, Wang Y, Gao J, Zhou M, Hon H-W (2019) Unified language model pre-training for natural language understanding and generation. arXiv"},{"key":"2443_CR61","unstructured":"Sun Y, Wang S, Li Y, Feng S, Chen X, Zhang H, Tian X, Zhu D, Tian H, Wu H (2019) Ernie: enhanced representation through knowledge integration. arXiv"},{"key":"2443_CR62","doi-asserted-by":"crossref","unstructured":"Sun Y, Wang S, Li Y, Feng S, Tian H, Wu H, Wang H (2020) Ernie 2.0: a continual pre-training framework for language understanding. In: AAAI","DOI":"10.1609\/aaai.v34i05.6428"},{"key":"2443_CR63","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3124365","author":"Y Cui","year":"2021","unstructured":"Cui Y, Che W, Liu T, Qin B, Yang Z (2021) Pre-training with whole word masking for Chinese BERT. T-ASL. 
https:\/\/doi.org\/10.1109\/TASLP.2021.3124365","journal-title":"T-ASL"},{"key":"2443_CR64","doi-asserted-by":"publisher","unstructured":"Diao S, Bai J, Song Y, Zhang T, Wang Y (2020) ZEN: pre-training chinese text encoder enhanced by n-gram representations. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.425","DOI":"10.18653\/v1\/2020.findings-emnlp.425"},{"key":"2443_CR65","doi-asserted-by":"crossref","unstructured":"Tsai H, Riesa J, Johnson M, Arivazhagan N, Li X, Archer A (2019) Small and practical bert models for sequence labeling. arXiv","DOI":"10.18653\/v1\/D19-1374"},{"key":"2443_CR66","unstructured":"Schulman J, Wolski F, Dhariwal P, Radford A, Klimov O (2017) Proximal policy optimization algorithms. arXiv preprint arXiv:1707.06347"},{"key":"2443_CR67","unstructured":"Thoppilan R, De\u00a0Freitas D, Hall J, Shazeer N, Kulshreshtha A, Cheng H-T, Jin A, Bos T, Baker L, Du Y et\u00a0al (2022) Lamda: language models for dialog applications. arXiv preprint arXiv:2201.08239"},{"key":"2443_CR68","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado G, Dean J (2013) Distributed representations of words and phrases and their compositionality. arXiv"},{"key":"2443_CR69","doi-asserted-by":"crossref","unstructured":"Pennington J, Socher R, Manning CD (2014) Glove: global vectors for word representation. In: EMNLP","DOI":"10.3115\/v1\/D14-1162"},{"key":"2443_CR70","unstructured":"Dai AM, Le QV (2015) Semi-supervised sequence learning. arXiv"},{"key":"2443_CR71","unstructured":"Liu P, Qiu X, Huang X (2016) Recurrent neural network for text classification with multi-task learning. arXiv"},{"key":"2443_CR72","doi-asserted-by":"crossref","unstructured":"Bojanowski P, Grave E, Joulin A, Mikolov T (2017) Enriching word vectors with subword information. TACL","DOI":"10.1162\/tacl_a_00051"},{"key":"2443_CR73","unstructured":"McCann B, Bradbury J, Xiong C, Socher R (2017) Learned in translation: Contextualized word vectors. 
arXiv"},{"key":"2443_CR74","doi-asserted-by":"crossref","unstructured":"Dai Z, Yang Z, Yang Y, Carbonell J, Le QV, Salakhutdinov R (2019) Transformer-xl: attentive language models beyond a fixed-length context. arXiv","DOI":"10.18653\/v1\/P19-1285"},{"key":"2443_CR75","unstructured":"Kong L, d\u2019Autume CdM, Ling W, Yu L, Dai Z, Yogatama D (2019) A mutual information maximization perspective of language representation learning. arXiv"},{"key":"2443_CR76","unstructured":"Wang W, Bi B, Yan M, Wu C, Bao Z, Xia J, Peng L, Si L (2019) Structbert: incorporating language structures into pre-training for deep language understanding. arXiv"},{"key":"2443_CR77","unstructured":"Xiong W, Du J, Wang WY, Stoyanov V (2019) Pretrained encyclopedia: weakly supervised knowledge-pretrained language model. arXiv"},{"key":"2443_CR78","doi-asserted-by":"crossref","unstructured":"Peters ME, Neumann M, Logan\u00a0IV RL, Schwartz R, Joshi V, Singh S, Smith NA (2019) Knowledge enhanced contextual word representations. arXiv","DOI":"10.18653\/v1\/D19-1005"},{"key":"2443_CR79","doi-asserted-by":"crossref","unstructured":"Huang H, Liang Y, Duan N, Gong M, Shou L, Jiang D, Zhou M (2019) Unicoder: a universal language encoder by pre-training with multiple cross-lingual tasks. arXiv","DOI":"10.18653\/v1\/D19-1252"},{"key":"2443_CR80","doi-asserted-by":"crossref","unstructured":"Eisenschlos JM, Ruder S, Czapla P, Kardas M, Gugger S, Howard J (2019) Multifit: efficient multi-lingual language model fine-tuning. arXiv","DOI":"10.18653\/v1\/D19-1572"},{"key":"2443_CR81","doi-asserted-by":"crossref","unstructured":"Beltagy I, Lo K, Cohan A (2019) Scibert: a pretrained language model for scientific text. arXiv","DOI":"10.18653\/v1\/D19-1371"},{"key":"2443_CR82","doi-asserted-by":"crossref","unstructured":"Sun S, Cheng Y, Gan Z, Liu J (2019) Patient knowledge distillation for bert model compression. 
arXiv","DOI":"10.18653\/v1\/D19-1441"},{"key":"2443_CR83","unstructured":"Lample G, Conneau A (2019) Cross-lingual language model pretraining. arXiv"},{"key":"2443_CR84","doi-asserted-by":"crossref","unstructured":"Zafrir O, Boudoukh G, Izsak P, Wasserblat M (2019) Q8bert: quantized 8bit bert. In: 2019 Fifth workshop on energy efficient machine learning and cognitive computing-NeurIPS edition (EMC2-NIPS). IEEE, pp 36\u201339","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"key":"2443_CR85","unstructured":"Sanh V, Debut L, Chaumond J, Wolf T (2019) Distilbert, a distilled version of bert: smaller, faster, cheaper and lighter. arXiv"},{"key":"2443_CR86","doi-asserted-by":"crossref","unstructured":"Liu W, Zhou P, Zhao Z, Wang Z, Deng H, Ju Q (2020) Fastbert: a self-distilling bert with adaptive inference time. arXiv","DOI":"10.18653\/v1\/2020.acl-main.537"},{"key":"2443_CR87","doi-asserted-by":"publisher","unstructured":"Martin L, M\u00fcller B, Su\u00e1rez PJO, Dupont Y, Romary L, Clergerie \u00c9, Seddah D, Sagot B (2020) Camembert: a tasty french language model. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.645","DOI":"10.18653\/v1\/2020.acl-main.645"},{"key":"2443_CR88","doi-asserted-by":"publisher","unstructured":"Conneau A, Khandelwal K, Goyal N, Chaudhary V, Wenzek G, Guzm\u00e1n F, Grave E, Ott M, Zettlemoyer L, Stoyanov V (2020) Unsupervised cross-lingual representation learning at scale. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.747","DOI":"10.18653\/v1\/2020.acl-main.747"},{"key":"2443_CR89","unstructured":"Kitaev N, Kaiser L, Levskaya A (2020) Reformer: the efficient transformer. In: ICLR"},{"key":"2443_CR90","doi-asserted-by":"crossref","unstructured":"Shen S, Dong Z, Ye J, Ma L, Yao Z, Gholami A, Mahoney MW, Keutzer K (2020) Q-bert: Hessian based ultra low precision quantization of bert. 
In: AAAI","DOI":"10.1609\/aaai.v34i05.6409"},{"key":"2443_CR91","doi-asserted-by":"crossref","unstructured":"Chi Z, Dong L, Wei F, Wang W, Mao X-L, Huang H (2020) Cross-lingual natural language generation via pre-training. In: AAAI","DOI":"10.1609\/aaai.v34i05.6256"},{"key":"2443_CR92","doi-asserted-by":"crossref","unstructured":"Liu W, Zhou P, Zhao Z, Wang Z, Ju Q, Deng H, Wang P (2020) K-bert: enabling language representation with knowledge graph. In: AAAI","DOI":"10.1609\/aaai.v34i03.5681"},{"key":"2443_CR93","unstructured":"Jiang Z, Yu W, Zhou D, Chen Y, Feng J, Yan S (2020) Convbert: improving BERT with span-based dynamic convolution. In: NeurIPS"},{"key":"2443_CR94","doi-asserted-by":"crossref","unstructured":"Wang W, Wei F, Dong L, Bao H, Yang N, Zhou M (2020) Minilm: deep self-attention distillation for task-agnostic compression of pre-trained transformers. In: NeurIPS","DOI":"10.18653\/v1\/2021.findings-acl.188"},{"key":"2443_CR95","doi-asserted-by":"crossref","unstructured":"Liu Y, Gu J, Goyal N, Li X, Edunov S, Ghazvininejad M, Lewis M, Zettlemoyer L (2020) Multilingual denoising pre-training for neural machine translation. Trans Assoc Comput Linguist 8:726\u2013742","DOI":"10.1162\/tacl_a_00343"},{"key":"2443_CR96","doi-asserted-by":"publisher","unstructured":"Sun T, Shao Y, Qiu X, Guo Q, Hu Y, Huang X, Zhang Z (2020) Colake: contextualized language and knowledge embedding. In: COLING. https:\/\/doi.org\/10.18653\/v1\/2020.coling-main.327","DOI":"10.18653\/v1\/2020.coling-main.327"},{"key":"2443_CR97","unstructured":"Le H, Vial L, Frej J, Segonne V, Coavoux M, Lecouteux B, Allauzen A, Crabb\u00e9 B, Besacier L, Schwab D (2020) Flaubert: unsupervised language model pre-training for French. In: LREC"},{"key":"2443_CR98","doi-asserted-by":"publisher","unstructured":"Shen T, Mao Y, He P, Long G, Trischler A, Chen W (2020) Exploiting structured knowledge in text via graph-guided representation learning. In: EMNLP. 
https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.722","DOI":"10.18653\/v1\/2020.emnlp-main.722"},{"key":"2443_CR99","doi-asserted-by":"publisher","unstructured":"Jiao X, Yin Y, Shang L, Jiang X, Chen X, Li L, Wang F, Liu Q (2020) Tinybert: distilling BERT for natural language understanding. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.372","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"2443_CR100","doi-asserted-by":"publisher","unstructured":"Delobelle P, Winters T, Berendt B (2020) Robbert: a dutch roberta-based language model. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.292","DOI":"10.18653\/v1\/2020.findings-emnlp.292"},{"key":"2443_CR101","doi-asserted-by":"publisher","unstructured":"He B, Zhou D, Xiao J, Jiang X, Liu Q, Yuan NJ, Xu T (2020) Integrating graph contextualized knowledge into pre-trained language models. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.207","DOI":"10.18653\/v1\/2020.findings-emnlp.207"},{"key":"2443_CR102","unstructured":"Raffel C, Shazeer N, Roberts A, Lee K, Narang S, Matena M, Zhou Y, Li W, Liu PJ (2020) Exploring the limits of transfer learning with a unified text-to-text transformer. J Mach Learn Res 21(140):1\u201367"},{"key":"2443_CR103","doi-asserted-by":"crossref","unstructured":"Schick T, Sch\u00fctze H (2021) Exploiting cloze-questions for few-shot text classification and natural language inference. In: EACL","DOI":"10.18653\/v1\/2021.eacl-main.20"},{"key":"2443_CR104","doi-asserted-by":"crossref","unstructured":"Wang X, Gao T, Zhu Z, Zhang Z, Liu Z, Li J, Tang J (2021) KEPLER: a unified model for knowledge embedding and pre-trained language representation. Trans Assoc Comput Linguist 9:176\u2013194","DOI":"10.1162\/tacl_a_00360"},{"key":"2443_CR105","doi-asserted-by":"crossref","unstructured":"Gao T, Yao X, Chen D (2021) Simcse: simple contrastive learning of sentence embeddings. 
CoRR arXiv:2104.08821","DOI":"10.18653\/v1\/2021.emnlp-main.552"},{"key":"2443_CR106","unstructured":"Du N, Huang Y, Dai AM, Tong S, Lepikhin D, Xu Y, Krikun M, Zhou Y, Yu AW, Firat O et\u00a0al (2022) Glam: efficient scaling of language models with mixture-of-experts. In: International conference on machine learning. PMLR, pp 5547\u20135569"},{"key":"2443_CR107","doi-asserted-by":"crossref","unstructured":"Chi Z, Huang S, Dong L, Ma S, Singhal S, Bajaj P, Song X, Wei F (2021) Xlm-e: cross-lingual language model pre-training via electra. arXiv preprint arXiv:2106.16138","DOI":"10.18653\/v1\/2022.acl-long.427"},{"key":"2443_CR108","unstructured":"Sanh V, Webson A, Raffel C, Bach SH, Sutawika L, Alyafeai Z, Chaffin A, Stiegler A, Scao TL, Raja A et\u00a0al (2021) Multitask prompted training enables zero-shot task generalization. arXiv preprint arXiv:2110.08207"},{"key":"2443_CR109","unstructured":"Rae JW, Borgeaud S, Cai T, Millican K, Hoffmann J, Song F, Aslanides J, Henderson S, Ring R, Young S et\u00a0al (2021) Scaling language models: methods, analysis & insights from training gopher. arXiv preprint arXiv:2112.11446"},{"key":"2443_CR110","unstructured":"Smith S, Patwary M, Norick B, LeGresley P, Rajbhandari S, Casper J, Liu Z, Prabhumoye S, Zerveas G, Korthikanti V et\u00a0al (2022) Using deepspeed and megatron to train megatron-turing nlg 530b, a large-scale generative language model. arXiv preprint arXiv:2201.11990"},{"key":"2443_CR111","unstructured":"Hoffmann J, Borgeaud S, Mensch A, Buchatskaya E, Cai T, Rutherford E, Casas DL, Hendricks LA, Welbl J, Clark A et\u00a0al (2022) Training compute-optimal large language models. arXiv preprint arXiv:2203.15556"},{"key":"2443_CR112","unstructured":"Zhang S, Roller S, Goyal N, Artetxe M, Chen M, Chen S, Dewan C, Diab M, Li X, Lin XV et\u00a0al (2022) Opt: open pre-trained transformer language models. 
arXiv preprint arXiv:2205.01068"},{"key":"2443_CR113","unstructured":"Wei J, Bosma M, Zhao V, Guu K, Yu AW, Lester B, Du N, Dai AM, Le QV (2022) Finetuned language models are zero-shot learners. In: International conference on learning representations"},{"key":"2443_CR114","doi-asserted-by":"crossref","unstructured":"Honovich O, Scialom T, Levy O, Schick T (2022) Unnatural instructions: tuning language models with (almost) no human labor. arXiv preprint arXiv:2212.09689","DOI":"10.18653\/v1\/2023.acl-long.806"},{"key":"2443_CR115","doi-asserted-by":"crossref","unstructured":"Wang Y, Mishra S, Alipoormolabashi P, Kordi Y, Mirzaei A, Naik A, Ashok A, Dhanasekaran AS, Arunkumar A, Stap D et\u00a0al (2022) Super-natural instructions: generalization via declarative instructions on 1600+ nlp tasks. In: Proceedings of the 2022 conference on empirical methods in natural language processing, pp 5085\u20135109","DOI":"10.18653\/v1\/2022.emnlp-main.340"},{"key":"2443_CR116","doi-asserted-by":"crossref","unstructured":"Mishra S, Khashabi D, Baral C, Hajishirz, H (2022) Cross-task generalization via natural language crowdsourcing instructions. In: Proceedings of the 60th Annual meeting of the association for computational linguistics (volume 1: long papers), pp 3470\u20133487","DOI":"10.18653\/v1\/2022.acl-long.244"},{"key":"2443_CR117","doi-asserted-by":"crossref","unstructured":"Wang Y, Kordi Y, Mishra S, Liu A, Smith NA, Khashabi D, Hajishirzi H (2022) Self-instruct: aligning language model with self generated instructions. arXiv preprint arXiv:2212.10560","DOI":"10.18653\/v1\/2023.acl-long.754"},{"key":"2443_CR118","unstructured":"Weidinger L, Mellor J, Rauh M, Griffin C, Uesato J, Huang P-S, Cheng M, Glaese M, Balle B, Kasirzadeh A et\u00a0al (2021) Ethical and social risks of harm from language models. 
arXiv preprint arXiv:2112.04359"},{"key":"2443_CR119","doi-asserted-by":"crossref","unstructured":"Kiegeland S, Kreutzer J (2021) Revisiting the weaknesses of reinforcement learning for neural machine translation. In: Proceedings of the 2021 conference of the North American chapter of the association for computational linguistics: human language technologies, pp 1673\u20131681","DOI":"10.18653\/v1\/2021.naacl-main.133"},{"key":"2443_CR120","doi-asserted-by":"crossref","unstructured":"Jaques N, Shen JH, Ghandeharioun A, Ferguson C, Lapedriza A, Jones N, Gu S, Picard R (2020) Human-centric dialog training via offline reinforcement learning. In: Proceedings of the 2020 conference on empirical methods in natural language processing (EMNLP), pp 3985\u20134003","DOI":"10.18653\/v1\/2020.emnlp-main.327"},{"key":"2443_CR121","doi-asserted-by":"crossref","unstructured":"Rennie SJ, Marcheret E, Mroueh Y, Ross J, Goel V (2017) Self-critical sequence training for image captioning. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7008\u20137024","DOI":"10.1109\/CVPR.2017.131"},{"key":"2443_CR122","unstructured":"Pang RY, He H (2021) Text generation by learning from demonstrations. In: Proceedings of the international conference on learning representations"},{"key":"2443_CR123","first-page":"7903","volume":"34","author":"M Hausknecht","year":"2020","unstructured":"Hausknecht M, Ammanabrolu P, C\u00f4t\u00e9 M-A, Yuan X (2020) Interactive fiction games: a colossal adventure. Proc AAAI Conf Artif Intell 34:7903\u20137910","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2443_CR124","unstructured":"Snell C, Kostrikov I, Su Y, Yang M, Levine S (2022) Offline rl for natural language generation with implicit language q learning. arXiv preprint arXiv:2206.11871"},{"key":"2443_CR125","unstructured":"Lu X, Welleck S, Jiang L, Hessel J, Qin L, West P, Ammanabrolu P, Choi Y (2022) Quark: controllable text generation with reinforced unlearning. 
arXiv preprint arXiv:2205.13636"},{"key":"2443_CR126","doi-asserted-by":"publisher","first-page":"1543","DOI":"10.1007\/s10462-022-10205-5","volume":"56","author":"V Uc-Cetina","year":"2022","unstructured":"Uc-Cetina V, Navarro-Guerrero N, Martin-Gonzalez A, Weber C, Wermter S (2022) Survey on reinforcement learning for language processing. Artif Intell Rev 56:1543\u20131575","journal-title":"Artif Intell Rev"},{"key":"2443_CR127","unstructured":"Ramamurthy R, Ammanabrolu P, Brantley K, Hessel J, Sifa R, Bauckhage C, Hajishirzi H, Choi Y (2022) Is reinforcement learning (not) for natural language processing?: benchmarks, baselines, and building blocks for natural language policy optimization. arXiv preprint arXiv:2210.01241"},{"key":"2443_CR128","unstructured":"Wu J, Ouyang L, Ziegler DM, Stiennon N, Lowe R, Leike J, Christiano P (2021) Recursively summarizing books with human feedback. arXiv preprint arXiv:2109.10862"},{"key":"2443_CR129","unstructured":"Nakano R, Hilton J, Balaji S, Wu J, Ouyang L, Kim C, Hesse C, Jain S, Kosaraju V, Saunders W et\u00a0al (2021) Webgpt: browser-assisted question-answering with human feedback. arXiv preprint arXiv:2112.09332"},{"key":"2443_CR130","unstructured":"Glaese A, McAleese N, Tr\u0119bacz M, Aslanides J, Firoiu V, Ewalds T, Rauh M, Weidinger L, Chadwick M, Thacker P et\u00a0al (2022) Improving alignment of dialogue agents via targeted human judgements. arXiv preprint arXiv:2209.14375"},{"key":"2443_CR131","unstructured":"Bai Y, Kadavath S, Kundu S, Askell A, Kernion J, Jones A, Chen A, Goldie A, Mirhoseini A, McKinnon C et\u00a0al (2022) Constitutional AI: harmlessness from AI feedback. arXiv preprint arXiv:2212.08073"},{"key":"2443_CR132","unstructured":"Chung HW, Hou L, Longpre S, Zoph B, Tay Y, Fedus W, Li E, Wang X, Dehghani M, Brahma S et\u00a0al (2022) Scaling instruction-finetuned language models. 
arXiv preprint arXiv:2210.11416"},{"key":"2443_CR133","unstructured":"Kojima T, Gu SS, Reid M, Matsuo Y, Iwasawa Y (2022) Large language models are zero-shot reasoners. Adv Neural Inf Process Syst 35:22199\u201322213"},{"key":"2443_CR134","unstructured":"Dosovitskiy A, Springenberg JT, Riedmiller M, Brox T (2014) Discriminative unsupervised feature learning with convolutional neural networks. Adv Neural Inf Process Syst"},{"key":"2443_CR135","unstructured":"Dosovitskiy A, Fischer P, Springenberg JT, Riedmiller M, Brox T (2016) Discriminative unsupervised feature learning with exemplar convolutional neural networks. TPAMI. TPAMI-2015-05-0348.R1"},{"key":"2443_CR136","doi-asserted-by":"crossref","unstructured":"Doersch C, Gupta A, Efros AA (2015) Unsupervised visual representation learning by context prediction. In: ICCV","DOI":"10.1109\/ICCV.2015.167"},{"key":"2443_CR137","doi-asserted-by":"crossref","unstructured":"Pathak D, Krahenbuhl P, Donahue J, Darrell T, Efros AA (2016) Context encoders: feature learning by inpainting. In: CVPR","DOI":"10.1109\/CVPR.2016.278"},{"key":"2443_CR138","doi-asserted-by":"crossref","unstructured":"Zhang R, Isola P, Efros AA (2016) Colorful image colorization. In: ECCV","DOI":"10.1007\/978-3-319-46487-9_40"},{"key":"2443_CR139","doi-asserted-by":"crossref","unstructured":"Zhang R, Isola P, Efros AA (2017) Split-brain autoencoders: unsupervised learning by cross-channel prediction. In: CVPR","DOI":"10.1109\/CVPR.2017.76"},{"key":"2443_CR140","doi-asserted-by":"crossref","unstructured":"Noroozi M, Favaro P (2016) Unsupervised learning of visual representations by solving jigsaw puzzles. In: ECCV","DOI":"10.1007\/978-3-319-46466-4_5"},{"key":"2443_CR141","doi-asserted-by":"crossref","unstructured":"Kim D, Cho D, Yoo D, Kweon IS (2018) Learning image representations by completing damaged jigsaw puzzles. 
In: WACV","DOI":"10.1109\/WACV.2018.00092"},{"key":"2443_CR142","doi-asserted-by":"crossref","unstructured":"Noroozi M, Pirsiavash H, Favaro P (2017) Representation learning by learning to count. In: ICCV","DOI":"10.1109\/ICCV.2017.628"},{"key":"2443_CR143","unstructured":"Bojanowski P, Joulin A (2017) Unsupervised learning by predicting noise. In: ICML"},{"key":"2443_CR144","unstructured":"Gidaris S, Singh P, Komodakis N (2018) Unsupervised representation learning by predicting image rotations. arXiv"},{"key":"2443_CR145","unstructured":"Oord Avd, Li Y, Vinyals O (2018) Representation learning with contrastive predictive coding. arXiv"},{"key":"2443_CR146","unstructured":"Henaff O (2020) Data-efficient image recognition with contrastive predictive coding. In: ICML"},{"key":"2443_CR147","unstructured":"Donahue J, Simonyan K (2019) Large scale adversarial representation learning. In: NeurIPS, pp 10541\u201310551"},{"key":"2443_CR148","unstructured":"Dumoulin V, Belghazi I, Poole B, Mastropietro O, Lamb A, Arjovsky M, Courville A (2016) Adversarially learned inference. arXiv"},{"key":"2443_CR149","unstructured":"Chen M, Radford A, Child R, Wu J, Jun H, Luan D, Sutskever I (2020) Generative pretraining from pixels. In: ICML"},{"key":"2443_CR150","unstructured":"Bao H, Dong L, Piao S, Wei F (2021) Beit: Bert pre-training of image transformers. In: International conference on learning representations"},{"key":"2443_CR151","doi-asserted-by":"crossref","unstructured":"Caron M, Touvron H, Misra I, J\u00e9gou H, Mairal J, Bojanowski P, Joulin A (2021) Emerging properties in self-supervised vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9650\u20139660","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"2443_CR152","unstructured":"Ramesh A, Pavlov M, Goh G, Gray S, Voss C, Radford A, Chen M, Sutskever I (2021) Zero-shot text-to-image generation. In: International conference on machine learning. 
PMLR, pp 8821\u20138831"},{"key":"2443_CR153","doi-asserted-by":"crossref","unstructured":"He K, Chen X, Xie S, Li Y, Doll\u00e1r P, Girshick R (2022) Masked autoencoders are scalable vision learners. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16000\u201316009","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2443_CR154","doi-asserted-by":"crossref","unstructured":"Xie Z, Zhang Z, Cao Y, Lin Y, Bao J, Yao Z, Dai Q, Hu H (2022) Simmim: a simple framework for masked image modeling. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9653\u20139663","DOI":"10.1109\/CVPR52688.2022.00943"},{"key":"2443_CR155","doi-asserted-by":"crossref","unstructured":"Kirillov A, Mintun E, Ravi N, Mao H, Rolland C, Gustafson L, Xiao T, Whitehead S, Berg AC, Lo W-Y et\u00a0al (2023) Segment anything. arXiv preprint arXiv:2304.02643","DOI":"10.1109\/ICCV51070.2023.00371"},{"key":"2443_CR156","doi-asserted-by":"crossref","unstructured":"Wu Z, Xiong Y, Yu SX, Lin D (2018) Unsupervised feature learning via non-parametric instance discrimination. In: CVPR","DOI":"10.1109\/CVPR.2018.00393"},{"key":"2443_CR157","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2443_CR158","unstructured":"Li X, Wang W, Yang L, Yang J (2022) Uniform masking: enabling mae pre-training for pyramid-based vision transformers with locality. arXiv preprint arXiv:2205.10063"},{"key":"2443_CR159","unstructured":"Chen J, Hu M, Li B, Elhoseiny M (2022) Efficient self-supervised vision pretraining with local masked reconstruction. 
arXiv preprint arXiv:2206.00790"},{"key":"2443_CR160","unstructured":"Hinton G, Vinyals O, Dean J (2015) Distilling the knowledge in a neural network. arXiv"},{"key":"2443_CR161","doi-asserted-by":"crossref","unstructured":"Zhuang C, Zhai AL, Yamins D (2019) Local aggregation for unsupervised learning of visual embeddings. In: ICCV","DOI":"10.1109\/ICCV.2019.00610"},{"key":"2443_CR162","doi-asserted-by":"crossref","unstructured":"Misra I, Maaten L (2020) Self-supervised learning of pretext-invariant representations. In: CVPR","DOI":"10.1109\/CVPR42600.2020.00674"},{"key":"2443_CR163","doi-asserted-by":"crossref","unstructured":"He K, Fan H, Wu Y, Xie S, Girshick R (2020) Momentum contrast for unsupervised visual representation learning. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 9729\u20139738","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"2443_CR164","unstructured":"Chen X, Fan H, Girshick R, He K (2020) Improved baselines with momentum contrastive learning. arXiv"},{"key":"2443_CR165","unstructured":"Grill J-B, Strub F, Altch\u00e9 F, Tallec C, Richemond PH, Buchatskaya E, Doersch C, Pires BA, Guo ZD, Azar MG et\u00a0al (2020) Bootstrap your own latent: a new approach to self-supervised learning. arXiv"},{"key":"2443_CR166","unstructured":"Chen T, Kornblith S, Norouzi M, Hinton G (2020) A simple framework for contrastive learning of visual representations. In: ICML"},{"key":"2443_CR167","unstructured":"Caron M, Misra I, Mairal J, Goyal P, Bojanowski P, Joulin A (2020) Unsupervised learning of visual features by contrasting cluster assignments. arXiv"},{"key":"2443_CR168","unstructured":"Goyal P, Caron M, Lefaudeux B, Xu M, Wang P, Pai V, Singh M, Liptchinsky V, Misra I, Joulin A et\u00a0al (2021) Self-supervised pretraining of visual features in the wild. 
arXiv"},{"key":"2443_CR169","doi-asserted-by":"crossref","unstructured":"Radosavovic I, Kosaraju RP, Girshick R, He K, Doll\u00e1r P (2020) Designing network design spaces. In: CVPR","DOI":"10.1109\/CVPR42600.2020.01044"},{"key":"2443_CR170","doi-asserted-by":"crossref","unstructured":"Chen X, He K (2021) Exploring simple siamese representation learning. In: CVPR","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"2443_CR171","unstructured":"Li J, Zhou P, Xiong C, Hoi SCH (2021) Prototypical contrastive learning of unsupervised representations. In: ICLR. OpenReview.net"},{"key":"2443_CR172","doi-asserted-by":"crossref","unstructured":"Zhang L, Qi G-J, Wang L, Luo J (2019) Aet vs. aed: unsupervised representation learning by auto-encoding transformations rather than data. In: CVPR","DOI":"10.1109\/CVPR.2019.00265"},{"key":"2443_CR173","unstructured":"Bachman P, Hjelm RD, Buchwalter W (2019) Learning representations by maximizing mutual information across views. arXiv"},{"key":"2443_CR174","doi-asserted-by":"crossref","unstructured":"Yan X, Misra I, Gupta A, Ghadiyaram D, Mahajan D (2020) Clusterfit: improving generalization of visual representations. In: CVPR","DOI":"10.1109\/CVPR42600.2020.00654"},{"key":"2443_CR175","unstructured":"Asano YM, Rupprecht C, Vedaldi A (2019) Self-labelling via simultaneous clustering and representation learning. arXiv preprint arXiv:1911.05371"},{"key":"2443_CR176","unstructured":"Chen T, Kornblith S, Swersky K, Norouzi M, Hinton G (2020) Big self-supervised models are strong semi-supervised learners. arXiv"},{"key":"2443_CR177","doi-asserted-by":"crossref","unstructured":"Tian Y, Krishnan D, Isola P (2019) Contrastive multiview coding. arXiv","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"2443_CR178","doi-asserted-by":"crossref","unstructured":"Cubuk ED, Zoph B, Shlens J, Le QV (2019) Randaugment: practical data augmentation with no separate search. 
arXiv","DOI":"10.1109\/CVPRW50498.2020.00359"},{"key":"2443_CR179","unstructured":"Tian Y, Sun C, Poole B, Krishnan D, Schmid C, Isola P (2020) What makes for good views for contrastive learning. arXiv"},{"key":"2443_CR180","doi-asserted-by":"crossref","unstructured":"Chen X, Xie S, He K (2021) An empirical study of training self-supervised vision transformers. arXiv","DOI":"10.1109\/ICCV48922.2021.00950"},{"key":"2443_CR181","unstructured":"Mitrovic J, McWilliams B, Walker JC, Buesing LH, Blundell C (2021) Representation learning via invariant causal mechanisms. In: ICLR"},{"key":"2443_CR182","unstructured":"Tian Y, Chen X, Ganguli S (2021) Understanding self-supervised learning dynamics without contrastive pairs. In: ICML. Proceedings of machine learning research"},{"key":"2443_CR183","unstructured":"Xie Z, Lin Y, Yao Z, Zhang Z, Dai Q, Cao Y, Hu H (2021) Self-supervised learning with swin transformers. arXiv"},{"key":"2443_CR184","unstructured":"Li Z, Chen Z, Yang F, Li W, Zhu Y, Zhao C, Deng R, Wu L, Zhao R, Tang M et\u00a0al (2021) Mst: masked self-supervised transformer for visual representation. Adv Neural Inf Process Syst 34"},{"key":"2443_CR185","unstructured":"Bao H, Dong L, Piao S, Wei F (2022) BEit: BERT pre-training of image transformers. In: International conference on learning representations. https:\/\/openreview.net\/forum?id=p-BhZSz59o4"},{"key":"2443_CR186","doi-asserted-by":"crossref","unstructured":"Chen X, Ding M, Wang X, Xin Y, Mo S, Wang Y, Han S, Luo P, Zeng G, Wang J (2022) Context autoencoder for self-supervised representation learning. arXiv preprint arXiv:2202.03026","DOI":"10.1007\/s11263-023-01852-4"},{"key":"2443_CR187","unstructured":"Dong X, Bao J, Zhang T, Chen D, Zhang W, Yuan L, Chen D, Wen F, Yu N (2021) Peco: perceptual codebook for bert pre-training of vision transformers. 
arXiv preprint arXiv:2111.12710"},{"key":"2443_CR188","unstructured":"You Y, Chen T, Wang Z, Shen Y (2020) When does self-supervision help graph convolutional networks? In: ICML. Proceedings of machine learning research, pp 10871\u201310880"},{"key":"2443_CR189","unstructured":"Jin W, Derr T, Liu H, Wang Y, Wang S, Liu Z, Tang J (2020) Self-supervised learning on graphs: deep insights and new direction. CoRR arXiv:2006.10141"},{"key":"2443_CR190","unstructured":"Hu W, Liu B, Gomes J, Zitnik M, Liang P, Pande VS, Leskovec J (2020) Strategies for pre-training graph neural networks. In: ICLR"},{"key":"2443_CR191","doi-asserted-by":"crossref","unstructured":"Perozzi B, Al-Rfou R, Skiena S (2014) Deepwalk: online learning of social representations. In: ACM SIGKDD","DOI":"10.1145\/2623330.2623732"},{"key":"2443_CR192","doi-asserted-by":"crossref","unstructured":"Grover A, Leskovec J (2016) node2vec: scalable feature learning for networks. In: ACM SIGKDD","DOI":"10.1145\/2939672.2939754"},{"key":"2443_CR193","doi-asserted-by":"crossref","unstructured":"Tang J, Qu M, Wang M, Zhang M, Yan J, Mei Q (2015) LINE: large-scale information network embedding. In: WWW","DOI":"10.1145\/2736277.2741093"},{"key":"2443_CR194","unstructured":"Kipf TN, Welling M (2016) Variational graph auto-encoders. CoRR"},{"key":"2443_CR195","doi-asserted-by":"crossref","unstructured":"Qiu J, Chen Q, Dong Y, Zhang J, Yang H, Ding M, Wang K, Tang J (2020) GCC: graph contrastive coding for graph neural network pre-training. In: KDD","DOI":"10.1145\/3394486.3403168"},{"key":"2443_CR196","doi-asserted-by":"crossref","unstructured":"Zhu Y, Xu Y, Yu F, Liu Q, Wu S, Wang L (2021) Graph contrastive learning with adaptive augmentation. In: WWW","DOI":"10.1145\/3442381.3449802"},{"key":"2443_CR197","unstructured":"You Y, Chen T, Sui Y, Chen T, Wang Z, Shen Y (2020) Graph contrastive learning with augmentations. 
In: NeurIPS"},{"key":"2443_CR198","doi-asserted-by":"crossref","unstructured":"Mavromatis C, Karypis G (2021) Graph infoclust: maximizing coarse-grain mutual information in graphs. In: PAKDD","DOI":"10.1007\/978-3-030-75762-5_43"},{"key":"2443_CR199","doi-asserted-by":"crossref","unstructured":"Sun Q, Li J, Peng H, Wu J, Ning Y, Yu PS, He L (2021) SUGAR: subgraph neural network with reinforcement pooling and self-supervised mutual information mechanism. In: WWW","DOI":"10.1145\/3442381.3449822"},{"key":"2443_CR200","unstructured":"Velickovic P, Fedus W, Hamilton WL, Li\u00f2 P, Bengio Y, Hjelm RD (2019) Deep graph infomax. In: ICLR"},{"key":"2443_CR201","unstructured":"Hassani K, Ahmadi AHK (2020) Contrastive multi-view representation learning on graphs. In: ICML. Proceedings of machine learning research, pp 4116\u20134126"},{"key":"2443_CR202","doi-asserted-by":"crossref","unstructured":"Jiao Y, Xiong Y, Zhang J, Zhang Y, Zhang T, Zhu Y (2020) Sub-graph contrast for scalable self-supervised graph representation learning. In: ICDM, pp 222\u2013231","DOI":"10.1109\/ICDM50108.2020.00031"},{"key":"2443_CR203","doi-asserted-by":"crossref","unstructured":"Sun K, Lin Z, Zhu Z (2020) Multi-stage self-supervised learning for graph convolutional networks on graphs with few labeled nodes. In: AAAI, pp 5892\u20135899","DOI":"10.1609\/aaai.v34i04.6048"},{"key":"2443_CR204","unstructured":"Rong Y, Bian Y, Xu T, Xie W, Wei Y, Huang W, Huang J (2020) Self-supervised graph transformer on large-scale molecular data. In: NeurIPS"},{"key":"2443_CR205","unstructured":"Tan Q, Liu N, Huang X, Chen R, Choi S-H, Hu X (2022) Mgae: Masked autoencoders for self-supervised learning on graphs. arXiv preprint arXiv:2201.02534"},{"key":"2443_CR206","doi-asserted-by":"crossref","unstructured":"Hou Z, Liu X, Dong Y, Wang C, Tang J et\u00a0al (2022) Graphmae: Self-supervised masked graph autoencoders. 
arXiv preprint arXiv:2205.10803","DOI":"10.1145\/3534678.3539321"},{"key":"2443_CR207","unstructured":"Li J, Wu R, Sun W, Chen L, Tian S, Zhu L, Meng C, Zheng Z, Wang W (2022) Maskgae: masked graph modeling meets graph autoencoders. arXiv preprint arXiv:2205.10053"},{"key":"2443_CR208","unstructured":"Tian Y, Dong K, Zhang C, Zhang C, Chawla NV (2022) Heterogeneous graph masked autoencoders. arXiv preprint arXiv:2208.09957"},{"key":"2443_CR209","doi-asserted-by":"crossref","unstructured":"Wan S, Pan S, Yang J, Gong C (2021) Contrastive and generative graph convolutional networks for graph-based semi-supervised learning. In: AAAI","DOI":"10.1609\/aaai.v35i11.17206"},{"key":"2443_CR210","unstructured":"Zhang J, Zhang H, Xia C, Sun L (2020) Graph-bert: only attention is needed for learning graph representations. arXiv arXiv:2001.05140"},{"key":"2443_CR211","doi-asserted-by":"crossref","unstructured":"Peng Z, Huang W, Luo M, Zheng Q, Rong Y, Xu T, Huang J (2020) Graph representation learning via graphical mutual information maximization. In: WWW","DOI":"10.1145\/3366423.3380112"},{"key":"2443_CR212","doi-asserted-by":"crossref","unstructured":"Hu Z, Dong Y, Wang K, Chang K, Sun Y (2020) GPT-GNN: generative pre-training of graph neural networks. In: KDD","DOI":"10.1145\/3394486.3403237"},{"key":"2443_CR213","doi-asserted-by":"crossref","unstructured":"Wang G, Guo H, Li A, Liu X, Yan Q (2023) Federated iot interaction vulnerability analysis. In: 2023 IEEE 39th international conference on data engineering (ICDE). IEEE","DOI":"10.1109\/ICDE55515.2023.00120"},{"key":"2443_CR214","unstructured":"Hamilton WL, Ying Z, Leskovec J (2017) Inductive representation learning on large graphs. In: NIPS"},{"key":"2443_CR215","unstructured":"Hwang D, Park J, Kwon S, Kim K, Ha J, Kim HJ (2020) Self-supervised auxiliary learning with meta-paths for heterogeneous graphs. 
In: NeurIPS"},{"key":"2443_CR216","unstructured":"Sun F, Hoffmann J, Verma V, Tang J (2020) Infograph: unsupervised and semi-supervised graph-level representation learning via mutual information maximization. In: ICLR"},{"key":"2443_CR217","doi-asserted-by":"crossref","unstructured":"Park C, Kim D, Han J, Yu H (2020) Unsupervised attributed multiplex network embedding. In: AAAI, pp 5371\u20135378","DOI":"10.1609\/aaai.v34i04.5985"},{"key":"2443_CR218","unstructured":"You Y, Chen T, Shen Y, Wang Z (2021) Graph contrastive learning automated. CoRR arXiv:2106.07594"},{"key":"2443_CR219","doi-asserted-by":"crossref","unstructured":"Zeng J, Xie P (2021) Contrastive self-supervised learning for graph classification. In: AAAI, pp 10824\u201310832","DOI":"10.1609\/aaai.v35i12.17293"},{"key":"2443_CR220","unstructured":"Xu M, Wang H, Ni B, Guo H, Tang J (2021) Self-supervised graph-level representation learning with local and global structure. CoRR arXiv:2106.04113"},{"key":"2443_CR221","doi-asserted-by":"crossref","unstructured":"Wang P, Agarwal K, Ham C, Choudhury S, Reddy CK (2021) Self-supervised learning of contextual embeddings for link prediction in heterogeneous networks. In: WWW","DOI":"10.1145\/3442381.3450060"},{"key":"2443_CR222","doi-asserted-by":"crossref","unstructured":"Cao J, Lin X, Guo S, Liu L, Liu T, Wang B (2021) Bipartite graph embedding via mutual information maximization. In: WSDM","DOI":"10.1145\/3437963.3441783"},{"key":"2443_CR223","doi-asserted-by":"crossref","unstructured":"Wang X, Liu N, Han H, Shi C (2021) Self-supervised heterogeneous graph neural network with co-contrastive learning. KDD arXiv:2105.09111","DOI":"10.1145\/3447548.3467415"},{"key":"2443_CR224","unstructured":"Kim D, Oh A (2021) How to find your friendly neighborhood: graph attention design with self-supervision. In: ICLR. 
https:\/\/openreview.net\/forum?id=Wi5KUNlqWty"},{"key":"2443_CR225","unstructured":"Sun M, Xing J, Wang H, Chen B, Zhou J (2021) Mocl: contrastive learning on molecular graphs with multi-level domain knowledge. CoRR arXiv:2106.04509"},{"key":"2443_CR226","doi-asserted-by":"crossref","unstructured":"Schneider S, Baevski A, Collobert R, Auli M (2019) wav2vec: unsupervised pre-training for speech recognition. In: INTERSPEECH","DOI":"10.21437\/Interspeech.2019-1873"},{"key":"2443_CR227","unstructured":"Baevski A, Schneider S, Auli M (2020) vq-wav2vec: self-supervised learning of discrete speech representations. In: ICLR"},{"key":"2443_CR228","unstructured":"Baevski A, Zhou Y, Mohamed A, Auli M (2020) wav2vec 2.0: a framework for self-supervised learning of speech representations. In: NeurIPS"},{"key":"2443_CR229","doi-asserted-by":"crossref","unstructured":"Chung Y, Glass JR (2020) Generative pre-training for speech with autoregressive predictive coding. In: ICASSP","DOI":"10.1109\/ICASSP40776.2020.9054438"},{"key":"2443_CR230","doi-asserted-by":"crossref","unstructured":"Song X, Wang G, Huang Y, Wu Z, Su D, Meng H (2020) Speech-xlnet: unsupervised acoustic model pretraining for self-attention networks. In: INTERSPEECH","DOI":"10.21437\/Interspeech.2020-1511"},{"key":"2443_CR231","doi-asserted-by":"crossref","unstructured":"Chung Y, Wang Y, Hsu W, Zhang Y, Skerry-Ryan RJ (2019) Semi-supervised training for improving data efficiency in end-to-end speech synthesis. In: ICASSP","DOI":"10.1109\/ICASSP.2019.8683862"},{"key":"2443_CR232","doi-asserted-by":"crossref","unstructured":"Denisov P, Vu NT (2020) Pretrained semantic speech embeddings for end-to-end spoken language understanding via cross-modal teacher-student learning. In: Interspeech","DOI":"10.21437\/Interspeech.2020-2456"},{"key":"2443_CR233","doi-asserted-by":"crossref","unstructured":"Chung Y-A, Zhu C, Zeng M (2021) SPLAT: speech-language joint pre-training for spoken language understanding. 
In: ACL","DOI":"10.18653\/v1\/2021.naacl-main.152"},{"key":"2443_CR234","doi-asserted-by":"crossref","unstructured":"Zeng M, Tan X, Wang R, Ju Z, Qin T, Liu T-Y (2021) Musicbert: symbolic music understanding with large-scale pre-training. arXiv preprint arXiv:2106.05630","DOI":"10.18653\/v1\/2021.findings-acl.70"},{"key":"2443_CR235","doi-asserted-by":"crossref","unstructured":"Huang Y-S, Yang Y-H (2020) Pop music transformer: Beat-based modeling and generation of expressive pop piano compositions. In: Proceedings of the 28th ACM international conference on multimedia, pp 1180\u20131188","DOI":"10.1145\/3394171.3413671"},{"key":"2443_CR236","unstructured":"Verma P, Berger J (2021) Audio transformers: Transformer architectures for large scale audio understanding. adieu convolutions. arXiv preprint arXiv:2105.00335"},{"key":"2443_CR237","doi-asserted-by":"crossref","unstructured":"Fernando B, Bilen H, Gavves E, Gould S (2017) Self-supervised video representation learning with odd-one-out networks. In: CVPR","DOI":"10.1109\/CVPR.2017.607"},{"key":"2443_CR238","doi-asserted-by":"crossref","unstructured":"Misra I, Zitnick CL, Hebert M (2016) Shuffle and learn: unsupervised learning using temporal order verification. In: ECCV","DOI":"10.1007\/978-3-319-46448-0_32"},{"key":"2443_CR239","doi-asserted-by":"crossref","unstructured":"Kim D, Cho D, Kweon IS (2019) Self-supervised video representation learning with space-time cubic puzzles. In: AAAI","DOI":"10.1609\/aaai.v33i01.33018545"},{"key":"2443_CR240","doi-asserted-by":"crossref","unstructured":"Tao L, Wang X, Yamasaki T (2020) Self-supervised video representation learning using inter-intra contrastive framework. In: ACM multimedia","DOI":"10.1145\/3394171.3413694"},{"key":"2443_CR241","doi-asserted-by":"crossref","unstructured":"Lorre G, Rabarisoa J, Orcesi A, Ainouz S, Canu S (2020) Temporal contrastive pretraining for video action recognition. 
In: WACV","DOI":"10.1109\/WACV45572.2020.9093278"},{"key":"2443_CR242","doi-asserted-by":"crossref","unstructured":"Yao T, Zhang Y, Qiu Z, Pan Y, Mei T (2020) Seco: Exploring sequence supervision for unsupervised representation learning. arXiv","DOI":"10.1609\/aaai.v35i12.17274"},{"key":"2443_CR243","unstructured":"Li LH, Yatskar M, Yin D, Hsieh C, Chang K (2019) Visualbert: a simple and performant baseline for vision and language. CoRR arXiv:1908.03557"},{"key":"2443_CR244","doi-asserted-by":"crossref","unstructured":"Li G, Duan N, Fang Y, Gong M, Jiang D (2020) Unicoder-vl: a universal encoder for vision and language by cross-modal pre-training. In: AAAI","DOI":"10.1609\/aaai.v34i07.6795"},{"key":"2443_CR245","unstructured":"Su W, Zhu X, Cao Y, Li B, Lu L, Wei F, Dai J (2020) VL-BERT: pre-training of generic visual-linguistic representations. In: ICLR"},{"key":"2443_CR246","unstructured":"Lu J, Batra D, Parikh D, Lee S (2019) Vilbert: pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. In: NeurIPS"},{"key":"2443_CR247","unstructured":"Ramesh A, Dhariwal P, Nichol A, Chu C, Chen M (2022) Hierarchical text-conditional image generation with clip latents. arXiv preprint arXiv:2204.06125"},{"key":"2443_CR248","unstructured":"Radford A, Kim JW, Hallacy C, Ramesh A, Goh G, Agarwal S, Sastry G, Askell A, Mishkin P, Clark J, et\u00a0al. (2021) Learning transferable visual models from natural language supervision. In: International conference on machine learning. PMLR, pp 8748\u20138763"},{"key":"2443_CR249","unstructured":"Nichol A, Dhariwal P, Ramesh A, Shyam P, Mishkin P, McGrew B, Sutskever I, Chen M (2021) Glide: towards photorealistic image generation and editing with text-guided diffusion models. arXiv preprint arXiv:2112.10741"},{"key":"2443_CR250","doi-asserted-by":"crossref","unstructured":"Sayed N, Brattoli B, Ommer B (2018) Cross and learn: cross-modal self-supervision. 
In: GCPR","DOI":"10.1007\/978-3-030-12939-2_17"},{"key":"2443_CR251","doi-asserted-by":"crossref","unstructured":"Ren Z, Lee YJ (2018) Cross-domain self-supervised multi-task feature learning using synthetic imagery. In: CVPR","DOI":"10.1109\/CVPR.2018.00086"},{"key":"2443_CR252","doi-asserted-by":"crossref","unstructured":"Tian Y, Krishnan D, Isola P (2020) Contrastive multiview coding. In: ECCV","DOI":"10.1007\/978-3-030-58621-8_45"},{"key":"2443_CR253","doi-asserted-by":"crossref","unstructured":"Zlotchevski A, Drain D, Svyatkovskiy A, Clement CB, Sundaresan N, Tufano M (2022) Exploring and evaluating personalized models for code generation. In: Proceedings of the 30th ACM joint European software engineering conference and symposium on the foundations of software engineering, pp 1500\u20131508","DOI":"10.1145\/3540250.3558959"},{"key":"2443_CR254","doi-asserted-by":"crossref","unstructured":"Thakur S, Ahmad B, Fan Z, Pearce H, Tan B, Karri R, Dolan-Gavitt B, Garg S (2022) Benchmarking large language models for automated verilog rtl code generation. arXiv preprint arXiv:2212.11140","DOI":"10.23919\/DATE56975.2023.10137086"},{"key":"2443_CR255","unstructured":"Nijkamp E, Pang B, Hayashi H, Tu L, Wang H, Zhou Y, Savarese S, Xiong C (2022) Codegen: an open large language model for code with multi-turn program synthesis. arXiv preprint arXiv:2203.13474"},{"key":"2443_CR256","unstructured":"Poesia G, Polozov O, Le V, Tiwari A, Soares G, Meek C, Gulwani S (2022) Synchromesh: reliable code generation from pre-trained language models. arXiv preprint arXiv:2201.11227"},{"key":"2443_CR257","doi-asserted-by":"crossref","unstructured":"Chen Y-C, Li L, Yu L, El\u00a0Kholy A, Ahmed F, Gan Z, Cheng Y, Liu J (2020) Uniter: universal image-text representation learning. In: European conference on computer vision. 
Springer, pp 104\u2013120","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"2443_CR258","doi-asserted-by":"crossref","unstructured":"Zhu X, Zhu J, Li H, Wu X, Li H, Wang X, Dai J (2022) Uni-perceiver: pre-training unified architecture for generic perception for zero-shot and few-shot tasks. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 16804\u201316815","DOI":"10.1109\/CVPR52688.2022.01630"},{"key":"2443_CR259","unstructured":"Reed S, Zolna K, Parisotto E, Colmenarejo SG, Novikov A, Barth-Maron G, Gimenez M, Sulsky Y, Kay J, Springenberg JT et\u00a0al (2022) A generalist agent. arXiv preprint arXiv:2205.06175"},{"key":"2443_CR260","doi-asserted-by":"crossref","unstructured":"Li W, Gao C, Niu G, Xiao X, Liu H, Liu J, Wu H, Wang H (2020) Unimo: towards unified-modal understanding and generation via cross-modal contrastive learning. arXiv preprint arXiv:2012.15409","DOI":"10.18653\/v1\/2021.acl-long.202"},{"key":"2443_CR261","unstructured":"Stadie BC, Levine S, Abbeel P (2015) Incentivizing exploration in reinforcement learning with deep predictive models. arXiv preprint arXiv:1507.00814"},{"key":"2443_CR262","unstructured":"Achiam J, Sastry S (2017) Surprise-based intrinsic motivation for deep reinforcement learning. arXiv preprint arXiv:1703.01732"},{"key":"2443_CR263","doi-asserted-by":"crossref","unstructured":"Pathak D, Agrawal P, Efros AA, Darrell T (2017) Curiosity-driven exploration by self-supervised prediction. In: International conference on machine learning. PMLR, pp 2778\u20132787","DOI":"10.1109\/CVPRW.2017.70"},{"key":"2443_CR264","unstructured":"Tang H, Houthooft R, Foote D, Stooke A, Xi\u00a0Chen O, Duan Y, Schulman J, DeTurck F, Abbeel P (2017) # Exploration: a study of count-based exploration for deep reinforcement learning. Adv Neural Inf Process Syst 30"},{"key":"2443_CR265","unstructured":"Dey P, Medya S (2019) Manipulating node similarity measures in network. 
arXiv"},{"key":"2443_CR266","first-page":"764","volume":"34","author":"B Han","year":"2021","unstructured":"Han B, Zheng C, Chan H, Paster K, Zhang M, Ba J (2021) Learning domain invariant representations in goal-conditioned block mdps. Adv Neural Inf Process Syst 34:764\u2013776","journal-title":"Adv Neural Inf Process Syst"},{"key":"2443_CR267","unstructured":"Ding Y, Florensa C, Abbeel P, Phielipp M (2019) Goal-conditioned imitation learning. Adv Neural Inf Process Syst 32"},{"key":"2443_CR268","unstructured":"Shah R, Kumar V (2021) Rrl: Resnet as representation for reinforcement learning. arXiv preprint arXiv:2107.03380"},{"key":"2443_CR269","unstructured":"Xiao T, Radosavovic I, Darrell T, Malik J (2022) Masked visual pre-training for motor control. arXiv preprint arXiv:2203.06173"},{"key":"2443_CR270","first-page":"12686","volume":"34","author":"M Schwarzer","year":"2021","unstructured":"Schwarzer M, Rajkumar N, Noukhovitch M, Anand A, Charlin L, Hjelm RD, Bachman P, Courville AC (2021) Pretraining representations for data-efficient reinforcement learning. Adv Neural Inf Process Syst 34:12686\u201312699","journal-title":"Adv Neural Inf Process Syst"},{"key":"2443_CR271","unstructured":"Schwarzer M, Anand A, Goel R, Hjelm RD, Courville A, Bachman P (2020) Data-efficient reinforcement learning with self-predictive representations. arXiv preprint arXiv:2007.05929"},{"key":"2443_CR272","doi-asserted-by":"publisher","unstructured":"Ha D, Schmidhuber J (2018) World Models. https:\/\/doi.org\/10.5281\/zenodo.1207631 arXiv:1803.10122 [cs, stat]","DOI":"10.5281\/zenodo.1207631"},{"key":"2443_CR273","doi-asserted-by":"publisher","unstructured":"Jaderberg M, Mnih V, Czarnecki WM, Schaul T, Leibo JZ, Silver D, Kavukcuoglu K (2016) Reinforcement learning with unsupervised auxiliary tasks. arXiv. 
https:\/\/doi.org\/10.48550\/arXiv.1611.05397","DOI":"10.48550\/arXiv.1611.05397"},{"key":"2443_CR274","doi-asserted-by":"publisher","unstructured":"Higgins I, Pal A, Rusu AA, Matthey L, Burgess CP, Pritzel A, Botvinick M, Blundell C, Lerchner A (2018) DARLA: improving zero-shot transfer in reinforcement learning. arXiv. https:\/\/doi.org\/10.48550\/arXiv.1707.08475","DOI":"10.48550\/arXiv.1707.08475"},{"key":"2443_CR275","unstructured":"Finn C, Yu T, Fu J, Abbeel P, Levine S (2016) Generalizing skills with semi-supervised reinforcement learning. arXiv preprint arXiv:1612.00429"},{"key":"2443_CR276","unstructured":"Shah R, Kumar V (2021) RRL: Resnet as representation for reinforcement learning. arXiv"},{"key":"2443_CR277","doi-asserted-by":"publisher","unstructured":"Schwarzer M, Anand A, Goel R, Hjelm RD, Courville A, Bachman P (2021) Data-efficient reinforcement learning with self-predictive representations. arXiv. https:\/\/doi.org\/10.48550\/arXiv.2007.05929","DOI":"10.48550\/arXiv.2007.05929"},{"key":"2443_CR278","unstructured":"Hafner D, Lillicrap T, Ba J, Norouzi M (2019) Dream to control: learning behaviors by latent imagination. arXiv preprint arXiv:1912.01603"},{"key":"2443_CR279","unstructured":"Hafner D, Lillicrap T, Norouzi M, Ba J (2020) Mastering atari with discrete world models. arXiv preprint arXiv:2010.02193"},{"key":"2443_CR280","unstructured":"Deng F, Jang I, Ah, S (2022) Dreamerpro: reconstruction-free model-based reinforcement learning with prototypical representations. In: International conference on machine learning. PMLR, pp 4956\u20134975"},{"key":"2443_CR281","unstructured":"Wu P, Escontrela A, Hafner D, Goldberg K, Abbeel P (2022) Daydreamer: world models for physical robot learning. arXiv preprint arXiv:2206.14176"},{"key":"2443_CR282","unstructured":"Laskin M, Srinivas A, Abbeel P (2020) Curl: contrastive unsupervised representations for reinforcement learning. In: International conference on machine learning. 
PMLR, pp 5639\u20135650"},{"key":"2443_CR283","first-page":"19884","volume":"33","author":"M Laskin","year":"2020","unstructured":"Laskin M, Lee K, Stooke A, Pinto L, Abbeel P, Srinivas A (2020) Reinforcement learning with augmented data. Adv Neural Inf Process Syst 33:19884\u201319895","journal-title":"Adv Neural Inf Process Syst"},{"key":"2443_CR284","unstructured":"Kostrikov I, Yarats D, Fergus R (2020) Image augmentation is all you need: regularizing deep reinforcement learning from pixels. arXiv preprint arXiv:2004.13649"},{"key":"2443_CR285","unstructured":"Yarats D, Fergus R, Lazaric A, Pinto L (2021) Mastering visual continuous control: improved data-augmented reinforcement learning. arXiv preprint arXiv:2107.09645"},{"key":"2443_CR286","unstructured":"Nair S, Rajeswaran A, Kumar V, Finn C, Gupta A (2022) R3m: a universal visual representation for robot manipulation. arXiv preprint arXiv:2203.12601"},{"key":"2443_CR287","unstructured":"Parisi S, Rajeswaran A, Purushwalkam S, Gupta A (2022) The unsurprising effectiveness of pre-trained vision models for control. arXiv preprint arXiv:2203.03580"},{"key":"2443_CR288","unstructured":"Zhou C, Yan Q, Shi Y, Sun L (2022) $$\\{$$DoubleStar$$\\}$$:$$\\{$$Long-Range$$\\}$$ attack towards depth estimation based obstacle avoidance in autonomous systems. In: 31st USENIX security symposium (USENIX Security 22), pp 1885\u20131902"},{"key":"2443_CR289","doi-asserted-by":"crossref","unstructured":"Zhou C, Yan Q, Kent D, Wang G, Zhang Z, Radha H (2024) Optical lens attack on deep learning based monocular depth estimation. arXiv preprint arXiv:2409.17376","DOI":"10.1007\/978-3-031-94445-1_12"},{"key":"2443_CR290","doi-asserted-by":"crossref","unstructured":"Zhou C, Yan Q, Liu S (2024) Transient adversarial 3d projection attacks on object detection in autonomous driving. 
arXiv preprint arXiv:2409.17403","DOI":"10.1007\/978-3-031-93354-7_12"},{"key":"2443_CR291","doi-asserted-by":"crossref","unstructured":"Jin D, Jin Z, Zhou JT, Szolovits P (2020) Is bert really robust? A strong baseline for natural language attack on text classification and entailment. In: AAAI","DOI":"10.1609\/aaai.v34i05.6311"},{"key":"2443_CR292","doi-asserted-by":"crossref","unstructured":"Zang Y, Qi F, Yang C, Liu Z, Zhang M, Liu Q, Sun M (2020) Word-level textual adversarial attacking as combinatorial optimization. In: ACL","DOI":"10.18653\/v1\/2020.acl-main.540"},{"key":"2443_CR293","doi-asserted-by":"crossref","unstructured":"Wallace E, Feng S, Kandpal N, Gardner M, Singh S (2019) Universal adversarial triggers for attacking and analyzing NLP. In: EMNLP-IJCNLP","DOI":"10.18653\/v1\/D19-1221"},{"key":"2443_CR294","doi-asserted-by":"crossref","unstructured":"Kurita K, Michel P, Neubig G (2020) Weight poisoning attacks on pretrained models. In: ACL","DOI":"10.18653\/v1\/2020.acl-main.249"},{"key":"2443_CR295","doi-asserted-by":"crossref","unstructured":"Schuster R, Schuster T, Meri Y, Shmatikov V (2020) Humpty dumpty: controlling word meanings via corpus poisoning. In: IEEE symposium on security and privacy","DOI":"10.1109\/SP40000.2020.00115"},{"key":"2443_CR296","doi-asserted-by":"crossref","unstructured":"Bao R, Wang J, Zhao H (2021) Defending pre-trained language models from adversarial word substitution without performance sacrifice. In: ACL\/IJCNLP","DOI":"10.18653\/v1\/2021.findings-acl.287"},{"key":"2443_CR297","doi-asserted-by":"crossref","unstructured":"Zhang Z, Li Y, Wang J, Liu B, Li D, Guo Y, Chen X, Liu Y (2022) Remos: reducing defect inheritance in transfer learning via relevant model slicing. 
In: Proceedings of the 44th international conference on software engineering, pp 1856\u20131868","DOI":"10.1145\/3510003.3510191"},{"key":"2443_CR298","unstructured":"Carlini N, Tramer F, Wallace E, Jagielski M, Herbert-Voss A, Lee K, Roberts A, Brown TB, Song D, Erlingsson U et\u00a0al (2021) Extracting training data from large language models. In: USENIX security symposium, vol 6"},{"key":"2443_CR299","doi-asserted-by":"crossref","unstructured":"Wang G, Zhang L, Yang Z, Li X-Y (2018) Socialite: social activity mining and friend auto-labeling. In: 2018 IEEE 37th international performance computing and communications conference (IPCCC). IEEE, pp 1\u20138","DOI":"10.1109\/PCCC.2018.8710834"},{"key":"2443_CR300","doi-asserted-by":"crossref","unstructured":"Han F, Zhang L, You X, Wang G, Li X-Y (2019) Shad: privacy-friendly shared activity detection and data sharing. In: 2019 IEEE 16th international conference on mobile ad hoc and sensor systems (MASS). IEEE, pp 109\u2013117","DOI":"10.1109\/MASS.2019.00022"},{"key":"2443_CR301","doi-asserted-by":"crossref","unstructured":"Chen T, Zhai X, Ritter M, Lucic M, Houlsby N (2019) Self-supervised gans via auxiliary rotation loss. In: CVPR","DOI":"10.1109\/CVPR.2019.01243"},{"key":"2443_CR302","unstructured":"Abnar S, Dehghani M, Neyshabur B, Sedghi H (2021) Exploring the limits of large scale pre-training. arXiv"},{"key":"2443_CR303","doi-asserted-by":"crossref","unstructured":"Peters ME, Neumann M, Iyyer M, Gardner M, Clark C, Lee K, Zettlemoyer L (2018) Deep contextualized word representations. In: NAACL-HLT","DOI":"10.18653\/v1\/N18-1202"},{"key":"2443_CR304","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado GS, Dean J (2013) Distributed representations of words and phrases and their compositionality. In: NIPS"},{"key":"2443_CR305","unstructured":"Collobert R, Weston J, Bottou L, Karlen M, Kavukcuoglu K, Kuksa PP (2011) Natural language processing (almost) from scratch. 
J Mach Learn Res 12:2493\u20132537"},{"key":"2443_CR306","doi-asserted-by":"publisher","unstructured":"Neelakantan A, Shankar J, Passos A, McCallum A (2014) Efficient non-parametric estimation of multiple embeddings per word in vector space. In: EMNLP. https:\/\/doi.org\/10.3115\/v1\/d14-1113","DOI":"10.3115\/v1\/d14-1113"},{"key":"2443_CR307","unstructured":"Zhou P, Qi Z, Zheng S, Xu J, Bao H, Xu B (2016) Text classification improved by integrating bidirectional LSTM with two-dimensional max pooling. In: COLING"},{"key":"2443_CR308","unstructured":"Hui DU, Xueke XU, Dayong WU, Liu Y, Zhihua YU, Cheng X (2017) A sentiment classification method based on sentiment-specific word embedding. J Chin Inf Process 31(3):170\u2013176"},{"key":"2443_CR309","unstructured":"Liu Y, Ma C, Zhang Y (2017) Hierarchical machine translation model based on deep recursive neural network. Chin J Comput 40(4):861\u2013871"},{"key":"2443_CR310","unstructured":"Liang X, Ren F, Liu Y, Pan L, Hou Y, Zhang Y, Yan LI (2018) N-reader: machine reading comprehension model based on double layers of self-attention. J Chin Inf Process"},{"key":"2443_CR311","unstructured":"Zhichang Z, Zhenwen Z, Zhiman Z (2019) User intent classification based on indrnn-attention. J Comput Res Dev"},{"key":"2443_CR312","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"2443_CR313","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2012) Imagenet classification with deep convolutional neural networks. Adv Neural Inf Proces Syst"},{"key":"2443_CR314","unstructured":"Lin M, Chen Q, Yan S (2013) Network in network. arXiv"},{"key":"2443_CR315","unstructured":"Simonyan K, Zisserman A (2014) Very deep convolutional networks for large-scale image recognition. 
arXiv"},{"key":"2443_CR316","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia Y, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. In: CVPR","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"2443_CR317","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: CVPR","DOI":"10.1109\/CVPR.2016.90"},{"key":"2443_CR318","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, Van Der\u00a0Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: CVPR","DOI":"10.1109\/CVPR.2017.243"},{"key":"2443_CR319","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: CVPR","DOI":"10.1109\/CVPR.2014.81"},{"key":"2443_CR320","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: ICCV","DOI":"10.1109\/ICCV.2015.169"},{"key":"2443_CR321","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. arXiv"},{"key":"2443_CR322","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: ICCV","DOI":"10.1109\/ICCV.2017.322"},{"key":"2443_CR323","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: CVPR","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"2443_CR324","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: single shot multibox detector. In: European conference on computer vision","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"2443_CR325","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. 
In: CVPR","DOI":"10.1109\/CVPR.2016.91"},{"key":"2443_CR326","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: CVPR","DOI":"10.1109\/CVPR.2017.690"},{"key":"2443_CR327","unstructured":"Redmon J, Farhadi A (2018) Yolov3: An incremental improvement. arXiv"},{"key":"2443_CR328","unstructured":"Bochkovskiy A, Wang C-Y, Liao H-YM (2020) Yolov4: optimal speed and accuracy of object detection. arXiv"},{"key":"2443_CR329","doi-asserted-by":"crossref","unstructured":"Chen Q, Wang Y, Yang T, Zhang X, Cheng J, Sun J (2021) You only look one-level feature. arXiv","DOI":"10.1109\/CVPR46437.2021.01284"},{"key":"2443_CR330","doi-asserted-by":"crossref","unstructured":"Badrinarayanan V, Kendall A, Cipolla R (2017) Segnet: a deep convolutional encoder-decoder architecture for image segmentation. IEEE Trans  Pattern Anal Mach Intell 39(12):2481\u20132495","DOI":"10.1109\/TPAMI.2016.2644615"},{"key":"2443_CR331","doi-asserted-by":"crossref","unstructured":"Zhao H, Shi J, Qi X, Wang X, Jia J (2017) Pyramid scene parsing network. In: CVPR","DOI":"10.1109\/CVPR.2017.660"},{"key":"2443_CR332","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2014) Semantic image segmentation with deep convolutional nets and fully connected crfs. arXiv"},{"key":"2443_CR333","doi-asserted-by":"crossref","unstructured":"Chen L-C, Papandreou G, Kokkinos I, Murphy K, Yuille AL (2017) Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs. IEEE Trans  Pattern Anal Mach Intell 40(4):834\u2013848","DOI":"10.1109\/TPAMI.2017.2699184"},{"key":"2443_CR334","unstructured":"Chen L-C, Papandreou G, Schroff F, Adam H (2017) Rethinking atrous convolution for semantic image segmentation. 
arXiv"},{"key":"2443_CR335","doi-asserted-by":"crossref","unstructured":"Chen L-C, Zhu Y, Papandreou G, Schroff F, Adam H (2018) Encoder-decoder with atrous separable convolution for semantic image segmentation. In: ECCV","DOI":"10.1007\/978-3-030-01234-2_49"},{"key":"2443_CR336","doi-asserted-by":"crossref","unstructured":"Lin G, Milan A, Shen C, Reid I (2017) Refinenet: multi-path refinement networks for high-resolution semantic segmentation. In: CVPR","DOI":"10.1109\/CVPR.2017.549"},{"key":"2443_CR337","doi-asserted-by":"crossref","unstructured":"Everingham M, Eslami SA, Van Gool L, Williams CK, Winn J, Zisserman A (2015) The pascal visual object classes challenge: a retrospective. Int J Comput Vision 111:98-136","DOI":"10.1007\/s11263-014-0733-5"},{"key":"2443_CR338","doi-asserted-by":"crossref","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (voc) challenge. Int J Comput Vision 88:303\u2013338","DOI":"10.1007\/s11263-009-0275-4"},{"key":"2443_CR339","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: European Conference on Computer Vision","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"2443_CR340","doi-asserted-by":"crossref","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors. Nature 323(6088):533\u2013536","DOI":"10.1038\/323533a0"},{"key":"2443_CR341","doi-asserted-by":"crossref","unstructured":"Jordan MI (1997) Serial order: a parallel distributed processing approach. In: Advances in psychology, vol 121, pp 471\u2013495. North-Holland","DOI":"10.1016\/S0166-4115(97)80111-2"},{"key":"2443_CR342","doi-asserted-by":"crossref","unstructured":"Elman JL (1990) Finding structure in time. 
Cogn Sci 14(2):179\u2013211","DOI":"10.1016\/0364-0213(90)90002-E"},{"key":"2443_CR343","doi-asserted-by":"crossref","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput MIT-Press","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"2443_CR344","doi-asserted-by":"crossref","unstructured":"Graves A, Liwicki M, Fern\u00e1ndez S, Bertolami R, Bunke H, Schmidhuber J (2008) A novel connectionist system for unconstrained handwriting recognition. IEEE Trans Pattern Anal Mach Intell 31(5):855\u2013868","DOI":"10.1109\/TPAMI.2008.137"},{"key":"2443_CR345","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell: a neural image caption generator. In: CVPR","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"2443_CR346","unstructured":"Sutskever I, Vinyals O, Le QV (2014) Sequence to sequence learning with neural networks. arXiv"},{"key":"2443_CR347","doi-asserted-by":"crossref","unstructured":"Graves A (2013) Generating sequences with recurrent neural networks. arXiv","DOI":"10.1007\/978-3-642-24797-2"},{"key":"2443_CR348","doi-asserted-by":"crossref","unstructured":"Sundermeyer M, Schl\u00fcter R, Ney H (2012) Lstm neural networks for language modeling. In: INTERSPEECH","DOI":"10.21437\/Interspeech.2012-65"},{"key":"2443_CR349","unstructured":"Goodfellow IJ, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial networks. arXiv"},{"key":"2443_CR350","doi-asserted-by":"crossref","unstructured":"Isola P, Zhu J-Y, Zhou T, Efros AA (2017) Image-to-image translation with conditional adversarial networks. In: CVPR","DOI":"10.1109\/CVPR.2017.632"},{"key":"2443_CR351","doi-asserted-by":"crossref","unstructured":"Li C, Wand M (2016) Precomputed real-time texture synthesis with Markovian generative adversarial networks. 
In: European conference on computer vision","DOI":"10.1007\/978-3-319-46487-9_43"},{"key":"2443_CR352","doi-asserted-by":"crossref","unstructured":"Zhu J-Y, Park T, Isola P, Efros AA (2017) Unpaired image-to-image translation using cycle-consistent adversarial networks. In: ICCV","DOI":"10.1109\/ICCV.2017.244"},{"key":"2443_CR353","doi-asserted-by":"crossref","unstructured":"Karras T, Laine S, Aila T (2019) A style-based generator architecture for generative adversarial networks. In: CVPR","DOI":"10.1109\/CVPR.2019.00453"},{"key":"2443_CR354","unstructured":"Van\u00a0Oord A, Kalchbrenner N, Kavukcuoglu K (2016) Pixel recurrent neural networks. In: International conference on machine learning"},{"key":"2443_CR355","unstructured":"Kim T, Cha M, Kim H, Lee JK, Kim J (2017) Learning to discover cross-domain relations with generative adversarial networks. In: International conference on machine learning"},{"key":"2443_CR356","unstructured":"Denton E, Chintala S, Szlam A, Fergus R (2015) Deep generative image models using a laplacian pyramid of adversarial networks. arXiv"},{"key":"2443_CR357","doi-asserted-by":"crossref","unstructured":"Huang X, Li Y, Poursaeed O, Hopcroft J, Belongie S (2017) Stacked generative adversarial networks. In: CVPR","DOI":"10.1109\/CVPR.2017.202"},{"key":"2443_CR358","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: CVPR","DOI":"10.1109\/CVPR.2018.00745"},{"key":"2443_CR359","doi-asserted-by":"crossref","unstructured":"Woo S, Park J, Lee J-Y, Kweon IS (2018) Cbam: convolutional block attention module. In: ECCV","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"2443_CR360","doi-asserted-by":"crossref","unstructured":"Cao Y, Xu J, Lin S, Wei F, Hu H (2019) Gcnet: non-local networks meet squeeze-excitation networks and beyond. 
In: ICCV workshops","DOI":"10.1109\/ICCVW.2019.00246"},{"key":"2443_CR361","doi-asserted-by":"crossref","unstructured":"Huang Z, Wang X, Huang L, Huang C, Wei Y, Liu W (2019) Ccnet: Criss-cross attention for semantic segmentation. In: ICCV","DOI":"10.1109\/ICCV.2019.00069"},{"key":"2443_CR362","unstructured":"Zhang H, Goodfellow I, Metaxas D, Odena A (2019) Self-attention generative adversarial networks. In: International conference on machine learning"},{"key":"2443_CR363","unstructured":"Touvron H, Cord M, Douze M, Massa F, Sablayrolles A, J\u00e9gou H (2020) Training data-efficient image transformers & distillation through attention. arXiv"},{"key":"2443_CR364","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"2443_CR365","doi-asserted-by":"crossref","unstructured":"Graham B, El-Nouby A, Touvron H, Stock P, Joulin A, J\u00e9gou H, Douze M (2021) Levit: a vision transformer in convnet\u2019s clothing for faster inference. arXiv","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"2443_CR366","unstructured":"Zhou D, Kang B, Jin X, Yang L, Lian X, Jiang Z, Hou Q, Feng J (2021) Deepvit: towards deeper vision transformer. arXiv"},{"key":"2443_CR367","doi-asserted-by":"crossref","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. arXiv","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"2443_CR368","doi-asserted-by":"crossref","unstructured":"Guan T, Wang J, Lan S, Chandra R, Wu Z, Davis L, Manocha D (2021) M3detr: multi-representation, multi-scale, mutual-relation 3d object detection with transformers. 
arXiv","DOI":"10.1109\/WACV51458.2022.00235"},{"key":"2443_CR369","doi-asserted-by":"crossref","unstructured":"Valanarasu JMJ, Oza P, Hacihaliloglu I, Patel VM (2021) Medical transformer: gated axial-attention for medical image segmentation. arXiv","DOI":"10.1007\/978-3-030-87193-2_4"},{"key":"2443_CR370","doi-asserted-by":"publisher","first-page":"185","DOI":"10.1109\/TSE.1976.225946","volume":"3","author":"RCT Lee","year":"1976","unstructured":"Lee RCT, Chin YH, Chang SC (1976) Application of principal component analysis to multikey searching. IEEE Trans Softw Eng 3:185\u2013193","journal-title":"IEEE Trans Softw Eng"},{"key":"2443_CR371","unstructured":"Ye J, Janardan R, Li Q (2004) Two-dimensional linear discriminant analysis. In: Advances in neural information processing systems vol 17 [Neural information processing systems, NIPS 2004, December 13\u201318, 2004, Vancouver, British Columbia, Canada], pp 1569\u20131576"},{"key":"2443_CR372","doi-asserted-by":"publisher","first-page":"555","DOI":"10.2307\/256693","volume":"38","author":"S Robinson","year":"1995","unstructured":"Robinson S, Bennett R (1995) A typology of deviant workplace behaviors: a multidimensional scaling study. Acad Manag J 38:555\u2013572","journal-title":"Acad Manag J"},{"key":"2443_CR373","doi-asserted-by":"publisher","first-page":"968","DOI":"10.1016\/j.patrec.2005.11.017","volume":"9","author":"O Samko","year":"2006","unstructured":"Samko O, Marshall AD, Rosin PL (2006) Selection of the optimal parameter value for the isomap algorithm. Pattern Recogn. Lett. 9:968\u2013979","journal-title":"Pattern Recogn. Lett."},{"issue":"5500","key":"2443_CR374","doi-asserted-by":"publisher","first-page":"2323","DOI":"10.1126\/science.290.5500.2323","volume":"290","author":"ST Roweis","year":"2000","unstructured":"Roweis ST, Saul LK (2000) Nonlinear dimensionality reduction by locally linear embedding. 
Science 290(5500):2323\u20132326","journal-title":"Science"},{"key":"2443_CR375","doi-asserted-by":"publisher","first-page":"1373","DOI":"10.1162\/089976603321780317","volume":"6","author":"M Belkin","year":"2003","unstructured":"Belkin M, Niyogi P (2003) Laplacian eigenmaps for dimensionality reduction and data representation. Neural Comput 6:1373\u20131396","journal-title":"Neural Comput"},{"key":"2443_CR376","doi-asserted-by":"crossref","unstructured":"Singh AP, Gordon GJ. Relational learning via collective matrix factorization. In: ACM SIGKDD, pp 650\u2013658","DOI":"10.1145\/1401890.1401969"},{"key":"2443_CR377","doi-asserted-by":"crossref","unstructured":"Cao S, Lu W, Xu Q (2015) Grarep: learning graph representations with global structural information. In: CIKM, pp 891\u2013900","DOI":"10.1145\/2806416.2806512"},{"key":"2443_CR378","doi-asserted-by":"crossref","unstructured":"Ou M, Cui P, Pei J, Zhang Z, Zhu W (2016) Asymmetric transitivity preserving graph embedding. In: ACM SIGKDD, pp 1105\u20131114","DOI":"10.1145\/2939672.2939751"},{"key":"2443_CR379","unstructured":"Sugiyama M, Borgwardt KM (2015) Halting in random walk kernels. In: NIPS"},{"key":"2443_CR380","doi-asserted-by":"crossref","unstructured":"Kang U, Tong H, Sun J (2012) Fast random walk graph kernel. In: SIAM, pp 828\u2013838","DOI":"10.1137\/1.9781611972825.71"},{"key":"2443_CR381","first-page":"2539","volume":"12","author":"N Shervashidze","year":"2011","unstructured":"Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM (2011) Weisfeiler-lehman graph kernels. J Mach Learn Res 12:2539\u20132561","journal-title":"J Mach Learn Res"},{"key":"2443_CR382","unstructured":"Erhan D, Manzagol P-A, Bengio Y, Bengio S, Vincent P (2009) The difficulty of training deep architectures and the effect of unsupervised pre-training. 
In: Artificial intelligence and statistics"},{"key":"2443_CR383","unstructured":"Erhan D, Courville A, Bengio Y, Vincent P (2010) Why does unsupervised pre-training help deep learning? In: AISTATS"},{"key":"2443_CR384","unstructured":"Lee JD, Lei Q, Saunshi N, Zhuo J (2020) Predicting what you already know helps: provable self-supervised learning. arXiv"},{"key":"2443_CR385","unstructured":"Tosh C, Krishnamurthy A, Hsu D (2021) Contrastive learning, multi-view redundancy, and linear models. In: Algorithmic learning theory"},{"key":"2443_CR386","unstructured":"Arora S, Khandeparkar H, Khodak M, Plevrakis O, Saunshi N (2019) A theoretical analysis of contrastive unsupervised representation learning. arXiv"},{"key":"2443_CR387","unstructured":"Anwar S, Tahir M, Li C, Mian A, Khan FS, Muzaffar AW (2020) Image colorization: a survey and dataset. arXiv"},{"key":"2443_CR388","doi-asserted-by":"crossref","unstructured":"Ledig C, Theis L, Husz\u00e1r F, Caballero J, Cunningham A, Acosta A, Aitken A, Tejani A, Totz J, Wang Z et\u00a0al (2017) Photo-realistic single image super-resolution using a generative adversarial network. In: CVPR","DOI":"10.1109\/CVPR.2017.19"},{"key":"2443_CR389","unstructured":"Perarnau G, Van De\u00a0Weijer J, Raducanu B, \u00c1lvarez JM (2016) Invertible conditional gans for image editing. arXiv"},{"key":"2443_CR390","unstructured":"Vondrick C, Pirsiavash H, Torralba A (2016) Generating videos with scene dynamics. arXiv"},{"key":"2443_CR391","doi-asserted-by":"crossref","unstructured":"Tulyakov S, Liu M-Y, Yang X, Kautz J (2018) Mocogan: decomposing motion and content for video generation. In: CVPR","DOI":"10.1109\/CVPR.2018.00165"},{"key":"2443_CR392","doi-asserted-by":"crossref","unstructured":"Wang X, Gupta A (2015) Unsupervised learning of visual representations using videos. 
In: ICCV","DOI":"10.1109\/ICCV.2015.320"},{"key":"2443_CR393","doi-asserted-by":"crossref","unstructured":"Wei C, Xie L, Ren X, Xia Y, Su C, Liu J, Tian Q, Yuille AL (2019) Iterative reorganization with weak spatial constraints: solving arbitrary jigsaw puzzles for unsupervised representation learning. In: CVPR","DOI":"10.1109\/CVPR.2019.00201"},{"key":"2443_CR394","doi-asserted-by":"crossref","unstructured":"Ahsan U, Madhok R, Essa I (2019) Video jigsaw: Unsupervised learning of spatiotemporal context for video action recognition. In: WACV","DOI":"10.1109\/WACV.2019.00025"},{"key":"2443_CR395","doi-asserted-by":"crossref","unstructured":"Pathak D, Girshick R, Doll\u00e1r P, Darrell T, Hariharan B (2017) Learning features by watching objects move. In: CVPR","DOI":"10.1109\/CVPR.2017.638"},{"key":"2443_CR396","doi-asserted-by":"crossref","unstructured":"Croitoru I, Bogolin S-V, Leordeanu M (2017) Unsupervised learning from video to detect foreground objects in single images. In: ICCV","DOI":"10.1109\/ICCV.2017.465"},{"key":"2443_CR397","doi-asserted-by":"crossref","unstructured":"Sermanet P, Lynch C, Chebotar Y, Hsu J, Jang E, Schaal S, Levine S, Brain G (2018) Time-contrastive networks: self-supervised learning from video. In: ICRA","DOI":"10.1109\/ICRA.2018.8462891"},{"key":"2443_CR398","unstructured":"Korbar B, Tran D, Torresani L (2018) Cooperative learning of audio and video models from self-supervised synchronization. arXiv"},{"key":"2443_CR399","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511809071","author":"CD Manning","year":"2008","unstructured":"Manning CD, Raghavan P, Sch\u00fctze H (2008). Introduction to information retrieval. 
https:\/\/doi.org\/10.1017\/CBO9780511809071","journal-title":"Introduction to information retrieval."},{"key":"2443_CR400","doi-asserted-by":"publisher","DOI":"10.1023\/A:1007614523901","author":"RE Schapire","year":"1999","unstructured":"Schapire RE, Singer Y (1999) Improved boosting algorithms using confidence-rated predictions. Mach Learn. https:\/\/doi.org\/10.1023\/A:1007614523901","journal-title":"Mach Learn"},{"key":"2443_CR401","doi-asserted-by":"crossref","unstructured":"Reiter E (2018) A structured review of the validity of bleu. Comput Linguist","DOI":"10.1162\/coli_a_00322"},{"key":"2443_CR402","doi-asserted-by":"crossref","unstructured":"Denkowski M, Lavie A (2014) Meteor universal: language specific translation evaluation for any target language. In: Proceedings of the ninth workshop on statistical machine translation","DOI":"10.3115\/v1\/W14-3348"},{"key":"2443_CR403","doi-asserted-by":"crossref","unstructured":"Lin C-Y, Hovy E (2003) Automatic evaluation of summaries using n-gram co-occurrence statistics. In: HLT-NAACL","DOI":"10.3115\/1073445.1073465"},{"key":"2443_CR404","doi-asserted-by":"publisher","unstructured":"Kim Y (2014) Convolutional neural networks for sentence classification. In: EMNLP. https:\/\/doi.org\/10.3115\/v1\/d14-1181","DOI":"10.3115\/v1\/d14-1181"},{"key":"2443_CR405","doi-asserted-by":"crossref","unstructured":"Kalchbrenner N, Grefenstette E, Blunsom P (2014) A convolutional neural network for modelling sentences. In: ACL","DOI":"10.3115\/v1\/P14-1062"},{"key":"2443_CR406","doi-asserted-by":"crossref","unstructured":"Yang M, Zhao W, Ye J, Lei Z, Zhao Z, Zhang S (2018) Investigating capsule networks with dynamic routing for text classification. In: EMNLP","DOI":"10.18653\/v1\/D18-1350"},{"key":"2443_CR407","doi-asserted-by":"publisher","unstructured":"Yao L, Mao C, Luo Y (2019) Graph convolutional networks for text classification. In: AAAI. 
https:\/\/doi.org\/10.1609\/aaai.v33i01.33017370","DOI":"10.1609\/aaai.v33i01.33017370"},{"key":"2443_CR408","doi-asserted-by":"publisher","unstructured":"Wang Y, Sun A, Han J, Liu Y, Zhu X (2018) Sentiment analysis by capsules In: WWW. https:\/\/doi.org\/10.1145\/3178876.3186015","DOI":"10.1145\/3178876.3186015"},{"key":"2443_CR409","doi-asserted-by":"crossref","unstructured":"Socher R, Perelygin A, Wu J, Chuang J, Manning CD, Ng AY, Potts C (2013) Recursive deep models for semantic compositionality over a sentiment treebank. In: EMNLP","DOI":"10.18653\/v1\/D13-1170"},{"key":"2443_CR410","doi-asserted-by":"publisher","unstructured":"Tai KS, Socher R, Manning CD (2015) Improved semantic representations from tree-structured long short-term memory networks. In: ACL. https:\/\/doi.org\/10.3115\/v1\/p15-1150","DOI":"10.3115\/v1\/p15-1150"},{"key":"2443_CR411","unstructured":"Zhu X, Sobhani P, Guo H (2015) Long short-term memory over recursive structures. In: ICML"},{"key":"2443_CR412","doi-asserted-by":"publisher","unstructured":"Cheng J, Dong L, Lapata M (2016) Long short-term memory-networks for machine reading. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/d16-1053","DOI":"10.18653\/v1\/d16-1053"},{"key":"2443_CR413","doi-asserted-by":"publisher","unstructured":"Liu P, Qiu X, Chen X, Wu S, Huang X (2015) Multi-timescale long short-term memory neural network for modelling sentences and documents. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/d15-1280","DOI":"10.18653\/v1\/d15-1280"},{"key":"2443_CR414","unstructured":"Liu P, Qiu X, Huang X (2016) Recurrent neural network for text classification with multi-task learning. In: IJCAI"},{"key":"2443_CR415","unstructured":"Socher R, Pennington J, Huang EH, Ng AY, Manning CD (2011) Semi-supervised recursive autoencoders for predicting sentiment distributions. 
In: EMNLP"},{"key":"2443_CR416","unstructured":"Shen T, Zhou T, Long G, Jiang J, Zhang C (2018) Bi-directional block self-attention for fast and memory-efficient sequence modeling. In: ICLR"},{"key":"2443_CR417","unstructured":"Le QV, Mikolov T (2014) Distributed representations of sentences and documents. In: ICML"},{"key":"2443_CR418","doi-asserted-by":"publisher","unstructured":"Iyyer M, Manjunatha V, Boyd-Graber JL, III HD (2015) Deep unordered composition rivals syntactic methods for text classification. In: ACL. https:\/\/doi.org\/10.3115\/v1\/p15-1162","DOI":"10.3115\/v1\/p15-1162"},{"key":"2443_CR419","unstructured":"Miyato T, Dai AM, Goodfellow IJ (2017) Adversarial training methods for semi-supervised text classification. In: ICLR"},{"key":"2443_CR420","doi-asserted-by":"crossref","unstructured":"Lai S, Xu L, Liu K, Zhao J (2015) Recurrent convolutional neural networks for text classification. In: AAAI","DOI":"10.1609\/aaai.v29i1.9513"},{"key":"2443_CR421","unstructured":"Johnson R, Zhang T (2016) Supervised and semi-supervised text categorization using LSTM for region embeddings. In: ICML"},{"key":"2443_CR422","unstructured":"Bao Y, Wu M, Chang S, Barzilay R (2020) Few-shot text classification with distributional signatures. In: ICLR"},{"key":"2443_CR423","unstructured":"Wu F, Jr AHS, Zhang T, Fifty C, Yu T, Weinberger KQ (2019) Simplifying graph convolutional networks. In: ICML"},{"key":"2443_CR424","unstructured":"Zhang X, Zhao JJ, LeCun Y (2015) Character-level convolutional networks for text classification. In: NIPS"},{"key":"2443_CR425","doi-asserted-by":"publisher","unstructured":"Johnson R, Zhang T (2017) Deep pyramid convolutional neural networks for text categorization. In: ACL. https:\/\/doi.org\/10.18653\/v1\/P17-1052","DOI":"10.18653\/v1\/P17-1052"},{"key":"2443_CR426","doi-asserted-by":"publisher","unstructured":"Wang J, Wang Z, Zhang D, Yan J (2017) Combining knowledge with deep convolutional neural networks for short text classification. 
In: IJCAI. https:\/\/doi.org\/10.24963\/ijcai.2017\/406","DOI":"10.24963\/ijcai.2017\/406"},{"key":"2443_CR427","doi-asserted-by":"publisher","unstructured":"Huang L, Ma D, Li S, Zhang X, Wang H (2019) Text level graph neural network for text classification. In: EMNLP-IJCNLP. https:\/\/doi.org\/10.18653\/v1\/D19-1345","DOI":"10.18653\/v1\/D19-1345"},{"key":"2443_CR428","doi-asserted-by":"publisher","unstructured":"Sun C, Qiu X, Xu Y, Huang X (2019) How to fine-tune BERT for text classification? In: CCL. https:\/\/doi.org\/10.1007\/978-3-030-32381-3_16","DOI":"10.1007\/978-3-030-32381-3_16"},{"key":"2443_CR429","doi-asserted-by":"crossref","unstructured":"Yang Z, Yang D, Dyer C, He X, Smola AJ, Hovy EH (2016) Hierarchical attention networks for document classification. In: NAACL-HLT","DOI":"10.18653\/v1\/N16-1174"},{"key":"2443_CR430","doi-asserted-by":"publisher","unstructured":"Bowman SR. Angeli G, Potts C, Manning CD (2015) A large annotated corpus for learning natural language inference. In: EMNLP.https:\/\/doi.org\/10.18653\/v1\/d15-1075","DOI":"10.18653\/v1\/d15-1075"},{"key":"2443_CR431","doi-asserted-by":"publisher","unstructured":"Wang Z, Hamza W, Florian R (2017) Bilateral multi-perspective matching for natural language sentences. In: IJCAI. https:\/\/doi.org\/10.24963\/ijcai.2017\/579","DOI":"10.24963\/ijcai.2017\/579"},{"key":"2443_CR432","doi-asserted-by":"publisher","unstructured":"Liu X, He P, Chen W, Gao J (2019) Multi-task deep neural networks for natural language understanding. In: ACL. https:\/\/doi.org\/10.18653\/v1\/p19-1441","DOI":"10.18653\/v1\/p19-1441"},{"key":"2443_CR433","doi-asserted-by":"publisher","unstructured":"Williams A, Nangia N, Bowman SR (2018) A broad-coverage challenge corpus for sentence understanding through inference. In: NAACL-HLT. 
https:\/\/doi.org\/10.18653\/v1\/n18-1101","DOI":"10.18653\/v1\/n18-1101"},{"key":"2443_CR434","doi-asserted-by":"publisher","unstructured":"Marelli M, Bentivogli L, Baroni M, Bernardi R, Menini S, Zamparelli R (2014) Semeval-2014 task 1: Evaluation of compositional distributional semantic models on full sentences through semantic relatedness and textual entailment. In: SemEval@COLING. https:\/\/doi.org\/10.3115\/v1\/s14-2001","DOI":"10.3115\/v1\/s14-2001"},{"key":"2443_CR435","doi-asserted-by":"crossref","unstructured":"Dolan B, Quirk C, Brockett C (2004) Unsupervised construction of large paraphrase corpora: Exploiting massively parallel news sources. In: COLING","DOI":"10.3115\/1220355.1220406"},{"key":"2443_CR436","doi-asserted-by":"publisher","unstructured":"Fu J, Liu P, Neubig G (2020) Interpretable multi-dataset evaluation for named entity recognition. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.489","DOI":"10.18653\/v1\/2020.emnlp-main.489"},{"key":"2443_CR437","doi-asserted-by":"publisher","unstructured":"Lester B, Pressel D, Hemmeter A, Choudhury SR, Bangalore S (2020) Constrained decoding for computationally efficient named entity recognition taggers. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.findings-emnlp.166","DOI":"10.18653\/v1\/2020.findings-emnlp.166"},{"key":"2443_CR438","doi-asserted-by":"publisher","unstructured":"Luo Y, Zhao H, Zhan J (2020) Named entity recognition only from word embeddings. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.723","DOI":"10.18653\/v1\/2020.emnlp-main.723"},{"key":"2443_CR439","doi-asserted-by":"publisher","unstructured":"Li X, Feng J, Meng Y, Han Q, Wu F, Li J (2020) A unified MRC framework for named entity recognition. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.519","DOI":"10.18653\/v1\/2020.acl-main.519"},{"key":"2443_CR440","doi-asserted-by":"publisher","unstructured":"Zhang Y, Yang J (2018) Chinese NER using lattice LSTM. In: ACL. 
https:\/\/doi.org\/10.18653\/v1\/P18-1144","DOI":"10.18653\/v1\/P18-1144"},{"key":"2443_CR441","unstructured":"Meng Y, Wu W, Wang F, Li X, Nie P, Yin F, Li M, Han Q, Sun X, Li J (2019) Glyce: Glyph-vectors for Chinese character representations. In: NeurIPS"},{"key":"2443_CR442","doi-asserted-by":"publisher","unstructured":"Katiyar A, Cardie C (2018) Nested named entity recognition revisited. In: NAACL-HLT. https:\/\/doi.org\/10.18653\/v1\/n18-1079","DOI":"10.18653\/v1\/n18-1079"},{"key":"2443_CR443","doi-asserted-by":"publisher","unstructured":"Wang B, Lu W (2018) Neural segmental hypergraphs for overlapping mention recognition. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/d18-1019","DOI":"10.18653\/v1\/d18-1019"},{"key":"2443_CR444","doi-asserted-by":"publisher","unstructured":"Luan Y, Wadden D, He L, Shah A, Ostendorf M, Hajishirzi H (2019) A general framework for information extraction using dynamic span graphs. In: NAACL-HLT. https:\/\/doi.org\/10.18653\/v1\/n19-1308","DOI":"10.18653\/v1\/n19-1308"},{"key":"2443_CR445","doi-asserted-by":"crossref","unstructured":"Shibuya T, Hovy EH (2020) Nested named entity recognition via second-best sequence learning and decoding. Trans Assoc Comput Linguist 8:605\u2013620","DOI":"10.1162\/tacl_a_00334"},{"key":"2443_CR446","doi-asserted-by":"publisher","unstructured":"Lin H, Lu Y, Han X, Sun L (2019) Sequence-to-nuggets: Nested entity mention detection via anchor-region networks. In: ACL. https:\/\/doi.org\/10.18653\/v1\/p19-1511","DOI":"10.18653\/v1\/p19-1511"},{"key":"2443_CR447","doi-asserted-by":"publisher","unstructured":"Lai G, Xie Q, Liu H, Yang Y, Hovy EH (2017) RACE: large-scale reading comprehension dataset from examinations. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/d17-1082","DOI":"10.18653\/v1\/d17-1082"},{"key":"2443_CR448","doi-asserted-by":"publisher","unstructured":"Yang Y, Yih W, Meek C (2015) Wikiqa: a challenge dataset for open-domain question answering. In: EMNLP. 
https:\/\/doi.org\/10.18653\/v1\/d15-1237","DOI":"10.18653\/v1\/d15-1237"},{"key":"2443_CR449","unstructured":"Santos CN, Tan M, Xiang B, Zhou B (2016) Attentive pooling networks. CoRR arXiv:1602.03609"},{"key":"2443_CR450","doi-asserted-by":"publisher","unstructured":"Lee JY, Dernoncourt F (2016) Sequential short-text classification with recurrent and convolutional neural networks. In: NAACL-HLT. https:\/\/doi.org\/10.18653\/v1\/n16-1062","DOI":"10.18653\/v1\/n16-1062"},{"key":"2443_CR451","doi-asserted-by":"publisher","unstructured":"Kim S, D\u2019Haro LF, Banchs RE, Williams JD, Henderson M (2016) The fourth dialog state tracking challenge. In: Dialogues with social robots\u2014enablements, analyses, and evaluation, seventh international workshop on spoken dialogue systems, IWSDS 2016, Saariselk\u00e4, Finland, January 13\u201316, 2016. https:\/\/doi.org\/10.1007\/978-981-10-2585-3_36","DOI":"10.1007\/978-981-10-2585-3_36"},{"key":"2443_CR452","doi-asserted-by":"publisher","unstructured":"Ang J, Liu Y, Shriberg E (2005) Automatic dialog act segmentation and classification in multiparty meetings. In: 2005 IEEE international conference on acoustics, speech, and signal processing, ICASSP \u201905, Philadelphia, Pennsylvania, USA, March 18\u201323, 2005.https:\/\/doi.org\/10.1109\/ICASSP.2005.1415300","DOI":"10.1109\/ICASSP.2005.1415300"},{"key":"2443_CR453","doi-asserted-by":"publisher","unstructured":"Wan Y, Yan W, Gao J, Zhao Z, Wu J, Yu PS (2018) Improved dynamic memory network for dialogue act classification with adversarial training. In: IEEE international conference on Big Data, Big Data 2018, Seattle, WA, USA, December 10\u201313, 2018. https:\/\/doi.org\/10.1109\/BigData.2018.8622245","DOI":"10.1109\/BigData.2018.8622245"},{"key":"2443_CR454","doi-asserted-by":"publisher","unstructured":"Raheja V, Tetreault JR (2019) Dialogue act classification with context-aware self-attention. In: Proc. NAACL, 2019. 
https:\/\/doi.org\/10.18653\/v1\/n19-1373","DOI":"10.18653\/v1\/n19-1373"},{"key":"2443_CR455","doi-asserted-by":"publisher","unstructured":"Xu J, Gan Z, Cheng Y, Liu J (2020) Discourse-aware neural extractive text summarization. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.451","DOI":"10.18653\/v1\/2020.acl-main.451"},{"key":"2443_CR456","doi-asserted-by":"publisher","unstructured":"Zou Y, Zhang X, Lu W, Wei F, Zhou M (2020) Pre-training for abstractive document summarization by reinstating source text. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.297","DOI":"10.18653\/v1\/2020.emnlp-main.297"},{"key":"2443_CR457","doi-asserted-by":"crossref","unstructured":"Liu L, Lu Y, Yang M, Qu Q, Zhu J, Li H (2018) Generative adversarial network for abstractive text summarization. In: AAAI","DOI":"10.1609\/aaai.v32i1.12141"},{"key":"2443_CR458","doi-asserted-by":"publisher","unstructured":"Yang M, Qu Q, Tu W, Shen Y, Zhao Z, Chen X (2019) Exploring human-like reading strategy for abstractive text summarization. In: AAAI. https:\/\/doi.org\/10.1609\/aaai.v33i01.33017362","DOI":"10.1609\/aaai.v33i01.33017362"},{"key":"2443_CR459","doi-asserted-by":"publisher","unstructured":"Bhandari M, Gour PN, Ashfaq A, Liu P, Neubig G (2020) Re-evaluating evaluation in text summarization. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.751","DOI":"10.18653\/v1\/2020.emnlp-main.751"},{"key":"2443_CR460","doi-asserted-by":"publisher","unstructured":"Dong Y, Wang S, Gan Z, Cheng Y, Cheung JCK, Liu J (2020) Multi-fact correction in abstractive text summarization. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.749","DOI":"10.18653\/v1\/2020.emnlp-main.749"},{"key":"2443_CR461","doi-asserted-by":"publisher","unstructured":"Huang D, Cui L, Yang S, Bao G, Wang K, Xie J, Zhang Y (2020) What have we achieved on text summarization? In: EMNLP. 
https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.33","DOI":"10.18653\/v1\/2020.emnlp-main.33"},{"key":"2443_CR462","doi-asserted-by":"publisher","unstructured":"Kryscinski W, Paulus R, Xiong C, Socher R (2018) Improving abstraction in text summarization. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/d18-1207","DOI":"10.18653\/v1\/d18-1207"},{"key":"2443_CR463","doi-asserted-by":"publisher","unstructured":"Kryscinski W, McCann B, Xiong C, Socher R (2020) Evaluating the factual consistency of abstractive text summarization. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.750","DOI":"10.18653\/v1\/2020.emnlp-main.750"},{"key":"2443_CR464","doi-asserted-by":"publisher","unstructured":"Kouris P, Alexandridis G, Stafylopatis A (2019) Abstractive text summarization based on deep learning and semantic content generalization. In: ACL. https:\/\/doi.org\/10.18653\/v1\/p19-1501","DOI":"10.18653\/v1\/p19-1501"},{"key":"2443_CR465","doi-asserted-by":"publisher","unstructured":"Chen K, Wang R, Utiyama M, Sumita E (2020) Content word aware neural machine translation. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.34","DOI":"10.18653\/v1\/2020.acl-main.34"},{"key":"2443_CR466","doi-asserted-by":"publisher","unstructured":"Lin Z, Pan X, Wang M, Qiu X, Feng J, Zhou H, Li L (2020) Pre-training multilingual neural machine translation by leveraging alignment information. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.210","DOI":"10.18653\/v1\/2020.emnlp-main.210"},{"key":"2443_CR467","doi-asserted-by":"publisher","unstructured":"Bugliarello E, Okazaki N (2020) Enhancing machine translation with dependency-aware self-attention. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.147","DOI":"10.18653\/v1\/2020.acl-main.147"},{"key":"2443_CR468","doi-asserted-by":"publisher","unstructured":"Aji AF, Bogoychev N, Heafield K, Sennrich R (2020) In neural machine translation, what does transfer learning transfer? In: ACL. 
https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.688","DOI":"10.18653\/v1\/2020.acl-main.688"},{"key":"2443_CR469","doi-asserted-by":"publisher","unstructured":"Baziotis C, Haddow B, Birch A (2020) Language model prior for low-resource neural machine translation. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.615","DOI":"10.18653\/v1\/2020.emnlp-main.615"},{"key":"2443_CR470","doi-asserted-by":"crossref","unstructured":"Cui Q, Huang S, Li J, Geng X, Zheng Z, Huang G, Chen J (2021) Directqe: Direct pretraining for machine translation quality estimation. In: AAAI","DOI":"10.1609\/aaai.v35i14.17506"},{"key":"2443_CR471","doi-asserted-by":"publisher","unstructured":"Wu C, Hoi SCH, Socher R, Xiong C (2020) TOD-BERT: pre-trained natural language understanding for task-oriented dialogue. In: EMNLP. https:\/\/doi.org\/10.18653\/v1\/2020.emnlp-main.66","DOI":"10.18653\/v1\/2020.emnlp-main.66"},{"key":"2443_CR472","doi-asserted-by":"publisher","unstructured":"Campagna G, Foryciarz A, Moradshahi M, Lam MS (2020) Zero-shot transfer learning with synthesized data for multi-domain dialogue state tracking. In: ACL. https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.12","DOI":"10.18653\/v1\/2020.acl-main.12"},{"key":"2443_CR473","doi-asserted-by":"crossref","unstructured":"Liu Q, Yu L, Rimell L, Blunsom P (2021) Pretraining the noisy channel model for task-oriented dialogue. CoRR arXiv:2103.10518","DOI":"10.1162\/tacl_a_00390"},{"key":"2443_CR474","unstructured":"SST Corpus. http:\/\/nlp.stanford.edu\/sentiment (2013)"},{"key":"2443_CR475","doi-asserted-by":"crossref","unstructured":"Pang B, Lee L (2005) Seeing stars: Exploiting class relationships for sentiment categorization with respect to rating scales. In: ACL","DOI":"10.3115\/1219840.1219855"},{"key":"2443_CR476","doi-asserted-by":"crossref","unstructured":"Cer D, Diab M, Agirre E, Lopez-Gazpio I, Specia L (2017) Semeval-2017 task 1: semantic textual similarity-multilingual and cross-lingual focused evaluation. 
arXiv","DOI":"10.18653\/v1\/S17-2001"},{"key":"2443_CR477","doi-asserted-by":"crossref","unstructured":"Hendrickx I, Kim SN, Kozareva Z, Nakov P, S\u00e9aghdha D\u00d3, Pad\u00f3 S, Pennacchiotti M, Romano L, Szpakowicz S (2009) Semeval-2010 task 8: multi-way classification of semantic relations between pairs of nominals. In: Proc. NAACL, 2009","DOI":"10.3115\/1621969.1621986"},{"key":"2443_CR478","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-005-7880-9","author":"J Wiebe","year":"2005","unstructured":"Wiebe J, Wilson T, Cardie C (2005) Annotating expressions of opinions and emotions in language. Lang Resour Eval. https:\/\/doi.org\/10.1007\/s10579-005-7880-9","journal-title":"Lang Resour Eval"},{"key":"2443_CR479","unstructured":"MPQA Corpus. http:\/\/www.cs.pitt.edu\/mpqa\/ (2005)"},{"key":"2443_CR480","doi-asserted-by":"publisher","unstructured":"Diao Q, Qiu M, Wu C, Smola AJ, Jiang J, Wang C (2014) Jointly modeling aspects, ratings and sentiments for movie recommendation (JMARS). In: ACM SIGKDD. https:\/\/doi.org\/10.1145\/2623330.2623758","DOI":"10.1145\/2623330.2623758"},{"key":"2443_CR481","unstructured":"20NG Corpus. http:\/\/ana.cachopo.org\/datasets-for-single-label-text-categorization (2007)"},{"key":"2443_CR482","unstructured":"AG Corpus. http:\/\/www.di.unipi.it\/~gulli\/AG_corpus_of_news_articles.html (2004)"},{"key":"2443_CR483","unstructured":"Reuters Corpus. https:\/\/www.cs.umb.edu\/~smimarog\/textmining\/datasets\/ (2007)"},{"key":"2443_CR484","unstructured":"Reuters Corpus. https:\/\/martin-thoma.com\/nlp-reuters (2017)"},{"key":"2443_CR485","doi-asserted-by":"publisher","DOI":"10.3233\/SW-140134","author":"J Lehmann","year":"2015","unstructured":"Lehmann J, Isele R, Jakob M, Jentzsch A, Kontokostas D, Mendes PN, Hellmann S, Morsey M, Kleef P, Auer S, Bizer C (2015) Dbpedia\u2013a large-scale, multilingual knowledge base extracted from wikipedia. Semantic Web. 
https:\/\/doi.org\/10.3233\/SW-140134","journal-title":"Semantic Web"},{"key":"2443_CR486","unstructured":"Ohsumed Corpus (2015) http:\/\/davis.wpi.edu\/xmdv\/datasets\/ohsumed.html"},{"key":"2443_CR487","doi-asserted-by":"crossref","unstructured":"Williams A, Nangia N, Bowman SR (2017) A broad-coverage challenge corpus for sentence understanding through inference. arXiv","DOI":"10.18653\/v1\/N18-1101"},{"key":"2443_CR488","doi-asserted-by":"crossref","unstructured":"Rajpurkar P, Zhang J, Lopyrev K, Liang P (2016) Squad: 100,000+ questions for machine comprehension of text. arXiv","DOI":"10.18653\/v1\/D16-1264"},{"key":"2443_CR489","unstructured":"Levesque H, Davis E, Morgenstern L (2012) The winograd schema challenge. In: Thirteenth international conference on the principles of knowledge representation and reasoning"},{"key":"2443_CR490","unstructured":"Dolan WB, Brockett C (2005) Automatically constructing a corpus of sentential paraphrases. In: IWP"},{"key":"2443_CR491","doi-asserted-by":"crossref","unstructured":"Rajpurkar P, Jia R, Liang P (2018) Know what you don\u2019t know: unanswerable questions for squad. arXiv","DOI":"10.18653\/v1\/P18-2124"},{"key":"2443_CR492","doi-asserted-by":"crossref","unstructured":"Lai G, Xie Q, Liu H, Yang Y, Hovy E (2017) Race: large-scale reading comprehension dataset from examinations. arXiv","DOI":"10.18653\/v1\/D17-1082"},{"key":"2443_CR493","unstructured":"Jurafsky D, Shriberg E (1997) Switchboard swbd-damsl shallow-discourse-function annotation coders manual"},{"key":"2443_CR494","unstructured":"Li J, Zhou P, Xiong C, Socher R, Hoi SC (2020) Prototypical contrastive learning of unsupervised representations. arXiv preprint arXiv:2005.04966"},{"key":"2443_CR495","unstructured":"Donahue J, Simonyan K (2019) Large scale adversarial representation learning. 
Adv Neural Inf Process Syst 32"},{"key":"2443_CR496","doi-asserted-by":"crossref","unstructured":"He K, Chen X, Xie S, Li Y, Doll\u00e1r P, Girshick R (2021) Masked autoencoders are scalable vision learners. arXiv preprint arXiv:2111.06377","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"2443_CR497","unstructured":"http:\/\/yann.lecun.com\/exdb\/mnist\/"},{"key":"2443_CR498","unstructured":"http:\/\/ufldl.stanford.edu\/housenumbers\/"},{"key":"2443_CR499","unstructured":"https:\/\/www.cs.toronto.edu\/~kriz\/index.html"},{"key":"2443_CR500","unstructured":"Coates A, Ng A, Lee H (2011) An analysis of single-layer networks in unsupervised feature learning. In: Proceedings of the fourteenth international conference on artificial intelligence and statistics"},{"key":"2443_CR501","unstructured":"https:\/\/cs.stanford.edu\/~acoates\/stl10\/"},{"key":"2443_CR502","unstructured":"http:\/\/www.vision.caltech.edu\/Image_Datasets\/Caltech101\/"},{"key":"2443_CR503","unstructured":"Miller GA (1998) WordNet: an electronic lexical database"},{"key":"2443_CR504","unstructured":"https:\/\/image-net.org\/"},{"key":"2443_CR505","unstructured":"https:\/\/serre-lab.clps.brown.edu\/resource\/hmdb-a-large-human-motion-database\/"},{"key":"2443_CR506","doi-asserted-by":"crossref","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) HMDB: a large video database for human motion recognition. In: ICCV","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"2443_CR507","unstructured":"https:\/\/www.crcv.ucf.edu\/data\/UCF101.php"},{"key":"2443_CR508","unstructured":"https:\/\/www.crcv.ucf.edu\/data\/UCF50.php"},{"key":"2443_CR509","doi-asserted-by":"crossref","unstructured":"Bossard L, Guillaumin M, Van\u00a0Gool L (2014) Food-101\u2014mining discriminative components with random forests. 
In: European conference on computer vision","DOI":"10.1007\/978-3-319-10599-4_29"},{"key":"2443_CR510","doi-asserted-by":"crossref","unstructured":"Berg T, Liu J, Woo\u00a0Lee S, Alexander ML, Jacobs DW, Belhumeur PN (2014) Birdsnap: large-scale fine-grained visual categorization of birds. In: CVPR","DOI":"10.1109\/CVPR.2014.259"},{"key":"2443_CR511","doi-asserted-by":"crossref","unstructured":"Xiao J, Hays J, Ehinger KA, Oliva A, Torralba A (2010) Sun database: large-scale scene recognition from abbey to zoo. In: 2010 IEEE computer society conference on computer vision and pattern recognition","DOI":"10.1109\/CVPR.2010.5539970"},{"key":"2443_CR512","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s11263-014-0748-y","volume":"119","author":"J Xiao","year":"2016","unstructured":"Xiao J, Ehinger KA, Hays J, Torralba A, Oliva A (2016) Sun database: exploring a large collection of scene categories. Int J Comput Vis 119:3\u201322","journal-title":"Int J Comput Vis"},{"key":"2443_CR513","unstructured":"http:\/\/places.csail.mit.edu\/downloadData.html"},{"key":"2443_CR514","unstructured":"http:\/\/ai.stanford.edu\/~jkrause\/cars\/car_dataset.html"},{"key":"2443_CR515","unstructured":"Maji S, Kannala J, Rahtu E, Blaschko,M. Vedaldi A (2013) Fine-grained visual classification of aircraft. 
Technical report"},{"key":"2443_CR516","unstructured":"https:\/\/sites.google.com\/site\/fgcomp2013\/"},{"key":"2443_CR517","unstructured":"https:\/\/www.robots.ox.ac.uk\/~vgg\/data\/fgvc-aircraft\/"},{"key":"2443_CR518","unstructured":"https:\/\/www.robots.ox.ac.uk\/~vgg\/data\/pets\/"},{"key":"2443_CR519","unstructured":"https:\/\/www.robots.ox.ac.uk\/~vgg\/data\/flowers\/"},{"key":"2443_CR520","unstructured":"https:\/\/www.robots.ox.ac.uk\/~vgg\/data\/dtd\/"},{"key":"2443_CR521","unstructured":"https:\/\/sites.google.com\/view\/fgvc5\/competitions\/inaturalist"},{"key":"2443_CR522","unstructured":"https:\/\/www.inaturalist.org\/"},{"key":"2443_CR523","doi-asserted-by":"crossref","unstructured":"Sun C, Shrivastava A, Singh S, Gupta A (2017) Revisiting unreasonable effectiveness of data in deep learning era. In: Proceedings of the IEEE international conference on computer vision, pp 843\u2013852","DOI":"10.1109\/ICCV.2017.97"},{"key":"2443_CR524","unstructured":"http:\/\/host.robots.ox.ac.uk\/pascal\/VOC\/"},{"key":"2443_CR525","unstructured":"http:\/\/host.robots.ox.ac.uk\/pascal\/VOC\/voc2007\/index.html"},{"key":"2443_CR526","unstructured":"http:\/\/host.robots.ox.ac.uk\/pascal\/VOC\/voc2011\/index.html"},{"key":"2443_CR527","unstructured":"http:\/\/host.robots.ox.ac.uk\/pascal\/VOC\/voc2012\/index.html"},{"key":"2443_CR528","doi-asserted-by":"crossref","unstructured":"Zhou B, Zhao H, Puig X, Fidler S, Barriuso A, Torralba A (2017) Scene parsing through ade20k dataset. In: CVPR","DOI":"10.1109\/CVPR.2017.544"},{"key":"2443_CR529","doi-asserted-by":"publisher","first-page":"301","DOI":"10.1007\/s11263-018-1140-0","volume":"127","author":"B Zhou","year":"2019","unstructured":"Zhou B, Zhao H, Puig X, Xiao T, Fidler S, Barriuso A, Torralba A (2019) Semantic understanding of scenes through the ade20k dataset. 
Int J Comput Vis 127:301\u2013321","journal-title":"Int J Comput Vis"},{"key":"2443_CR530","unstructured":"https:\/\/cs.nyu.edu\/~silberman\/datasets\/nyu_depth_v2.html"},{"key":"2443_CR531","unstructured":"Cordts M, Omran M, Ramos S, Scharw\u00e4chter T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2015) The cityscapes dataset. In: CVPR workshop on the future of datasets in vision"},{"key":"2443_CR532","doi-asserted-by":"crossref","unstructured":"Cordts M, Omran M, Ramos S, Rehfeld T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2016) The cityscapes dataset for semantic urban scene understanding. In: Proc. of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2016.350"},{"key":"2443_CR533","doi-asserted-by":"crossref","unstructured":"Gupta A, Dollar P, Girshick R (2019) LVIS: A dataset for large vocabulary instance segmentation. In: CVPR","DOI":"10.1109\/CVPR.2019.00550"},{"key":"2443_CR534","unstructured":"https:\/\/davischallenge.org\/"},{"key":"2443_CR535","unstructured":"https:\/\/davischallenge.org\/davis2017\/code.html"},{"key":"2443_CR536","unstructured":"Doersch C. Data analysis project: what makes Paris look like Paris?"},{"key":"2443_CR537","unstructured":"http:\/\/www.cs.toronto.edu\/~nitish\/unsupervised_video\/"},{"key":"2443_CR538","unstructured":"Srivastava N, Mansimov E, Salakhudinov R (2015) Unsupervised learning of video representations using lstms. In: International conference on machine learning"},{"key":"2443_CR539","doi-asserted-by":"crossref","unstructured":"Thomee B, Shamma DA, Friedland G, Elizalde B, Ni K, Poland D, Borth D, Li L-J (2016) Yfcc100m: the new data in multimedia research. Commun ACM","DOI":"10.1145\/2812802"},{"key":"2443_CR540","unstructured":"http:\/\/projects.dfki.uni-kl.de\/yfcc100m\/"},{"key":"2443_CR541","unstructured":"Jin W, Liu X, Zhao X, Ma Y, Shah N, Tang J (2021) Automated self-supervised learning for graphs. 
CoRR arXiv:2106.05470"},{"key":"2443_CR542","unstructured":"Peng Z, Dong Y, Luo M, Wu X, Zheng Q (2020) Self-supervised graph representation learning via global context prediction. CoRR arXiv:2003.01604"},{"key":"2443_CR543","unstructured":"Zhu Y, Xu Y, Yu F, Liu Q, Wu S, Wang L (2020) Deep graph contrastive representation learning. CoRR arXiv:2006.04131"},{"key":"2443_CR544","doi-asserted-by":"crossref","unstructured":"Jin M, Zheng Y, Li Y, Gong C, Zhou C, Pan S (2021) Multi-scale contrastive siamese networks for self-supervised graph representation learning. CoRR arXiv:2105.05682","DOI":"10.24963\/ijcai.2021\/204"},{"key":"2443_CR545","unstructured":"Hu Z, Fan C, Chen T, Chang K, Sun Y (2019) Pre-training graph neural networks for generic structural feature extraction. CoRR arXiv:1905.13728"},{"key":"2443_CR546","unstructured":"Zhu Y, Xu Y, Yu F, Wu S, Wang L (2020) Cagnn: cluster-aware graph neural networks for unsupervised graph representation learning. arXiv preprint arXiv:2009.01674"},{"key":"2443_CR547","unstructured":"Zhang H, Lin S, Liu W, Zhou P, Tang J, Liang X, Xing EP (2020) Iterative graph self-distillation. CoRR arXiv:2010.12609"},{"key":"2443_CR548","doi-asserted-by":"crossref","unstructured":"Lin S, Zhou P, Hu Z-Y, Wang S, Zhao R, Zheng Y, Lin L, Xing E, Liang X (2021) Prototypical graph contrastive learning. IEEE trans neural networks learn syst 35(2):2747\u20132758","DOI":"10.1109\/TNNLS.2022.3191086"},{"key":"2443_CR549","first-page":"15980","volume":"35","author":"A Subramonian","year":"2021","unstructured":"Subramonian A (2021) Motif-driven contrastive learning of graph representations. Proc AAAI Conf Artif Intell 35:15980\u201315981","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"2443_CR550","unstructured":"Opolka FL, Solomon A, Cangea C, Velickovic P, Li\u00f2 P, Hjelm RD (2019) Spatio-temporal deep graph infomax. 
CoRR"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02443-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02443-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02443-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,13]],"date-time":"2025-12-13T09:41:36Z","timestamp":1765618896000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02443-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"references-count":550,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2025,12]]}},"alternative-id":["2443"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02443-6","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"6 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 October 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 November 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}