{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T04:25:25Z","timestamp":1773807925546,"version":"3.50.1"},"reference-count":88,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,6,1]],"date-time":"2022-06-01T00:00:00Z","timestamp":1654041600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2017YFB0304100"],"award-info":[{"award-number":["2017YFB0304100"]}]},{"name":"Key Projects of National Natural Science Foundation of China","award":["U1836222"],"award-info":[{"award-number":["U1836222"]}]},{"name":"Key Projects of National Natural Science Foundation of China","award":["61733011"],"award-info":[{"award-number":["61733011"]}]},{"name":"Huawei-SJTU long term AI project"},{"name":"Cutting-edge Machine Reading Comprehension and Language Model"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2022,6,1]]},"DOI":"10.1109\/tpami.2020.3046683","type":"journal-article","created":{"date-parts":[[2020,12,22]],"date-time":"2020-12-22T20:58:03Z","timestamp":1608670683000},"page":"3285-3299","source":"Crossref","is-referenced-by-count":40,"title":["SG-Net: Syntax Guided Transformer for Language Representation"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4183-3645","authenticated-orcid":false,"given":"Zhuosheng","family":"Zhang","sequence":"first","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Yuwei","family":"Wu","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Junru","family":"Zhou","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"given":"Sufeng","family":"Duan","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7290-0487","authenticated-orcid":false,"given":"Hai","family":"Zhao","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8007-2503","authenticated-orcid":false,"given":"Rui","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Engineering, Shanghai Jiao Tong University, Shanghai, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6511"},{"key":"ref2","article-title":"Improving language understanding by generative pre-training","author":"Radford","year":"2018"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/vl\/N19-142"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6510"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.399"},{"key":"ref6","article-title":"Neural machine translation with universal visual representation","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-2124"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d15-1075"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1146\/annurev.neuro.26.041002.131047"},{"key":"ref10","first-page":"5998","article-title":"Attention is all you need","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Vaswani"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1202"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2017\/578"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1176"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"ref15","first-page":"449","article-title":"One-shot learning for question-answering in Gaokao history challenge","volume-title":"Proc. 27th Int. Conf. Comput. Linguistics","author":"Zhang"},{"key":"ref16","first-page":"1802","article-title":"Subword-augmented embedding for cloze reading comprehension","volume-title":"Proc. 27th Int. Conf. Comput. Linguistics","author":"Zhang"},{"key":"ref17","first-page":"3740","article-title":"Modeling multi-turn conversation with deep utterance aggregation","volume-title":"Proc. 27th Int. Conf. Comput. Linguistics","author":"Zhang"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1172"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6347"},{"key":"ref20","article-title":"Neural language modeling by jointly learning syntax and lexicon","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Shen"},{"key":"ref21","first-page":"5754","article-title":"XLNet: Generalized autoregressive pretraining for language understanding","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Yang"},{"key":"ref22","article-title":"RoBERTa: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref23","article-title":"ALBERT: A lite bert for self-supervised learning of language representations","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lan"},{"issue":"4","key":"ref24","first-page":"467","article-title":"Class-based n-gram models of natural language","volume":"18","author":"Brown","year":"1992","journal-title":"Comput. Linguistics"},{"issue":"61","key":"ref25","first-page":"1817","article-title":"A framework for learning predictive structures from multiple tasks and unlabeled data","volume":"6","author":"Ando","year":"2005","journal-title":"J. Mach. Learn. Res."},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.3115\/1610075.1610094"},{"key":"ref27","first-page":"3111","article-title":"Distributed representations of words and phrases and their compositionality","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Mikolov"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1162"},{"key":"ref29","first-page":"1631","article-title":"Recursive deep models for semantic compositionality over a sentiment treebank","volume-title":"Proc. Conf. Empir. Methods Natural Lang. Process.","author":"Socher"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.3115\/1119176.1119195"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-5301"},{"key":"ref33","article-title":"Inductive dependency parsing of natural language text","author":"Nivre","year":"2005"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.21236\/ADA273556"},{"key":"ref35","first-page":"31","article-title":"Survey on parsing three dependency representations for english","volume-title":"Proc. 51st Annu. Meeting Assoc. Comput. Linguistics Proc. Student Res. Workshop","author":"Ivanova"},{"key":"ref36","volume-title":"Head-Driven Phrase Structure Grammar","author":"Pollard","year":"1994"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.2200\/s00169ed1v01y200901hlt002"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/p15-1033"},{"key":"ref39","first-page":"188","article-title":"Transition-based dependency parsing with rich non-local features","volume-title":"Proc. 49th Annu. Meeting Assoc. Comput. Linguistics: Hum. Lang. Technol.","author":"Zhang"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1218"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1131"},{"key":"ref42","first-page":"1204","article-title":"Unsupervised induction of tree substitution grammars for dependency parsing","volume-title":"Proc. Conf. Empir. Methods Natural Lang. Process.","author":"Blunsom"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-7712"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1131"},{"key":"ref45","first-page":"3203","article-title":"Seq2seq dependency parsing","volume-title":"Proc. 27th Int. Conf. Comput. Linguistics","author":"Li"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1130"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6348"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1304"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2772846"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11910"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1098"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/IALP48816.2019.9037672"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1139"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1075"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1548"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-4828"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D18-1408"},{"key":"ref58","article-title":"Gaussian error linear units (GELUs)","author":"Hendrycks","year":"2016"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1082"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p16-2022"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-5307"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n18-1132"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1441"},{"key":"ref64","first-page":"298","article-title":"Explicit contextual semantics for text comprehension","volume-title":"Proc. 33rd Pacific Asia Conf. Lang. Inf. Comput.","author":"Zhang"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1230"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1249"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.21236\/ADA273556"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2885032"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1162"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00051"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2922537"},{"key":"ref72","first-page":"5998","article-title":"Attention is all you need","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Vaswani"},{"key":"ref73","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1007\/BF02295996"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i16.17705"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1002\/0470011815.b2a15112"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.3115\/1219840.1219906"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1270"},{"key":"ref79","article-title":"Option comparison network for multiple-choice reading comprehension","author":"Ran","year":"2019"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6502"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016586"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/p18-1198"},{"key":"ref83","article-title":"Bidirectional attention flow for machine comprehension","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Seo"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1116"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1064"},{"key":"ref86","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1078"},{"key":"ref87","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11164"},{"key":"ref88","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-76298-0_52"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/9769881\/09303437.pdf?arnumber=9303437","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,9]],"date-time":"2024-01-09T23:43:26Z","timestamp":1704843806000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9303437\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,1]]},"references-count":88,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2020.3046683","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,6,1]]}}}