{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T12:31:41Z","timestamp":1776083501351,"version":"3.50.1"},"reference-count":74,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"4","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22B2017"],"award-info":[{"award-number":["U22B2017"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62076105"],"award-info":[{"award-number":["62076105"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Big Data"],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1109\/tbdata.2024.3524081","type":"journal-article","created":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T14:37:55Z","timestamp":1735569475000},"page":"2085-2098","source":"Crossref","is-referenced-by-count":14,"title":["NAGphormer+: A Tokenized Graph Transformer With Neighborhood Augmentation for Node Classification in Large Graphs"],"prefix":"10.1109","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7588-6713","authenticated-orcid":false,"given":"Jinsong","family":"Chen","sequence":"first","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"given":"Chang","family":"Liu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"given":"Kaiyuan","family":"Gao","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7563-2165","authenticated-orcid":false,"given":"Gaichao","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7627-4604","authenticated-orcid":false,"given":"Kun","family":"He","sequence":"additional","affiliation":[{"name":"School of Computer Science and Technology, Huazhong University of Science and Technology, Wuhan, China"}]}],"member":"263","reference":[{"key":"ref1","first-page":"1725","article-title":"Simple and deep graph convolutional networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Chen"},{"key":"ref2","article-title":"Semi-supervised classification with graph convolutional networks","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Kipf"},{"key":"ref3","article-title":"Graph attention networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Veli\u010dkovi\u0107"},{"key":"ref4","first-page":"1263","article-title":"Neural message passing for quantum chemistry","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Gilmer"},{"key":"ref5","article-title":"How powerful are graph neural networks","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Xu"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313488"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219890"},{"key":"ref8","first-page":"5171","article-title":"Link prediction based on graph neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.3301152"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5747"},{"key":"ref11","article-title":"Revisiting over-smoothing in deep GCNs","author":"Yang","year":"2020"},{"key":"ref12","article-title":"SkipNode: On alleviating over-smoothing for deep graph convolutional networks","author":"Lu","year":"2021"},{"key":"ref13","article-title":"Tackling over-smoothing for general graph convolutional networks","author":"Huang","year":"2020"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557419"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.1810.04805"},{"key":"ref17","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"ref18","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Dosovitskiy"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref20","article-title":"A generalization of transformer networks to graphs","author":"Dwivedi","year":"2020"},{"key":"ref21","first-page":"21618","article-title":"Rethinking graph transformers with spectral attention","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Kreuzer"},{"key":"ref22","first-page":"28877","article-title":"Do transformers really perform badly for graph representation","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ying"},{"key":"ref23","first-page":"13266","article-title":"Representing long-range context for graph neural networks with global attention","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Jain"},{"key":"ref24","article-title":"Transformer for graphs: An overview from architecture perspective","author":"Min","year":"2022"},{"key":"ref25","first-page":"3469","article-title":"Structure-aware transformer for graph representation learning","volume-title":"Proc. Int. Conf. Mach. Learn.","volume":"162","author":"Chen"},{"key":"ref26","article-title":"FastGCN: Fast learning with graph convolutional networks via importance sampling","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen"},{"key":"ref27","first-page":"11247","article-title":"Layer-dependent importance sampling for training deep and large graph convolutional networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zou"},{"key":"ref28","first-page":"14556","article-title":"Scalable graph neural networks via bidirectional propagation","volume-title":"Proc. Adv. 
Neural Inf. Process. Syst.","author":"Chen"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512044"},{"key":"ref30","first-page":"14501","article-title":"Recipe for a general, powerful, scalable graph transformer","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Ramp\u00e1sek"},{"key":"ref31","first-page":"27387","article-title":"NodeFormer: A. scalable graph structure learning transformer for node classification","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref32","article-title":"Rethinking attention with performers","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Choromanski"},{"key":"ref33","first-page":"17283","article-title":"Big bird: Transformers for longer sequences","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zaheer"},{"key":"ref34","article-title":"Predict then propagate: Graph neural networks meet personalized PageRank","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Klicpera"},{"key":"ref35","first-page":"6861","article-title":"Simplifying graph convolutional networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wu"},{"key":"ref36","article-title":"Adaptive universal generalized PageRank graph neural network","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chien"},{"key":"ref37","article-title":"NAGphormer: A. tokenized graph transformer for node classification in large graphs","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Chen"},{"key":"ref38","first-page":"1024","article-title":"Inductive representation learning on large graphs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hamilton"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330925"},{"key":"ref40","article-title":"GraphSAINT: Graph sampling based inductive learning method","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zeng"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403296"},{"key":"ref42","article-title":"VCR-graphormer: A mini-batch graph transformer via virtual connections","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Fu"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/396"},{"key":"ref44","first-page":"54656","article-title":"Less is more: On the over-globalizing problem in graph transformers","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xing"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/244"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2024\/240"},{"key":"ref47","article-title":"Gophormer: Ego-graph transformer for node classification","author":"Zhao","year":"2021"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539296"},{"key":"ref49","first-page":"64753","article-title":"SGFormer: Simplifying and empowering transformers for large-graph representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Wu"},{"key":"ref50","first-page":"21171","article-title":"Hierarchical graph transformer with adaptive node sampling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhang"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449796"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i11.17203"},{"key":"ref53","first-page":"22092","article-title":"Graph random neural networks for semi-supervised learning on graphs","volume-title":"Proc. 
Adv. Neural Inf. Process. Syst.","author":"Feng"},{"key":"ref54","article-title":"DropEdge: Towards deep graph convolutional networks on node classification","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Rong"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5747"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17315"},{"key":"ref57","first-page":"19010","article-title":"Metropolis-hastings data augmentation for graph neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Park"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00016"},{"key":"ref59","first-page":"14054","article-title":"Local augmentation for graph neural networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Liu"},{"key":"ref60","first-page":"3837","article-title":"Convolutional neural networks on graphs with fast localized spectral filtering","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Defferrard"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.111861"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM54844.2022.00028"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/TCSS.2024.3387487"},{"key":"ref64","first-page":"13333","article-title":"Diffusion improves graph learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Gasteiger"},{"key":"ref65","first-page":"13333","article-title":"Diffusion improves graph learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Klicpera"},{"key":"ref66","first-page":"10524","article-title":"On layer normalization in the transformer architecture","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Xiong"},{"key":"ref67","first-page":"43:1","article-title":"Benchmarking graph neural networks","volume":"24","author":"Dwivedi","year":"2023","journal-title":"J. Mach. Learn. Res."},{"key":"ref68","article-title":"Mixup: Beyond empirical risk minimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Zhang"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00612"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00111"},{"key":"ref72","first-page":"22118","article-title":"Open graph benchmark: Datasets for machine learning on graphs","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Hu"},{"key":"ref73","article-title":"Decoupled weight decay regularization","volume-title":"Proc. Int. Conf. Learn. 
Representations","author":"Loshchilov"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599388"}],"container-title":["IEEE Transactions on Big Data"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/6687317\/11077791\/10818575.pdf?arnumber=10818575","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,15]],"date-time":"2025-07-15T17:43:47Z","timestamp":1752601427000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10818575\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":74,"journal-issue":{"issue":"4"},"URL":"https:\/\/doi.org\/10.1109\/tbdata.2024.3524081","relation":{},"ISSN":["2332-7790","2372-2096"],"issn-type":[{"value":"2332-7790","type":"electronic"},{"value":"2372-2096","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8]]}}}