{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T16:26:04Z","timestamp":1775665564932,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100006374","name":"NSF (National Science Foundation)","doi-asserted-by":"publisher","award":["2302720,2312758,2038029"],"award-info":[{"award-number":["2302720,2312758,2038029"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645352","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"2807-2816","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":18,"title":["ZipZap: Efficient Training of Language Models for Large-Scale Fraud Detection on Blockchain"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3297-6991","authenticated-orcid":false,"given":"Sihao","family":"Hu","sequence":"first","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4557-1865","authenticated-orcid":false,"given":"Tiansheng","family":"Huang","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5917-2577","authenticated-orcid":false,"given":"Ka-Ho","family":"Chow","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9177-114X","authenticated-orcid":false,"given":"Wenqi","family":"Wei","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8761-5486","authenticated-orcid":false,"given":"Yanzhao","family":"Wu","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4138-3082","authenticated-orcid":false,"given":"Ling","family":"Liu","sequence":"additional","affiliation":[{"name":"Georgia Institute of Technology, Atlanta, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Learning role-based graph embeddings. arXiv preprint arXiv:1802.02896","author":"Ahmed N. K.","year":"2018","unstructured":"N. K. Ahmed, R. Rossi, J. B. Lee, T. L. Willke, R. Zhou, X. Kong, and H. Eldardiry. Learning role-based graph embeddings. arXiv preprint arXiv:1802.02896, 2018."},{"key":"e_1_3_2_2_2_1","volume-title":"Longformer: The long-document transformer. arXiv preprint arXiv:2004.05150","author":"Beltagy I.","year":"2020","unstructured":"I. Beltagy, M. E. Peters, and A. Cohan. Longformer: The long-document transformer. arXiv preprint arXiv:2004.05150, 2020."},{"key":"e_1_3_2_2_3_1","first-page":"31","article-title":"Block-wise lowrank approximation for neural language model shrinking","author":"Chen P.","year":"2018","unstructured":"P. Chen, S. Si, Y. Li, C. Chelba, and C.-J. Hsieh. 
Groupreduce: Block-wise lowrank approximation for neural language model shrinking. Advances in Neural Information Processing Systems, 31, 2018.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_4_1","volume-title":"Rethinking attention with performers. arXiv preprint arXiv:2009.14794","author":"Choromanski K.","year":"2020","unstructured":"K. Choromanski, V. Likhosherstov, D. Dohan, X. Song, A. Gane, T. Sarlos, P. Hawkins, J. Davis, A. Mohiuddin, L. Kaiser, et al. Rethinking attention with performers. arXiv preprint arXiv:2009.14794, 2020."},{"key":"e_1_3_2_2_5_1","volume-title":"Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555","author":"Clark K.","year":"2020","unstructured":"K. Clark, M.-T. Luong, Q. V. Le, and C. D. Manning. Electra: Pre-training text encoders as discriminators rather than generators. arXiv preprint arXiv:2003.10555, 2020."},{"key":"e_1_3_2_2_6_1","volume-title":"Flash boys 2.0: Frontrunning, transaction reordering, and consensus instability in decentralized exchanges. arXiv preprint arXiv:1904.05234","author":"Daian P.","year":"2019","unstructured":"P. Daian, S. Goldfeder, T. Kell, Y. Li, X. Zhao, I. Bentov, L. Breidenbach, and A. Juels. Flash boys 2.0: Frontrunning, transaction reordering, and consensus instability in decentralized exchanges. arXiv preprint arXiv:1904.05234, 2019."},{"key":"e_1_3_2_2_7_1","volume-title":"inference: Memory consumption by neural networks. https:\/\/frankdenneman.nl\/2022\/07\/15\/training-vs-inference-memoryconsumption- by-neural-networks\/","author":"Denneman F.","year":"2022","unstructured":"F. Denneman. Training vs. inference: Memory consumption by neural networks. https:\/\/frankdenneman.nl\/2022\/07\/15\/training-vs-inference-memoryconsumption- by-neural-networks\/, 2022."},{"key":"e_1_3_2_2_8_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin J.","year":"2018","unstructured":"J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805, 2018."},{"key":"e_1_3_2_2_9_1","volume-title":"Ethereum unique addresses chart","year":"2023","unstructured":"Etherscan. Ethereum unique addresses chart, 2023. https:\/\/etherscan.io\/chart\/ address."},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISIT45174.2021.9517710"},{"key":"e_1_3_2_2_11_1","first-page":"2337","volume-title":"International conference on machine learning","author":"Gong L.","year":"2019","unstructured":"L. Gong, D. He, Z. Li, T. Qin, L. Wang, and T. Liu. Efficient training of bert by progressively stacking. In International conference on machine learning, pages 2337--2346. PMLR, 2019."},{"key":"e_1_3_2_2_12_1","volume-title":"Inductive representation learning on large graphs. Advances in neural information processing systems, 30","author":"Hamilton W.","year":"2017","unstructured":"W. Hamilton, Z. Ying, and J. Leskovec. Inductive representation learning on large graphs. Advances in neural information processing systems, 30, 2017."},{"key":"e_1_3_2_2_13_1","volume-title":"Token dropping for efficient bert pretraining. arXiv preprint arXiv:2203.13240","author":"Hou L.","year":"2022","unstructured":"L. Hou, R. Y. Pang, T. Zhou, Y.Wu, X. Song, X. Song, and D. Zhou. Token dropping for efficient bert pretraining. 
arXiv preprint arXiv:2203.13240, 2022."},{"key":"e_1_3_2_2_14_1","volume-title":"Gift: Graph-guided feature transfer for cold-start video click-through rate prediction. arXiv preprint arXiv:2202.11525","author":"Hu S.","year":"2022","unstructured":"S. Hu, Y. Cao, Y. Gong, Z. Li, Y. Yang, Q. Liu, W. Ou, and S. Ji. Gift: Graph-guided feature transfer for cold-start video click-through rate prediction. arXiv preprint arXiv:2202.11525, 2022."},{"key":"e_1_3_2_2_15_1","volume-title":"Large language modelpowered smart contract vulnerability detection: New perspectives. arXiv preprint arXiv:2310.01152","author":"Hu S.","year":"2023","unstructured":"S. Hu, T. Huang, F. \"lhan, S. F. Tekin, and L. Liu. Large language modelpowered smart contract vulnerability detection: New perspectives. arXiv preprint arXiv:2310.01152, 2023."},{"key":"e_1_3_2_2_16_1","volume-title":"Sequence-based target coin prediction for cryptocurrency pump-and-dump. arXiv preprint arXiv:2204.12929","author":"Hu S.","year":"2022","unstructured":"S. Hu, Z. Zhang, S. Lu, B. He, and Z. Li. Sequence-based target coin prediction for cryptocurrency pump-and-dump. arXiv preprint arXiv:2204.12929, 2022."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583345"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403288"},{"key":"e_1_3_2_2_19_1","volume-title":"Length-adaptive transformer: Train once with length drop, use anytime with search. arXiv preprint arXiv:2010.07003","author":"Kim G.","year":"2020","unstructured":"G. Kim and K. Cho. Length-adaptive transformer: Train once with length drop, use anytime with search. arXiv preprint arXiv:2010.07003, 2020."},{"key":"e_1_3_2_2_20_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf T. N.","year":"2016","unstructured":"T. N. Kipf and M. Welling. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907, 2016."},{"key":"e_1_3_2_2_21_1","volume-title":"Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942","author":"Lan Z.","year":"2019","unstructured":"Z. Lan, M. Chen, S. Goodman, K. Gimpel, P. Sharma, and R. Soricut. Albert: A lite bert for self-supervised learning of language representations. arXiv preprint arXiv:1909.11942, 2019."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380103"},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3485447.3512226"},{"key":"e_1_3_2_2_24_1","volume-title":"Learnable embedding sizes for recommender systems. arXiv preprint arXiv:2101.07577","author":"Liu S.","year":"2021","unstructured":"S. Liu, C. Gao, Y. Chen, D. Jin, and Y. Li. Learnable embedding sizes for recommender systems. arXiv preprint arXiv:2101.07577, 2021."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/2623330.2623732"},{"key":"e_1_3_2_2_26_1","volume-title":"Improving language understanding by generative pre-training","author":"Radford A.","year":"2018","unstructured":"A. Radford, K. Narasimhan, T. Salimans, I. Sutskever, et al. Improving language understanding by generative pre-training. 2018."},{"key":"e_1_3_2_2_27_1","unstructured":"V. RANDOM. Efficient large-scale transformer training via random and layerwise token dropping."},{"key":"e_1_3_2_2_28_1","volume-title":"Gensim-statistical semantics in python. Retrieved from genism. 
org","author":"Sojka P.","year":"2011","unstructured":"P. Sojka, et al. Gensim-statistical semantics in python. Retrieved from genism. org, 2011."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-73198-8_9"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-981-16-7993-3_1"},{"key":"e_1_3_2_2_31_1","volume-title":"Blockchain security and aml analysis report","year":"2023","unstructured":"SlowMist. Blockchain security and aml analysis report, 2023. https:\/\/www.slowmist.com\/report\/2022-Blockchain-Security-and-AMLAnalysis- Annual-Report(EN).pdf."},{"key":"e_1_3_2_2_32_1","volume-title":"Attention is all you need. arXiv:1706.03762","author":"Vaswani A.","year":"2017","unstructured":"A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, et al. Attention is all you need. arXiv:1706.03762, 2017."},{"key":"e_1_3_2_2_33_1","volume-title":"Graph attention networks. arXiv preprint arXiv:1710.10903","author":"Velickovic P.","year":"2017","unstructured":"P. Velickovic, G. Cucurull, A. Casanova, A. Romero, P. Lio, and Y. Bengio. Graph attention networks. arXiv preprint arXiv:1710.10903, 2017."},{"key":"e_1_3_2_2_34_1","volume-title":"Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv preprint arXiv:1909.01315","author":"Wang M.","year":"2019","unstructured":"M. Wang, D. Zheng, Z. Ye, Q. Gan, M. Li, X. Song, J. Zhou, C. Ma, L. Yu, Y. Gai, et al. Deep graph library: A graph-centric, highly-performant package for graph neural networks. arXiv preprint arXiv:1909.01315, 2019."},{"key":"e_1_3_2_2_35_1","volume-title":"Linformer: Self-attention with linear complexity. arXiv preprint arXiv:2006.04768","author":"Wang S.","year":"2020","unstructured":"S. Wang, B. Z. Li, M. Khabsa, H. Fang, and H. Ma. Linformer: Self-attention with linear complexity. arXiv preprint arXiv:2006.04768, 2020."},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSMC.2020.3016821"},{"key":"e_1_3_2_2_37_1","volume-title":"Progressively stacking 2.0: A multi-stage layerwise training method for bert training speedup. arXiv preprint arXiv:2011.13635","author":"Yang C.","year":"2020","unstructured":"C. Yang, S. Wang, C. Yang, Y. Li, R. He, and J. Zhang. Progressively stacking 2.0: A multi-stage layerwise training method for bert training speedup. arXiv preprint arXiv:2011.13635, 2020."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01054"},{"key":"e_1_3_2_2_39_1","first-page":"17283","article-title":"Big bird: Transformers for longer sequences","volume":"33","author":"Zaheer M.","year":"2020","unstructured":"M. Zaheer, G. Guruganesh, K. A. Dubey, J. Ainslie, C. Alberti, S. Ontanon, P. Pham, A. Ravula, Q. Wang, L. Yang, et al. Big bird: Transformers for longer sequences. 
Advances in Neural Information Processing Systems, 33:17283--17297, 2020.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2017.05.038"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3449916"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450124"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM51629.2021.00101"}],"event":{"name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore","acronym":"WWW '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645352","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645352","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:23:50Z","timestamp":1755822230000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645352"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":43,"alternative-id":["10.1145\/3589334.3645352","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645352","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
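
The record above follows the standard Crossref "work" schema (a top-level `status`/`message-type` envelope with the bibliographic payload under `message`). As a minimal sketch of how such a record can be retrieved and read programmatically: the public Crossref REST endpoint `https://api.crossref.org/works/{doi}` and the third-party `requests` package are assumptions not named in the record itself; the DOI and field names are taken from the record above.

```python
# Minimal sketch: fetch a Crossref work record and read a few fields.
# Assumptions (not part of the record above): network access, the public
# Crossref REST API at https://api.crossref.org/works/{doi}, and `requests`.
import requests

DOI = "10.1145/3589334.3645352"  # DOI taken from the record above


def fetch_work(doi: str) -> dict:
    """Fetch a Crossref work record and return its 'message' payload."""
    resp = requests.get(f"https://api.crossref.org/works/{doi}", timeout=30)
    resp.raise_for_status()
    body = resp.json()
    # The envelope mirrors the record above: status "ok", message-type "work".
    assert body.get("status") == "ok" and body.get("message-type") == "work"
    return body["message"]


if __name__ == "__main__":
    work = fetch_work(DOI)
    # 'title' and 'container-title' are lists in the Crossref schema.
    print("Title:      ", work["title"][0])
    print("Venue:      ", work["container-title"][0])
    print("Pages:      ", work.get("page"))
    print("Issued:     ", work["issued"]["date-parts"][0])
    print("References: ", work.get("references-count"))
    # Entries in 'reference' carry a 'DOI' key only when Crossref resolved them.
    cited_dois = [r["DOI"] for r in work.get("reference", []) if "DOI" in r]
    print("Resolved cited DOIs:", len(cited_dois))
```

Usage note: running the script prints the title, venue, page range, issue date, and reference counts exactly as they appear in the payload above; unresolved references (those given only as `unstructured` strings, such as several of the arXiv entries) are simply skipped by the DOI filter.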