{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,12]],"date-time":"2025-07-12T01:09:01Z","timestamp":1752282541650,"version":"3.37.3"},"reference-count":59,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2021ZD0110700"],"award-info":[{"award-number":["2021ZD0110700"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62172361"],"award-info":[{"award-number":["62172361"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Zhejiang Lab Research Project","award":["2020KC0AC01"],"award-info":[{"award-number":["2020KC0AC01"]}]},{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS 1906541"],"award-info":[{"award-number":["CNS 1906541"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2022,12,1]]},"DOI":"10.1109\/tpds.2022.3193867","type":"journal-article","created":{"date-parts":[[2022,7,26]],"date-time":"2022-07-26T19:26:41Z","timestamp":1658863601000},"page":"4484-4498","source":"Crossref","is-referenced-by-count":5,"title":["Accelerating Tensor Swapping in GPUs With Self-Tuning Compression"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6973-0755","authenticated-orcid":false,"given":"Ping","family":"Chen","sequence":"first","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7075-4153","authenticated-orcid":false,"given":"Shuibing","family":"He","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3730-8901","authenticated-orcid":false,"given":"Xuechen","family":"Zhang","sequence":"additional","affiliation":[{"name":"School of Engineering and Computer Science, Washington State University Vancouver, Vancouver, WA, USA"}]},{"given":"Shuaiben","family":"Chen","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"given":"Peiyi","family":"Hong","sequence":"additional","affiliation":[{"name":"College of Computer Science and Technology, Zhejiang University, Hangzhou, China"}]},{"given":"Yanlong","family":"Yin","sequence":"additional","affiliation":[{"name":"Institute of Open Source Chip, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1093-0792","authenticated-orcid":false,"given":"Xian-He","family":"Sun","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Illinois Institute of Technology, Chicago, IL, USA"}]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378505"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.23919\/DATE.2018.8341972"},{"key":"ref33","first-page":"248","article-title":"ImageNet: A large-scale hierarchical image database","author":"deng","year":"2010","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"year":"2022","key":"ref32","article-title":"YoLo"},{"year":"2022","key":"ref31","article-title":"ResNet with ELU"},{"year":"2020","key":"ref30","article-title":"Dead neurons"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/MCHPC54807.2021.00007"},{"year":"2019","key":"ref36","article-title":"ZFP compression algorithm"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1145\/3358191"},{"year":"2020","key":"ref34","article-title":"Pytorch\/vision"},{"key":"ref28","first-page":"1","article-title":"Fast and accurate deep network learning by exponential linear units (ELUs)","author":"clevert","year":"2016","journal-title":"Proc Int Conf Learn Representations"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.123"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3158369"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"article-title":"Empirical evaluation of rectified activations in convolutional network","year":"2015","author":"xu","key":"ref20"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1967.5493"},{"key":"ref21","first-page":"200","article-title":"Dynamic memory management for GPU-based training of deep neural networks","author":"shriram","year":"2019","journal-title":"Proc Int Parallel Distrib Process Symp"},{"year":"2020","key":"ref24","article-title":"NVIDIA\/nvcomp: A library for fast lossless compression\/decompression on the GPU"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.19026\/rjaset.14.4955"},{"article-title":"Very deep convolutional networks for large-scale image recognition","year":"2014","author":"simonyan","key":"ref26"},{"year":"2020","key":"ref25","article-title":"Pytorch\/pytorch: Tensors and dynamic neural networks in python with strong GPU acceleration"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01234-2_48"},{"article-title":"MobileNets: Efficient convolutional neural networks for mobile vision applications","year":"2017","author":"howard","key":"ref51"},{"year":"2020","key":"ref59","article-title":"scikit-learn: Machine learning in Python &#x2014; scikit-learn 0.23.2 documentation"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/21.97458"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1038\/nbt1206-1565"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1080\/01621459.1997.10473615"},{"year":"2020","key":"ref55","article-title":"NVIDIA\/Cuda-samples: Samples for CUDA developers which demonstrates features in CUDA toolkit"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref53","first-page":"1","article-title":"Learning multiple layers of features from tiny images","author":"krizhevsky","year":"2009"},{"article-title":"SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size","year":"2016","author":"iandola","key":"ref52"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3405671.3405810"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378530"},{"year":"2020","key":"ref40","article-title":"LZ4 (compression algorithm)"},{"article-title":"Sentinel: Runtime data management on heterogeneous main memory systems for deep learning","year":"2019","author":"ren","key":"ref12"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1145\/3178487.3178491"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783721"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3373376.3378465"},{"article-title":"TFLMS: Large model support in TensorFlow by graph rewriting","year":"2018","author":"le","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00017"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/Cluster48925.2021.00019"},{"article-title":"Deep learning using rectified linear units (ReLU)","year":"2018","author":"agarap","key":"ref19"},{"article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","year":"2018","author":"devlin","key":"ref4"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2013.6639215"},{"article-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer","year":"2017","author":"shazeer","key":"ref6"},{"article-title":"Inception-v4: Inception-ResNet and the impact of residual connections on learning","year":"2016","author":"szegedy","key":"ref5"},{"year":"2020","key":"ref8","article-title":"Introduction to tensors"},{"journal-title":"Deep Learning","year":"2016","author":"goodfellow","key":"ref7"},{"journal-title":"Introduction to Linear Regression Analysis","year":"2012","author":"montgomery","key":"ref49"},{"year":"2020","key":"ref9","article-title":"NVIDIA V100 Tensor Core GPU"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/3447818.3460366"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.327"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2022.3180991"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3307681.3326608"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2006.143"},{"key":"ref41","first-page":"1","article-title":"RFC1952: GZIP file format specification version 4.3","author":"deutsch","year":"1996"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3410463.3414624"},{"year":"2017","key":"ref43","article-title":"Blosc, an extremely fast, multi-threaded, meta-compressor library"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/71\/9790018\/9841008-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/9790018\/09841008.pdf?arnumber=9841008","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,19]],"date-time":"2022-09-19T19:59:24Z","timestamp":1663617564000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9841008\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":59,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2022.3193867","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"type":"print","value":"1045-9219"},{"type":"electronic","value":"1558-2183"},{"type":"electronic","value":"2161-9883"}],"subject":[],"published":{"date-parts":[[2022,12,1]]}}}