{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,11]],"date-time":"2026-02-11T12:35:42Z","timestamp":1770813342238,"version":"3.50.1"},"reference-count":24,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,9,22]],"date-time":"2020-09-22T00:00:00Z","timestamp":1600732800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,9,22]]},"DOI":"10.1109\/hpec43674.2020.9286218","type":"proceedings-article","created":{"date-parts":[[2020,12,22]],"date-time":"2020-12-22T21:07:15Z","timestamp":1608671235000},"page":"1-7","source":"Crossref","is-referenced-by-count":35,"title":["A Novel Inference Algorithm for Large Sparse Neural Network using Task Graph Parallelism"],"prefix":"10.1109","author":[{"given":"Dian-Lun","family":"Lin","sequence":"first","affiliation":[]},{"given":"Tsung-Wei","family":"Huang","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref10","author":"liu","year":"2018","journal-title":"Efficient sparse-winograd convolutional neural networks"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080254"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015676"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2019.8916378"},{"key":"ref14","article-title":"Exploring Hidden Dimensions in Parallelizing Convolutional Neural Networks","volume":"abs 1802 4924","author":"jia","year":"2018","journal-title":"CoRR"},{"key":"ref15","article-title":"One weird trick for parallelizing convolutional neural networks","volume":"abs 1404 5997","author":"krizhevsky","year":"2014","journal-title":"CoRR"},{"key":"ref16","article-title":"Unifying Data, Model and Hybrid Parallelism in Deep Learning via Tensor Tiling","volume":"abs 1805 4170","author":"wang","year":"2018","journal-title":"CoRR"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2019.8916223"},{"key":"ref18","first-page":"103","article-title":"GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism","author":"huang","year":"2019","journal-title":"NIPS"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3341301.3359646"},{"key":"ref4","author":"han","year":"2015","journal-title":"Deep compression Compressing deep neural networks with pruning trained quantization and huffman coding"},{"key":"ref3","author":"radford","year":"2018","journal-title":"Language Models are Unsupervised Multitask Learners"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2018.8547742"},{"key":"ref5","author":"iandola","year":"2016","journal-title":"Squeezenet Alexnet-level accuracy with 50x fewer parameters and< 0 5 mb model size"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/HPEC.2019.8916336"},{"key":"ref7","first-page":"10414","article-title":"Mesh-TensorFlow: Deep Learning for Supercomputers","author":"shazeer","year":"2018","journal-title":"NIPS"},{"key":"ref2","article-title":"Learned in Translation: Contextualized Word Vectors","volume":"abs 1708 107","author":"mccann","year":"2017","journal-title":"CoRR"},{"key":"ref9","article-title":"A survey of sparse matrix-vector multiplication performance on large matrices","volume":"abs 1404 5997","author":"grossman","year":"2016","journal-title":"CoRR"},{"key":"ref1","first-page":"4171","article-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding","author":"devlin","year":"2019","journal-title":"ACL"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/72.286892"},{"key":"ref22","first-page":"974","author":"huang","year":"2019","journal-title":"Cpp-Taskfiow Fast Task-based Parallel Programming using Modern C++"},{"key":"ref21","year":"0","journal-title":"Nvidia CUDA Graph"},{"key":"ref24","year":"0","journal-title":"NVIDIA Visual Profiler"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2019.00051"}],"event":{"name":"2020 IEEE High Performance Extreme Computing Conference (HPEC)","location":"Waltham, MA, USA","start":{"date-parts":[[2020,9,22]]},"end":{"date-parts":[[2020,9,24]]}},"container-title":["2020 IEEE High Performance Extreme Computing Conference (HPEC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9285977\/9286137\/09286218.pdf?arnumber=9286218","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,27]],"date-time":"2022-06-27T15:54:48Z","timestamp":1656345288000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9286218\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,9,22]]},"references-count":24,"URL":"https:\/\/doi.org\/10.1109\/hpec43674.2020.9286218","relation":{},"subject":[],"published":{"date-parts":[[2020,9,22]]}}}