{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T04:32:47Z","timestamp":1750221167825,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":31,"publisher":"ACM","license":[{"start":{"date-parts":[[2018,6,12]],"date-time":"2018-06-12T00:00:00Z","timestamp":1528761600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2018,6,12]]},"DOI":"10.1145\/3205289.3205301","type":"proceedings-article","created":{"date-parts":[[2018,9,13]],"date-time":"2018-09-13T12:54:52Z","timestamp":1536843292000},"page":"295-306","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Isometry"],"prefix":"10.1145","author":[{"given":"Zhihao","family":"Jia","sequence":"first","affiliation":[{"name":"Stanford University"}]},{"given":"Sean","family":"Treichler","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Galen","family":"Shipman","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory"}]},{"given":"Pat","family":"McCormick","sequence":"additional","affiliation":[{"name":"Los Alamos National Laboratory"}]},{"given":"Alex","family":"Aiken","sequence":"additional","affiliation":[{"name":"Stanford University"}]}],"member":"320","published-online":{"date-parts":[[2018,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"{n. d.}. CUDA Programming Guide Version 5.5. http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html. ({n. d.}).  {n. d.}. CUDA Programming Guide Version 5.5. http:\/\/docs.nvidia.com\/cuda\/cuda-c-programming-guide\/index.html. ({n. d.})."},{"key":"e_1_3_2_1_2_1","unstructured":"{n. d.}. Developing a Linux Kernel Module using GPUDirect RDMA. http:\/\/docs.nvidia.com\/cuda\/pdf\/GPUDirect_RDMA.pdf. ({n. d.}).  {n. d.}. Developing a Linux Kernel Module using GPUDirect RDMA. http:\/\/docs.nvidia.com\/cuda\/pdf\/GPUDirect_RDMA.pdf. ({n. d.})."},{"key":"e_1_3_2_1_3_1","unstructured":"{n. d.}. Lustre File System. http:\/\/www.lustre.org. ({n. d.}).  {n. d.}. Lustre File System. http:\/\/www.lustre.org. ({n. d.})."},{"key":"e_1_3_2_1_4_1","unstructured":"{n. d.}. The Message-Passing Interface. http:\/\/www.mpi-forum.org\/docs\/mpi-3.0\/mpi30-report.pdf. ({n. d.}).  {n. d.}. The Message-Passing Interface. http:\/\/www.mpi-forum.org\/docs\/mpi-3.0\/mpi30-report.pdf. ({n. d.})."},{"key":"e_1_3_2_1_5_1","unstructured":"{n. d.}. TensorFlow Performance Guide. https:\/\/www.tensorflow.org\/performance\/performance_guide. ({n. d.}).  {n. d.}. TensorFlow Performance Guide. https:\/\/www.tensorflow.org\/performance\/performance_guide. ({n. d.})."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1631"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.5555\/2388996.2389086"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/MCSE.2013.98"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/945445.945450"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Tom Henretty Kevin Stock Louis-No\u00ebl Pouchet Franz Franchetti J. Ramanujam and P. Sadayappan. 2011. Data Layout Transformation for Stencil Computations on Short-vector SIMD Architectures (CC'11\/ETAPS' 11).   Tom Henretty Kevin Stock Louis-No\u00ebl Pouchet Franz Franchetti J. Ramanujam and P. Sadayappan. 2011. Data Layout Transformation for Stencil Computations on Short-vector SIMD Architectures (CC'11\/ETAPS' 11).","DOI":"10.1007\/978-3-642-19861-8_13"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/1345206.1345229"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/1993498.1993516"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.14778\/3157794.3157799"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2017.00043"},{"key":"e_1_3_2_1_15_1","volume-title":"Optimizing Memory Efficiency for Deep Convolutional Neural Networks on GPUs (SC '16)","author":"Li Chao","year":"2016","unstructured":"Chao Li , Yi Yang , Min Feng , Srimat Chakradhar , and Huiyang Zhou . 2016 . Optimizing Memory Efficiency for Deep Convolutional Neural Networks on GPUs (SC '16) . Chao Li, Yi Yang, Min Feng, Srimat Chakradhar, and Huiyang Zhou. 2016. Optimizing Memory Efficiency for Deep Convolutional Neural Networks on GPUs (SC '16)."},{"key":"e_1_3_2_1_17_1","volume-title":"Innovative Parallel Computing (InPar '12)","author":"Pharr Matt","year":"2012","unstructured":"Matt Pharr and William R Mark . 2012 . ISPC: A SPMD compiler for highperformance CPU programming . In Innovative Parallel Computing (InPar '12) . Matt Pharr and William R Mark. 2012. ISPC: A SPMD compiler for highperformance CPU programming. In Innovative Parallel Computing (InPar '12)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPP.2013.17"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2517349.2522715"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00450-011-0157-1"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-540-87475-1_11"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1145\/2451116.2451169"},{"key":"e_1_3_2_1_23_1","volume-title":"Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR abs\/1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman . 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR abs\/1409.1556 ( 2014 ). Karen Simonyan and Andrew Zisserman. 2014. Very Deep Convolutional Networks for Large-Scale Image Recognition. CoRR abs\/1409.1556 (2014)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.5555\/795668.796733"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.14778\/3090163.3090166"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2628071.2628084"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1007\/s00450-011-0171-3"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/2834976.2834983"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.5555\/1364813.1364815"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1007\/11846802_18"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the 30th IEEE International Parallel & Distributed Processing Symposium (IPDPS '10)","author":"Xiao S.","year":"2010","unstructured":"S. Xiao and W. c. Feng . 2010 . Inter-block GPU communication via fast barrier synchronization . In Proceedings of the 30th IEEE International Parallel & Distributed Processing Symposium (IPDPS '10) . S. Xiao and W. c. Feng. 2010. Inter-block GPU communication via fast barrier synchronization. In Proceedings of the 30th IEEE International Parallel & Distributed Processing Symposium (IPDPS '10)."},{"key":"e_1_3_2_1_32_1","volume-title":"Resilient Distributed Datasets: A Fault-tolerant Abstraction for In-memory Cluster Computing. In NSDI'12","author":"Zaharia Matei","year":"2008","unstructured":"Matei Zaharia , Mosharaf Chowdhury , Tathagata Das , Ankur Dave , Justin Ma , Murphy McCauley , Michael J. Franklin , Scott Shenker , and Ion Stoica . 2008 . Resilient Distributed Datasets: A Fault-tolerant Abstraction for In-memory Cluster Computing. In NSDI'12 . San Jose, CA. Matei Zaharia, Mosharaf Chowdhury, Tathagata Das, Ankur Dave, Justin Ma, Murphy McCauley, Michael J. Franklin, Scott Shenker, and Ion Stoica. 2008. Resilient Distributed Datasets: A Fault-tolerant Abstraction for In-memory Cluster Computing. In NSDI'12. San Jose, CA."}],"event":{"name":"ICS '18: 2018 International Conference on Supercomputing","sponsor":["SIGARCH ACM Special Interest Group on Computer Architecture"],"location":"Beijing China","acronym":"ICS '18"},"container-title":["Proceedings of the 2018 International Conference on Supercomputing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3205289.3205301","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3205289.3205301","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T01:08:55Z","timestamp":1750208935000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3205289.3205301"}},"subtitle":["A Path-Based Distributed Data Transfer System"],"short-title":[],"issued":{"date-parts":[[2018,6,12]]},"references-count":31,"alternative-id":["10.1145\/3205289.3205301","10.1145\/3205289"],"URL":"https:\/\/doi.org\/10.1145\/3205289.3205301","relation":{},"subject":[],"published":{"date-parts":[[2018,6,12]]},"assertion":[{"value":"2018-06-12","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}