{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T12:44:10Z","timestamp":1726058650012},"publisher-location":"Cham","reference-count":31,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783030349134"},{"type":"electronic","value":"9783030349141"}],"license":[{"start":{"date-parts":[[2019,1,1]],"date-time":"2019-01-01T00:00:00Z","timestamp":1546300800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019]]},"DOI":"10.1007\/978-3-030-34914-1_10","type":"book-chapter","created":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T03:03:09Z","timestamp":1573268589000},"page":"100-111","update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Overcoming GPU Memory Capacity Limitations in Hybrid MPI Implementations of CFD"],"prefix":"10.1007","author":[{"given":"Jake","family":"Choi","sequence":"first","affiliation":[]},{"given":"Yoonhee","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Heon-young","family":"Yeom","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2019,11,10]]},"reference":[{"key":"10_CR1","doi-asserted-by":"publisher","unstructured":"Lai, J., Li, H., Tian, Z.: CPU\/GPU heterogeneous parallel CFD solver and optimizations. In: Proceedings of the 2018 International Conference on Service Robotics Technologies (ICSRT \u201918), pp. 88\u201392. ACM, New York (2018). \nhttps:\/\/doi.org\/10.1145\/3208833.3208847","DOI":"10.1145\/3208833.3208847"},{"issue":"12","key":"10_CR2","doi-asserted-by":"publisher","first-page":"2674","DOI":"10.1109\/TVCG.2014.2346458","volume":"20","author":"P Lindstrom","year":"2014","unstructured":"Lindstrom, P.: Fixed-rate compressed floating-point arrays. IEEE Trans. Vis. Comput. Graph. 20(12), 2674\u20132683 (2014). \nhttps:\/\/doi.org\/10.1109\/TVCG.2014.2346458","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"10_CR3","first-page":"2574","volume":"2017","author":"P Lindstrom","year":"2017","unstructured":"Lindstrom, P.: Error distributions of lossy floating-point compressors. Joint Stat. Meet. 2017, 2574\u20132589 (2017)","journal-title":"Joint Stat. Meet."},{"key":"10_CR4","doi-asserted-by":"publisher","unstructured":"Deutsch, P.: GZIP file format specification version 4.3. RFC, vol. 1952, pp. 1\u201312 (1996). \nhttps:\/\/doi.org\/10.17487\/RFC1952","DOI":"10.17487\/RFC1952"},{"key":"10_CR5","unstructured":"Bzip2 (2018). \nhttp:\/\/www.bzip.org\/"},{"issue":"5","key":"10_CR6","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1109\/TVCG.2006.143","volume":"12","author":"P Lindstrom","year":"2006","unstructured":"Lindstrom, P., Isenburg, M.: Fast and efficient compression of floating-point data. IEEE Trans. Vis. Comput. Graph. 12(5), 1245\u20131250 (2006)","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"issue":"8","key":"10_CR7","doi-asserted-by":"publisher","first-page":"1857","DOI":"10.1109\/TPDS.2019.2894404","volume":"30","author":"Dingwen Tao","year":"2019","unstructured":"Tao, D., Di, S., Liang, X., Chen, Z., Cappello, F.: Optimizing lossy compression rate-distortion from automatic online selection between SZ and ZFP (2019). \nhttps:\/\/doi.org\/10.1109\/TPDS.2019.2894404","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"10_CR8","doi-asserted-by":"publisher","unstructured":"Niksiar, P., Ashrafizadeh, A., Shams, M., Madani, A.H.: Implementation of a GPU-based CFD Code. In: 2014 International Conference on Computational Science and Computational Intelligence, Las Vegas, NV, pp. 84\u201389 (2014). \nhttps:\/\/doi.org\/10.1109\/CSCI.2014.21","DOI":"10.1109\/CSCI.2014.21"},{"key":"10_CR9","doi-asserted-by":"publisher","unstructured":"Mintu, S.A., Molyneux, D.: Application of GPGPU to accelerate CFD simulation. In: ASME International Conference on Offshore Mechanics and Arctic Engineering, vol. 2: CFD and FSI ():V002T08A001. \nhttps:\/\/doi.org\/10.1115\/OMAE2018-77649","DOI":"10.1115\/OMAE2018-77649"},{"key":"10_CR10","unstructured":"NVIDIA Corp: Profiler user\u2019s guide (2017). \nhttps:\/\/docs.nvidia.com\/cuda\/profiler-users-guide\/index.html#nvprof-overview\n\n. An optional note"},{"issue":"1-2","key":"10_CR11","doi-asserted-by":"publisher","first-page":"65","DOI":"10.1007\/s00450-010-0111-7","volume":"25","author":"Michael Griebel","year":"2010","unstructured":"Griebel, M., Zaspel, P.: Comput. Sci. Res. Dev. 25, 65 (2010). \nhttps:\/\/doi.org\/10.1007\/s00450-010-0111-7","journal-title":"Computer Science - Research and Development"},{"key":"10_CR12","doi-asserted-by":"publisher","first-page":"275","DOI":"10.1016\/j.jcp.2014.08.024","volume":"278","author":"Chuanfu Xu","year":"2014","unstructured":"Xu, H., et al.: Collaborating CPU and GPU for large-scale high-order CFD simulations with complex grids on the TianHe-1A supercomputer. J. Comput. Phys. 278(C), 275\u2013297 (2013). \nhttps:\/\/doi.org\/10.1016\/j.jcp.2014.08.024","journal-title":"Journal of Computational Physics"},{"key":"10_CR13","unstructured":"Videocardbenchmark.net. PassMark Software - Video Card (GPU) Benchmark Charts (2019). \nhttps:\/\/www.videocardbenchmark.net\/\n\n. Accessed 24 May 2019"},{"key":"10_CR14","unstructured":"Cpubenchmark.net. PassMark Software - CPU Benchmark Charts (2019). \nhttps:\/\/www.cpubenchmark.net\/\n\n. Accessed 24 May 2019"},{"key":"10_CR15","unstructured":"Ark.intel.com. Intel product specifications (2019). \nhttps:\/\/ark.intel.com\/content\/www\/us\/en\/ark.html\n\n. Accessed 24 May 2019"},{"key":"10_CR16","unstructured":"Wang, Y., Malkawi, A., Yi, Y.K.: Implementing CFD (computational fluid dynamics) in OpenCL for building simulation (2011)"},{"key":"10_CR17","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1016\/j.compfluid.2018.03.011","volume":"173","author":"A. Gorobets","year":"2018","unstructured":"Gorobets, A., Soukov, S., Bogdanov, P.: Multilevel parallelization for simulating compressible turbulent flows on most kinds of hybrid supercomputers. Comput. Fluids 173 (2018). \nhttps:\/\/doi.org\/10.1016\/j.compfluid.2018.03.011","journal-title":"Computers & Fluids"},{"key":"10_CR18","doi-asserted-by":"publisher","first-page":"786","DOI":"10.1016\/j.future.2017.09.029","volume":"79","author":"G. Oyarzun","year":"2018","unstructured":"Oyarzun, G., Borrell, R., Gorobets, A., Mantovani, F., Oliva, A.: Efficient CFD code implementation for the ARM-based mont-blanc architecture. Future Gener. Comput. Syst. 79 (2017). \nhttps:\/\/doi.org\/10.1016\/j.future.2017.09.029","journal-title":"Future Generation Computer Systems"},{"key":"10_CR19","doi-asserted-by":"publisher","DOI":"10.1016\/j.compfluid.2018.03.005","author":"YX Wang","year":"2018","unstructured":"Wang, Y.X., Zhang, L.L., Liu, W., Cheng, X.H., Zhuang, Y., Chronopoulos, A.: Performance optimizations for scalable CFD applications on hybrid CPU+MIC heterogeneous computing system with millions of cores. Comput. Fluids (2018). \nhttps:\/\/doi.org\/10.1016\/j.compfluid.2018.03.005","journal-title":"Comput. Fluids"},{"key":"10_CR20","first-page":"1377","volume":"33","author":"Y Che","year":"2014","unstructured":"Che, Y., Zhang, L., Xu, C., Wang, Y., Liu, W., Wang, Z.: Optimization of a parallel CFD code and its performance evaluation on Tianhe-1A. Comput. Inf. 33, 1377\u20131399 (2014)","journal-title":"Comput. Inf."},{"key":"10_CR21","doi-asserted-by":"publisher","first-page":"199","DOI":"10.1006\/jcph.1998.5892","volume":"141","author":"B Cockburn","year":"1998","unstructured":"Cockburn, B., Shu, C.W.: The Runge-Kutta discontinuous Galerkin method for conservation laws V. J. Comput. Phys. 141, 199\u2013224 (1998)","journal-title":"J. Comput. Phys."},{"key":"10_CR22","first-page":"1005","volume":"375","author":"H You","year":"2018","unstructured":"You, H., Kim, C.: High-order multi-dimensional limiting strategy with subcell resolution I. Two-Dimension. Mixed Meshes, J. Comput. Phys. 375, 1005\u20131032 (2018)","journal-title":"Two-Dimension. Mixed Meshes, J. Comput. Phys."},{"key":"10_CR23","doi-asserted-by":"publisher","first-page":"507","DOI":"10.1016\/j.compfluid.2003.08.004","volume":"34","author":"F Bassi","year":"2005","unstructured":"Bassi, F., Crivellini, A., Rebay, S., Savini, M.: Discontinuous Galerkin solution of the Reynolds-averaged Navier-Stokes and k-$$\\omega $$ turbulence model equations. Comput. Fluids 34, 507\u2013540 (2005)","journal-title":"Comput. Fluids"},{"key":"10_CR24","unstructured":"Cohen, J., Molemaker, M.J.: A fast double precision CFD code using CUDA. Parallel Computational Fluid Dynamics: Recent Advances and Future Directions (2009)"},{"key":"10_CR25","doi-asserted-by":"publisher","unstructured":"Li, W., Jin, G., Cui, X., See, S.: An evaluation of unified memory technology on NVIDIA GPUs. In: 2015 15th IEEE\/ACM International Symposium on Cluster, Cloud and Grid Computing, Shenzhen, pp. 1092-1098 (2015). \nhttps:\/\/doi.org\/10.1109\/CCGrid.2015.105","DOI":"10.1109\/CCGrid.2015.105"},{"key":"10_CR26","unstructured":"Harris, M., Harris, M., Harris, M., Sakharnykh, N., Harris, M.: Unified memory for CUDA beginners\u2014NVIDIA developer blog. NVIDIA Developer Blog (2019). \nhttps:\/\/devblogs.nvidia.com\/unified-memory-cuda-beginners\/\n\n. Accessed 17 May 2019"},{"key":"10_CR27","unstructured":"Harris, M., Perelygin, K., Luitjens, J., Karras, T., Karras, T., Karras, T.: Cooperative groups: flexible CUDA thread programming\u2014NVIDIA developer blog. NVIDIA Developer Blog (2019). \nhttps:\/\/devblogs.nvidia.com\/cooperative-groups\/\n\n. Accessed 22 May 2019"},{"key":"10_CR28","unstructured":"Oteski, L., Colin de Verdiere, G., Contassot-Vivier, S., Vialle, S., Ryan, J.: Towards a unified CPU-GPU code hybridization: a GPU based optimization strategy efficient on other modern architectures (2018)"},{"key":"10_CR29","doi-asserted-by":"publisher","unstructured":"Karypis, G., Kumar, V.: Parallel multilevel k-way partitioning scheme for irregular graphs. In: Proceedings of the 1996 ACM\/IEEE Conference on Supercomputing (CDROM), Ser. Supercomputing \u201996. IEEE Computer Society, Washington, DC, USA (1996). \nhttps:\/\/doi.org\/10.1145\/369028.369103","DOI":"10.1145\/369028.369103"},{"key":"10_CR30","unstructured":"NVIDIA: NVIDIA CUBLAS Library (2019). \nhttps:\/\/developer.nvidia.com\/cublas"},{"key":"10_CR31","unstructured":"Larsen, M.: mclarsen\/cuZFP. GitHub (2019). \nhttps:\/\/github.com\/mclarsen\/cuZFP\n\n. Accessed 22 May 2019"}],"container-title":["Lecture Notes in Computer Science","Internet and Distributed Computing Systems"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-030-34914-1_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,11,9]],"date-time":"2019-11-09T03:04:11Z","timestamp":1573268651000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-030-34914-1_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019]]},"ISBN":["9783030349134","9783030349141"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-030-34914-1_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2019]]},"assertion":[{"value":"10 November 2019","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"IDCS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Internet and Distributed Computing Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Naples","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2019","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"10 October 2019","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12 October 2019","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"12","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"idcs2019","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/idcs2019.uniparthenope.it\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Double-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"EasyChair","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"145","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"47","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"32% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"2","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"No","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}