{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,8]],"date-time":"2025-04-08T05:45:40Z","timestamp":1744091140019,"version":"3.37.3"},"reference-count":51,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"},{"start":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T00:00:00Z","timestamp":1734912000000},"content-version":"vor","delay-in-days":22,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0"}],"funder":[{"DOI":"10.13039\/501100012165","name":"Key Technologies Research and Development Program","doi-asserted-by":"publisher","award":["2022YFC2803805","2021YFF0704000"],"award-info":[{"award-number":["2022YFC2803805","2021YFF0704000"]}],"id":[{"id":"10.13039\/501100012165","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012476","name":"Fundamental Research Funds for Central Universities of the Central South University","doi-asserted-by":"publisher","award":["202313035"],"award-info":[{"award-number":["202313035"]}],"id":[{"id":"10.13039\/501100012476","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007129","name":"Natural Science Foundation of Shandong Province","doi-asserted-by":"publisher","award":["ZR2021QF124"],"award-info":[{"award-number":["ZR2021QF124"]}],"id":[{"id":"10.13039\/501100007129","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100020196","name":"Shandong Provincial Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2021M703031"],"award-info":[{"award-number":["2021M703031"]}],"id":[{"id":"10.13039\/501100020196","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Science Foundation of China","doi-asserted-by":"crossref","award":["62036010"],"award-info":[{"award-number":["62036010"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/100022963","name":"Key Research and Development Program of Zhejiang Province","doi-asserted-by":"publisher","award":["2022C03126"],"award-info":[{"award-number":["2022C03126"]}],"id":[{"id":"10.13039\/100022963","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2024,12]]},"abstract":"<jats:title>Abstract<\/jats:title>\n          <jats:p>We present <jats:italic>swRender<\/jats:italic>, a new parallel rendering pipeline based on the new Sunway many-core architecture (SW26010P) for the Monte Carlo path-tracing algorithm. Previous parallel rendering schemes are unsuitable for our task due to issues such as vast differences in hardware architectures and bottlenecks in I\/O communication efficiency. To that end, we create a new two-level parallel tile rendering framework to fully utilize the Sunway computing resources, a practical tile-grouping load-balancing method to maintain the framework\u2019s stability, and a novel many-core acceleration optimization to improve the rendering performance at the pixel level. Our method achieves (1) an average speedup of 16x in multiple benchmarks when compared to the baseline path-tracing model on the Sunway architecture, and (2) an average speedup of 2x when compared to state-of-the-art CPU, co-processor, and GPU-based parallel rendering approaches. Moreover, we scale swRender to run on 15 million cores and obtain high scalable parallel efficiency of 92%.<\/jats:p>","DOI":"10.1007\/s42514-024-00196-w","type":"journal-article","created":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T04:36:19Z","timestamp":1734928579000},"page":"566-587","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Heterogeneous many-core optimization for Monte Carlo path-tracing on new generation Sunway HPC system"],"prefix":"10.1007","volume":"6","author":[{"given":"Xinjie","family":"Wang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guanghao","family":"Ma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiaying","family":"Song","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mingyao","family":"Geng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wenhui","family":"Hu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xi","family":"Duan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhigang","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jiali","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaogang","family":"Jin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fang","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dexun","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7253-4947","authenticated-orcid":false,"given":"Maoxue","family":"Yu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,12,23]]},"reference":[{"key":"196_CR1","doi-asserted-by":"publisher","unstructured":"Aila, T., Laine, S.: Understanding the efficiency of ray traversal on GPUs. In: Proceedings of the Conference on High Performance Graphics 2009, HPG \u201909, pp. 145\u2013149. Association for Computing Machinery, New York (2009). https:\/\/doi.org\/10.1145\/1572769.1572792","DOI":"10.1145\/1572769.1572792"},{"key":"196_CR2","doi-asserted-by":"publisher","unstructured":"Buades, A., Coll, B., Morel, J.M.: A non-local algorithm for image denoising. In: 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR\u201905), pp. 60\u201365 (2005). https:\/\/doi.org\/10.1109\/CVPR.2005.38","DOI":"10.1109\/CVPR.2005.38"},{"issue":"2","key":"196_CR3","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1109\/MM.2020.2971677","volume":"40","author":"J Burgess","year":"2020","unstructured":"Burgess, J.: Rtx on-the Nvidia turing GPU. IEEE Micro 40(2), 36\u201344 (2020). https:\/\/doi.org\/10.1109\/MM.2020.2971677","journal-title":"IEEE Micro"},{"issue":"12","key":"196_CR4","doi-asserted-by":"publisher","first-page":"4752","DOI":"10.1109\/TPDS.2022.3202518","volume":"33","author":"X Chen","year":"2022","unstructured":"Chen, X., Gao, Y., Shang, H., et al.: Increasing the efficiency of massively parallel sparse matrix-matrix multiplication in first-principles calculation on the new-generation Sunway supercomputer. IEEE Trans. Parallel Distrib. Syst. 33(12), 4752\u20134766 (2022). https:\/\/doi.org\/10.1109\/TPDS.2022.3202518","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"196_CR5","doi-asserted-by":"publisher","unstructured":"Cuomo, S., De\u00a0Michele, P., Galletti, A., et\u00a0al.: A GPU-parallel algorithm for ECG signal denoising based on the NLM method. In: 2016 30th International Conference on Advanced Information Networking and Applications Workshops (WAINA), pp. 35\u201339 (2016). https:\/\/doi.org\/10.1109\/WAINA.2016.110","DOI":"10.1109\/WAINA.2016.110"},{"key":"196_CR6","doi-asserted-by":"publisher","unstructured":"Das, A., Mueller, F., Rountree, B. Systemic assessment of node failures in HPC production platforms. In: 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 267\u2013276 (2021). https:\/\/doi.org\/10.1109\/IPDPS49936.2021.00035","DOI":"10.1109\/IPDPS49936.2021.00035"},{"issue":"4","key":"196_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3104067","volume":"50","author":"Y Deng","year":"2017","unstructured":"Deng, Y., Ni, Y., Li, Z., et al.: Toward real-time ray tracing: a survey on hardware acceleration and microarchitecture techniques. ACM Comput. Surv. 50(4), 1\u201341 (2017). https:\/\/doi.org\/10.1145\/3104067","journal-title":"ACM Comput. Surv."},{"key":"196_CR8","doi-asserted-by":"publisher","unstructured":"Duan, X., Gao, P., Zhang, T., et\u00a0al.: Redesigning lammps for peta-scale and hundred-billion-atom simulation on Sunway taihulight. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 148\u2013159 (2018). https:\/\/doi.org\/10.1109\/SC.2018.00015","DOI":"10.1109\/SC.2018.00015"},{"issue":"2","key":"196_CR9","doi-asserted-by":"publisher","first-page":"1292","DOI":"10.1109\/TVCG.2018.2870822","volume":"26","author":"S Eilemann","year":"2020","unstructured":"Eilemann, S., Steiner, D., Pajarola, R.: Equalizer 2.0-convergence of a parallel rendering framework. IEEE Trans. Visual Comput. Graph. 26(2), 1292\u20131307 (2020). https:\/\/doi.org\/10.1109\/TVCG.2018.2870822","journal-title":"IEEE Trans. Visual Comput. Graph."},{"key":"196_CR10","doi-asserted-by":"crossref","unstructured":"Fang, J., Fu, H., Zhao, W., et\u00a0al.: swdnn: a library for accelerating deep learning applications on Sunway taihulight. In: 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 615\u2013624 (2017). https:\/\/api.semanticscholar.org\/CorpusID:20614526","DOI":"10.1109\/IPDPS.2017.20"},{"key":"196_CR11","doi-asserted-by":"publisher","unstructured":"Govindaraju, V., Djeu, P., Sankaralingam, K., et\u00a0al.: Toward a multicore architecture for real-time ray-tracing. In: 2008 41st IEEE\/ACM International Symposium on Microarchitecture, pp. 176\u2013187 (2008). https:\/\/doi.org\/10.1109\/MICRO.2008.4771789","DOI":"10.1109\/MICRO.2008.4771789"},{"key":"196_CR12","doi-asserted-by":"publisher","first-page":"105827","DOI":"10.1109\/ACCESS.2019.2932151","volume":"7","author":"Y Hu","year":"2019","unstructured":"Hu, Y., Wang, W., Li, D., et al.: Parallel BVH construction using locally density clustering. IEEE Access 7, 105827\u2013105839 (2019). https:\/\/doi.org\/10.1109\/ACCESS.2019.2932151","journal-title":"IEEE Access"},{"key":"196_CR13","doi-asserted-by":"publisher","first-page":"136421","DOI":"10.1109\/ACCESS.2020.3011685","volume":"8","author":"S Huang","year":"2020","unstructured":"Huang, S., Zhou, G., He, M., et al.: Detection of peach disease image based on asymptotic non-local means and PCNN-IPELM. IEEE Access 8, 136421\u2013136433 (2020). https:\/\/doi.org\/10.1109\/ACCESS.2020.3011685","journal-title":"IEEE Access"},{"key":"196_CR14","doi-asserted-by":"publisher","unstructured":"Jaros, M., Riha, L., Karasek, T., et\u00a0al.: Rendering in blender cycles using mpi and intel\u00ae xeon phi\u2122. In: Proceedings of the 2017 International Conference on Computer Graphics and Digital Image Processing, pp. 1\u20135 (2017). https:\/\/doi.org\/10.1145\/3110224.3110236","DOI":"10.1145\/3110224.3110236"},{"issue":"2","key":"196_CR15","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3447807","volume":"40","author":"M Jaro\u0161","year":"2021","unstructured":"Jaro\u0161, M., \u0158\u00edha, L., Strako\u0161, P., et al.: GPU accelerated path tracing of massive scenes. ACM Trans. Graph. 40(2), 1\u201317 (2021). https:\/\/doi.org\/10.1145\/3447807","journal-title":"ACM Trans. Graph."},{"key":"196_CR16","doi-asserted-by":"publisher","DOI":"10.1007\/s42514-022-00095-y","author":"L Kai","year":"2022","unstructured":"Kai, L., Yaohua, W., Yang, G., et al.: Mt-3000: a heterogeneous multi-zone processor for HPC. CCF Trans. High Perform. Comput. (2022). https:\/\/doi.org\/10.1007\/s42514-022-00095-y","journal-title":"CCF Trans. High Perform. Comput."},{"key":"196_CR17","doi-asserted-by":"publisher","unstructured":"Kajiya, J.T.: The rendering equation. In: Proceedings of the 13th Annual Conference on Computer Graphics and Interactive Techniques, pp. 143\u2013150. Association for Computing Machinery, New York (1986). https:\/\/doi.org\/10.1145\/15886.15902","DOI":"10.1145\/15886.15902"},{"key":"196_CR18","doi-asserted-by":"publisher","unstructured":"Karras, T., Aila, T.: Fast parallel construction of high-quality bounding volume hierarchies. In: Proceedings of the 5th High-Performance Graphics Conference, HPG \u201913, pp. 89-99. Association for Computing Machinery, New York (2013). https:\/\/doi.org\/10.1145\/2492045.2492055","DOI":"10.1145\/2492045.2492055"},{"key":"196_CR19","doi-asserted-by":"publisher","unstructured":"Kim, Y., Park, N.W.: Comparison of regression models for spatial downscaling of coarse scale satellite-based precipitation products. In: 2017 IEEE International Geoscience and Remote Sensing Symposium (IGARSS), pp. 4634\u20134637 (2017). https:\/\/doi.org\/10.1109\/IGARSS.2017.8128033","DOI":"10.1109\/IGARSS.2017.8128033"},{"key":"196_CR20","doi-asserted-by":"publisher","unstructured":"Laine, S., Karras, T., Aila, T.: Megakernels considered harmful: wavefront path tracing on GPUs. In: Proceedings of the 5th High-Performance Graphics Conference, HPG \u201913, pp. 137\u2013143. Association for Computing Machinery, New York (2013). https:\/\/doi.org\/10.1145\/2492045.2492060","DOI":"10.1145\/2492045.2492060"},{"key":"196_CR21","doi-asserted-by":"publisher","unstructured":"Lee, W.J., Hwang, S.J., Shin, Y., et\u00a0al.: Fast stereoscopic rendering on mobile ray tracing GPU for virtual reality applications. In: 2017 IEEE International Conference on Consumer Electronics (ICCE), pp. 355\u2013357 (2017). https:\/\/doi.org\/10.1109\/ICCE.2017.7889353","DOI":"10.1109\/ICCE.2017.7889353"},{"key":"196_CR23","doi-asserted-by":"publisher","unstructured":"Li, Z., Deng, Y., Gu, M.: Path compression kd-trees with multi-layer parallel construction a case study on ray tracing. In: Proceedings of the 21st ACM SIGGRAPH Symposium on Interactive 3D Graphics and Games, I3D \u201917, pp 1\u20138. Association for Computing Machinery, New York (2017). https:\/\/doi.org\/10.1145\/3023368.3023382","DOI":"10.1145\/3023368.3023382"},{"key":"196_CR22","doi-asserted-by":"publisher","unstructured":"Li, Q., Wu, W., Yuan, D., et\u00a0al.: A workload-aware load balancing algorithm for cluster rendering platform. In: 2020 IEEE 23rd International Conference on Computational Science and Engineering (CSE), pp. 50\u201357 (2020). https:\/\/doi.org\/10.1109\/CSE50738.2020.00015","DOI":"10.1109\/CSE50738.2020.00015"},{"issue":"2","key":"196_CR25","doi-asserted-by":"publisher","first-page":"270","DOI":"10.1007\/s11390-008-9129-8","volume":"23","author":"YL Liu","year":"2008","unstructured":"Liu, Y.L., Wang, J., Chen, X., et al.: A robust and fast non-local means algorithm for image denoising. J. Comput. Sci. Technol. 23(2), 270\u2013279 (2008). https:\/\/doi.org\/10.1007\/s11390-008-9129-8","journal-title":"J. Comput. Sci. Technol."},{"issue":"4","key":"196_CR24","doi-asserted-by":"publisher","first-page":"975","DOI":"10.1109\/TPDS.2020.3037082","volume":"32","author":"X Liu","year":"2021","unstructured":"Liu, X., Sun, J., Zheng, L., et al.: Parallelization and optimization of NSGA-II on Sunway taihulight system. IEEE Trans. Parallel Distrib. Syst. 32(4), 975\u2013987 (2021). https:\/\/doi.org\/10.1109\/TPDS.2020.3037082","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"196_CR26","doi-asserted-by":"publisher","unstructured":"Lu\u00fc, Y., Huang, L., Shen, L., et\u00a0al.: Unleashing the power of GPU for physically-based rendering via dynamic ray shuffling. In: Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture, pp. 560\u2013573 (2017). https:\/\/doi.org\/10.1145\/3123939.3124532","DOI":"10.1145\/3123939.3124532"},{"key":"196_CR27","doi-asserted-by":"publisher","unstructured":"Ma, Z., He, J., Qiu, J., et\u00a0al.: Bagualu: targeting brain scale pretrained models with over 37 million cores. In: Proceedings of the 27th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, PPoPP \u201922, pp. 192\u2013204. Association for Computing Machinery, New York (2022). https:\/\/doi.org\/10.1145\/3503221.3508417","DOI":"10.1145\/3503221.3508417"},{"key":"196_CR28","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1016\/j.neucom.2015.08.117","volume":"195","author":"X Mingliang","year":"2016","unstructured":"Mingliang, X., Pei, L., Mingyuan, L., et al.: Medical image denoising by parallel non-local means. Neurocomputing 195, 117\u2013122 (2016). https:\/\/doi.org\/10.1016\/j.neucom.2015.08.117","journal-title":"Neurocomputing"},{"key":"196_CR29","doi-asserted-by":"publisher","DOI":"10.1109\/TVCG.2022.3209418","author":"N Morrical","year":"2022","unstructured":"Morrical, N., Sahistan, A., G\u00fcd\u00fckbay, U., et al.: Quick clusters: a GPU-parallel partitioning for efficient path tracing of unstructured volumetric grids. IEEE Trans. Vis. Comput. Graph. (2022). https:\/\/doi.org\/10.1109\/TVCG.2022.3209418","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"key":"196_CR30","doi-asserted-by":"publisher","unstructured":"Nguyen-Cong, K., Willman, J.T., Moore, S.G., et\u00a0al.: Billion atom molecular dynamics simulations of carbon at extreme conditions and experimental time and length scales. In: SC21: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 1\u201312 (2021). https:\/\/doi.org\/10.1145\/3458817.3487400","DOI":"10.1145\/3458817.3487400"},{"issue":"4","key":"196_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1778765.1778803","volume":"29","author":"SG Parker","year":"2010","unstructured":"Parker, S.G., Bigler, J., Dietrich, A., et al.: Optix: a general purpose ray tracing engine. ACM Trans. Graph. 29(4), 1\u201313 (2010). https:\/\/doi.org\/10.1145\/1778765.1778803","journal-title":"ACM Trans. Graph."},{"key":"196_CR32","doi-asserted-by":"publisher","unstructured":"Purcell, T.J., Buck, I., Mark, W.R., et\u00a0al.: Ray tracing on programmable graphics hardware. In: ACM SIGGRAPH 2005 Courses, SIGGRAPH \u201905, p 268-es. Association for Computing Machinery, New York (2005).https:\/\/doi.org\/10.1145\/1198555.1198798","DOI":"10.1145\/1198555.1198798"},{"key":"196_CR33","doi-asserted-by":"publisher","unstructured":"Ristovski, A., Gusev, M., Ristov, S.: Nested parallelism concepts of ray tracing algorithms and multithreading API performance analysis. In: 2016 24th Telecommunications Forum (TELFOR), pp. 1\u20134 (2016). https:\/\/doi.org\/10.1109\/TELFOR.2016.7818932","DOI":"10.1109\/TELFOR.2016.7818932"},{"key":"196_CR34","doi-asserted-by":"publisher","unstructured":"Saed, M., Chou, Y.H., Liu, L., et\u00a0al.: Vulkan-sim: a GPU architecture simulator for ray tracing. In: 2022 55th IEEE\/ACM International Symposium on Microarchitecture (MICRO), pp. 263\u2013281 (2022). https:\/\/doi.org\/10.1109\/MICRO56248.2022.00027","DOI":"10.1109\/MICRO56248.2022.00027"},{"key":"196_CR35","doi-asserted-by":"publisher","unstructured":"Shang, H., Li, F., Zhang, Y., et\u00a0al.: Extreme-scale ab initio quantum Raman spectra simulations on the leadership HPC system in China. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC \u201921, pp 1\u201313. Association for Computing Machinery, New York (2021). https:\/\/doi.org\/10.1145\/3458817.3487402","DOI":"10.1145\/3458817.3487402"},{"issue":"12","key":"196_CR36","doi-asserted-by":"publisher","first-page":"1802","DOI":"10.1109\/TCAD.2009.2028981","volume":"28","author":"J Spjut","year":"2009","unstructured":"Spjut, J., Kensler, A., Kopta, D., et al.: Trax: a multicore hardware architecture for real-time ray tracing. IEEE Trans. Comput. Aided Des. Integr. Circuits Syst. 28(12), 1802\u20131815 (2009). https:\/\/doi.org\/10.1109\/TCAD.2009.2028981","journal-title":"IEEE Trans. Comput. Aided Des. Integr. Circuits Syst."},{"key":"196_CR37","doi-asserted-by":"publisher","unstructured":"Vasiou, E., Shkurko, K., Brunvand, E., et\u00a0al.: Mach-rt: a many chip architecture for ray tracing. In: High Performance Graphics (Short Papers), pp. 1\u20136 (2019). https:\/\/doi.org\/10.2312\/hpg.20191188","DOI":"10.2312\/hpg.20191188"},{"key":"196_CR38","unstructured":"Veach, E.: Robust Monte Carlo Methods for Light Transport Simulation. Stanford University, Stanford, CA, USA, aAI9837162 (1998)"},{"issue":"3","key":"196_CR39","doi-asserted-by":"publisher","first-page":"277","DOI":"10.1109\/LSP.2009.2038956","volume":"17","author":"R Vignesh","year":"2010","unstructured":"Vignesh, R., Oh, B.T., Kuo, C.C.J.: Fast non-local means (NLM) computation with probabilistic early termination. IEEE Signal Process. Lett. 17(3), 277\u2013280 (2010). https:\/\/doi.org\/10.1109\/LSP.2009.2038956","journal-title":"IEEE Signal Process. Lett."},{"issue":"1","key":"196_CR40","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1109\/TVCG.2010.251","volume":"18","author":"I Wald","year":"2012","unstructured":"Wald, I.: Fast construction of SAH BVHs on the intel many integrated core (MIC) architecture. IEEE Trans. Visual Comput. Graph. 18(1), 47\u201357 (2012). https:\/\/doi.org\/10.1109\/TVCG.2010.251","journal-title":"IEEE Trans. Visual Comput. Graph."},{"issue":"3","key":"196_CR41","doi-asserted-by":"publisher","first-page":"30-1","DOI":"10.1145\/3543861","volume":"5","author":"I Wald","year":"2022","unstructured":"Wald, I., Parker, S.G.: Data parallel path tracing with object hierarchies. Proc. ACM Comput. Graph Interact. Tech. 5(3), 30\u20131 (2022). https:\/\/doi.org\/10.1145\/3543861","journal-title":"Proc. ACM Comput. Graph Interact. Tech."},{"key":"196_CR42","doi-asserted-by":"publisher","unstructured":"Wald, I., Benthin, C., Slusallek, P.: Distributed interactive ray tracing of dynamic scenes. In: IEEE Symposium on Parallel and Large-Data Visualization and Graphics, 2003. PVG 2003, pp. 77\u201385 (2003). https:\/\/doi.org\/10.1109\/PVGS.2003.1249045","DOI":"10.1109\/PVGS.2003.1249045"},{"issue":"4","key":"196_CR43","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2601097.2601199","volume":"33","author":"I Wald","year":"2014","unstructured":"Wald, I., Woop, S., Benthin, C., et al.: Embree: a kernel framework for efficient CPU ray tracing. ACM Trans. Graph. 33(4), 1\u20138 (2014). https:\/\/doi.org\/10.1145\/2601097.2601199","journal-title":"ACM Trans. Graph."},{"issue":"1","key":"196_CR44","doi-asserted-by":"publisher","first-page":"931","DOI":"10.1109\/TVCG.2016.2599041","volume":"23","author":"I Wald","year":"2016","unstructured":"Wald, I., Johnson, G., Amstutz, J., et al.: Ospray\u2014a CPU ray tracing framework for scientific visualization. IEEE Trans. Visual Comput. Graph. 23(1), 931\u2013940 (2016). https:\/\/doi.org\/10.1109\/TVCG.2016.2599041","journal-title":"IEEE Trans. Visual Comput. Graph."},{"issue":"4","key":"196_CR46","doi-asserted-by":"publisher","first-page":"600","DOI":"10.1109\/TIP.2003.819861","volume":"13","author":"Z Wang","year":"2004","unstructured":"Wang, Z., Bovik, A., Sheikh, H., et al.: Image quality assessment: from error visibility to structural similarity. IEEE Trans. Image Process. 13(4), 600\u2013612 (2004). https:\/\/doi.org\/10.1109\/TIP.2003.819861","journal-title":"IEEE Trans. Image Process."},{"key":"196_CR45","doi-asserted-by":"publisher","unstructured":"Wang, J., Guo, Y., Ying, Y., et\u00a0al.: Fast non-local algorithm for image denoising. In: 2006 International Conference on Image Processing, pp. 1429\u20131432 (2006). https:\/\/doi.org\/10.1109\/ICIP.2006.312698","DOI":"10.1109\/ICIP.2006.312698"},{"key":"196_CR47","doi-asserted-by":"publisher","unstructured":"Xie, F., Mishchuk, P., Hunt, W.: Real time cluster path tracing. In: SIGGRAPH Asia 2021 Technical Communications, SA \u201921 Technical Communications. Association for Computing Machinery, New York (2021). https:\/\/doi.org\/10.1145\/3478512.3488605","DOI":"10.1145\/3478512.3488605"},{"key":"196_CR49","doi-asserted-by":"publisher","first-page":"214","DOI":"10.1016\/j.cag.2019.05.009","volume":"82","author":"X Xu","year":"2019","unstructured":"Xu, X., Wang, B., Wang, L., et al.: A task and data balanced distributed photon mapping method. Comput. Graph. 82, 214\u2013221 (2019). https:\/\/doi.org\/10.1016\/j.cag.2019.05.009","journal-title":"Comput. Graph."},{"key":"196_CR48","doi-asserted-by":"publisher","unstructured":"Xu, J., Fu, J., Gan, L., et\u00a0al.: Accelerating cryo-em reconstruction of relion on the new Sunway supercomputer. In: 2022 IEEE Intl Conf on Parallel & Distributed Processing with Applications, Big Data & Cloud Computing, Sustainable Computing & Communications, Social Computing & Networking (ISPA\/BDCloud\/SocialCom\/SustainCom), pp. 129\u2013138 (2022). https:\/\/doi.org\/10.1109\/ISPA-BDCloud-SocialCom-SustainCom57177.2022.00024","DOI":"10.1109\/ISPA-BDCloud-SocialCom-SustainCom57177.2022.00024"},{"key":"196_CR50","doi-asserted-by":"publisher","unstructured":"Ylitie, H., Karras, T., Laine, S.: Efficient incoherent ray traversal on GPUs through compressed wide BVHs. In: Proceedings of High Performance Graphics, HPG \u201917, pp. 1\u201313 (2017). https:\/\/doi.org\/10.1145\/3105762.3105773","DOI":"10.1145\/3105762.3105773"},{"key":"196_CR51","doi-asserted-by":"publisher","unstructured":"Zhu, Q., Luo, H., Yang, C., et\u00a0al.: Enabling and scaling the HPCG benchmark on the newest generation Sunway supercomputer with 42 million heterogeneous cores. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC \u201921, pp. 1\u201313 (2021). https:\/\/doi.org\/10.1145\/3458817.3476158","DOI":"10.1145\/3458817.3476158"}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-024-00196-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-024-00196-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-024-00196-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,22]],"date-time":"2025-01-22T06:51:05Z","timestamp":1737528665000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-024-00196-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":51,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2024,12]]}},"alternative-id":["196"],"URL":"https:\/\/doi.org\/10.1007\/s42514-024-00196-w","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"type":"print","value":"2524-4922"},{"type":"electronic","value":"2524-4930"}],"subject":[],"published":{"date-parts":[[2024,12]]},"assertion":[{"value":"9 March 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"On behalf of all authors, the corresponding author states that there is no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}