{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,10]],"date-time":"2026-02-10T02:53:44Z","timestamp":1770692024219,"version":"3.49.0"},"reference-count":28,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T00:00:00Z","timestamp":1763510400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T00:00:00Z","timestamp":1763510400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["CCF Trans. HPC"],"published-print":{"date-parts":[[2026,2]]},"DOI":"10.1007\/s42514-025-00255-w","type":"journal-article","created":{"date-parts":[[2025,11,19]],"date-time":"2025-11-19T15:23:08Z","timestamp":1763565788000},"page":"1-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Improving scalability of sequential task flow models with cache-friendly parallel dependency tracking"],"prefix":"10.1007","volume":"8","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-3130-1868","authenticated-orcid":false,"given":"Xiran","family":"Gao","sequence":"first","affiliation":[]},{"given":"Li","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Xiaobing","family":"Feng","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,11,19]]},"reference":[{"issue":"2","key":"255_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2898348","volume":"43","author":"E Agullo","year":"2017","unstructured":"Agullo, E., Buttari, A., Guermouche, A., Lopez, F.: Implementing multifrontal sparse solvers for multicore architectures with sequential task flow runtime systems. ACM Trans. Math. Softw. 43(2), 1\u201322 (2017). https:\/\/doi.org\/10.1145\/2898348","journal-title":"ACM Trans. Math. Softw."},{"key":"255_CR2","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2017.2766064","author":"E Agullo","year":"2017","unstructured":"Agullo, E., et al.: Achieving high performance on supercomputers with a sequential task-based programming model. IEEE Trans. Parallel Distrib. Syst. (2017). https:\/\/doi.org\/10.1109\/TPDS.2017.2766064","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"255_CR3","doi-asserted-by":"publisher","unstructured":"D. \u00c1lvarez, K. Sala, M. Maro\u00f1as, A. Roca, and V. Beltran: Advanced synchronization techniques for task-based runtime systems. In: Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 334\u2013347. ACM, Virtual Event Republic of Korea (2021). https:\/\/doi.org\/10.1145\/3437801.3441601","DOI":"10.1145\/3437801.3441601"},{"key":"255_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2020.102664","volume":"97","author":"J Bosch","year":"2020","unstructured":"Bosch, J., \u00c1lvarez, C., Jim\u00e9nez-Gonz\u00e1lez, D., Martorell, X., Ayguad\u00e9, E.: Asynchronous runtime with distributed manager for task-based programming models. Parallel Comput. 97, 102664 (2020). https:\/\/doi.org\/10.1016\/j.parco.2020.102664","journal-title":"Parallel Comput."},{"key":"255_CR5","doi-asserted-by":"publisher","unstructured":"J. Bosch, X. Tan, C. Alvarez, D. Jimenez-Gonzalez, X. Martorell, and E. Ayguade: Characterizing and improving the performance of many-core task-based parallel programming runtimes. In: 2017 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), Orlando\/Buena Vista, pp. 1285\u20131292. IEEE, FL, USA (2017). https:\/\/doi.org\/10.1109\/IPDPSW.2017.32","DOI":"10.1109\/IPDPSW.2017.32"},{"issue":"6","key":"255_CR6","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1109\/MCSE.2013.98","volume":"15","author":"G Bosilca","year":"2013","unstructured":"Bosilca, G., Bouteiller, A., Danalis, A., Faverge, M., Herault, T., Dongarra, J.J.: PaRSEC: exploiting heterogeneity to enhance scalability. Comput. Sci. Eng. 15(6), 36\u201345 (2013). https:\/\/doi.org\/10.1109\/MCSE.2013.98","journal-title":"Comput. Sci. Eng."},{"key":"255_CR7","doi-asserted-by":"publisher","unstructured":"C. Castes, E. Agullo, O. Aumage, and E. Saillard: Decentralized in-order execution of a sequential task-based code for shared-memory architectures. In: 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 552\u2013561. IEEE, Lyon, France (2022). https:\/\/doi.org\/10.1109\/IPDPSW55747.2022.00095","DOI":"10.1109\/IPDPSW55747.2022.00095"},{"issue":"no. 4","key":"255_CR8","doi-asserted-by":"publisher","first-page":"309","DOI":"10.1007\/s42514-020-00047-4","volume":"2","author":"L Chen","year":"2020","unstructured":"Chen, L., Tang, S., Fu, Y., Gao, X., Guo, J., Jiang, S.: AceMesh: a structured data driven programming language for high performance computing. CCF Trans. High Perform. Comput. 2(4), 309\u2013322 (2020). https:\/\/doi.org\/10.1007\/s42514-020-00047-4","journal-title":"CCF Trans. High Perform. Comput."},{"issue":"7","key":"255_CR9","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-016-5588-7","volume":"59","author":"H Fu","year":"2016","unstructured":"Fu, H., et al.: The sunway taihulight supercomputer: system and applications. Sci. China Inf. Sci. 59(7), 072001 (2016). https:\/\/doi.org\/10.1007\/s11432-016-5588-7","journal-title":"Sci. China Inf. Sci."},{"key":"255_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2024.103124","volume":"123","author":"X Gao","year":"2025","unstructured":"Gao, X., Chen, L., Wang, H., Cui, H., Feng, X.: Scalable tasking runtime with parallelized builders for explicit message passing architectures. Parallel Comput. 123, 103124 (2025). https:\/\/doi.org\/10.1016\/j.parco.2024.103124","journal-title":"Parallel Comput."},{"key":"255_CR11","doi-asserted-by":"publisher","unstructured":"R. Hoque, T. Herault, G. Bosilca, and J. Dongarra: Dynamic task discovery in PaRSEC: a data-flow task-based runtime. In: Proceedings of the 8th Workshop on Latest Advances in Scalable Algorithms for Large-Scale Systems, pp. 1\u20138. ACM, Denver Colorado (2017) https:\/\/doi.org\/10.1145\/3148226.3148233","DOI":"10.1145\/3148226.3148233"},{"key":"255_CR12","doi-asserted-by":"publisher","unstructured":"T.-W. Huang, C.-X. Lin, G. Guo, and M. Wong: Cpp-taskflow: fast task-based parallel programming using modern C++. In: 2019 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 974\u2013983. IEEE, Rio de Janeiro, Brazil (2019). https:\/\/doi.org\/10.1109\/IPDPS.2019.00105","DOI":"10.1109\/IPDPS.2019.00105"},{"issue":"10","key":"255_CR13","first-page":"1842","volume":"42","author":"S Jiang","year":"2020","unstructured":"Jiang, S., Tang, S., Gao, X., Hua, R., Chen, L., Liu, Y.: Parallel optimization of tend_lin application on the sunway taihulight supercomputer. Comput. Eng. Sci. 42(10), 1842\u20131851 (2020)","journal-title":"Comput. Eng. Sci."},{"key":"255_CR14","doi-asserted-by":"publisher","first-page":"128","DOI":"10.1016\/j.parco.2018.06.001","volume":"77","author":"J Lin","year":"2018","unstructured":"Lin, J., Xu, Z., Cai, L., Nukada, A., Matsuoka, S.: Evaluating the SW26010 many-core processor with a micro-benchmark suite for performance optimizations. Parallel Comput. 77, 128\u2013143 (2018). https:\/\/doi.org\/10.1016\/j.parco.2018.06.001","journal-title":"Parallel Comput."},{"key":"255_CR15","doi-asserted-by":"publisher","unstructured":"X. Meng, X. Zeng, X. Chen, and X. Ye: A cache-friendly concurrent lock-free queue for efficient inter-core communication. In: 2017 IEEE 9th International Conference on Communication Software and Networks (ICCSN), pp. 538\u2013542. IEEE, Guangzhou (2017). https:\/\/doi.org\/10.1109\/ICCSN.2017.8230170","DOI":"10.1109\/ICCSN.2017.8230170"},{"key":"255_CR16","doi-asserted-by":"publisher","unstructured":"A. Morrison and Y. Afek: Fast concurrent queues for x86 processors. In: Proceedings of the 18th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 103\u2013112. (2013). https:\/\/doi.org\/10.1145\/2442516.2442527","DOI":"10.1145\/2442516.2442527"},{"key":"255_CR17","doi-asserted-by":"publisher","first-page":"444","DOI":"10.1016\/j.future.2024.05.019","volume":"159","author":"P Nookala","year":"2024","unstructured":"Nookala, P., Chard, K., Raicu, I.: X-OpenMP \u2014 extreme fine-grained tasking using lock-less work stealing. Future Gener. Comput. Syst. 159, 444\u2013458 (2024). https:\/\/doi.org\/10.1016\/j.future.2024.05.019","journal-title":"Future Gener. Comput. Syst."},{"key":"255_CR18","doi-asserted-by":"publisher","unstructured":"P. Nookala, P. Dinda, K. C. Hale, K. Chard, and I. Raicu: Enabling extremely fine-grained parallelism via scalable concurrent queues on modern many-core architectures. In: 2021 29th International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems (MASCOTS), pp. 1\u20138. IEEE, Houston, TX, USA (2021). https:\/\/doi.org\/10.1109\/MASCOTS53633.2021.9614292","DOI":"10.1109\/MASCOTS53633.2021.9614292"},{"key":"255_CR19","doi-asserted-by":"publisher","unstructured":"R. Pereira, P. Carribault, A. Roussel, and T. Gautier: Investigating dependency graph discovery impact on task-based MPI+OpenMP applications performances. In: Proceedings of the 52nd International Conference on Parallel Processing, pp. 163\u2013172. (2023). https:\/\/doi.org\/10.1145\/3605573.3605602","DOI":"10.1145\/3605573.3605602"},{"key":"255_CR20","doi-asserted-by":"crossref","unstructured":"A. Podobas, M. Brorsson, and V. Vlassov: TurboBLYSK: scheduling for improved data-driven task performance with fast dependency resolution. In: International Workshop on OpenMP, pp. 45\u201357. (2014)","DOI":"10.1007\/978-3-319-11454-5_4"},{"key":"255_CR21","doi-asserted-by":"publisher","DOI":"10.5555\/3433701.3433783","author":"E Slaughter","year":"2020","unstructured":"Slaughter, E., et al.: Task bench: a parameterized benchmark for evaluating parallel runtime performance. presented at the sc \u201920: the international conference for high performance computing, networking. Storage Anal. (2020). https:\/\/doi.org\/10.5555\/3433701.3433783","journal-title":"Storage Anal."},{"issue":"4","key":"255_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/2541228.2555316","volume":"10","author":"H Vandierendonck","year":"2013","unstructured":"Vandierendonck, H., Tzenakis, G., Nikolopoulos, D.S.: Analysis of dependence tracking algorithms for task dataflow execution. ACM Trans. Archit. Code Optim. 10(4), 1\u201324 (2013). https:\/\/doi.org\/10.1145\/2541228.2555316","journal-title":"ACM Trans. Archit. Code Optim."},{"issue":"1","key":"255_CR23","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1007\/s10766-012-0213-x","volume":"41","author":"J Wang","year":"2013","unstructured":"Wang, J., Zhang, K., Tang, X., Hua, B.: B-queue: efficient and practical queuing for fast core-to-core communication. Int. J. Parallel Program. 41(1), 137\u2013159 (2013). https:\/\/doi.org\/10.1007\/s10766-012-0213-x","journal-title":"Int. J. Parallel Program."},{"issue":"8","key":"255_CR24","doi-asserted-by":"publisher","first-page":"2325","DOI":"10.1109\/TPDS.2023.3284219","volume":"34","author":"C Yu","year":"2023","unstructured":"Yu, C., Royuela, S., Qui\u00f1ones, E.: Taskgraph: a low contention OpenMP tasking framework. IEEE Trans. Parallel Distrib. Syst. 34(8), 2325\u20132336 (2023). https:\/\/doi.org\/10.1109\/TPDS.2023.3284219","journal-title":"IEEE Trans. Parallel Distrib. Syst."},{"key":"255_CR25","doi-asserted-by":"publisher","unstructured":"C. Yu, S. Royuela, and E. Qui\u00f1ones: Enhancing openMP tasking model: performance and portability. In: OpenMP: Enabling Massive Node-Level Parallelism, in Lecture Notes in Computer Science, vol. 12870, pp. 35\u201349. Cham: Springer International Publishing (2021). https:\/\/doi.org\/10.1007\/978-3-030-85262-7_3","DOI":"10.1007\/978-3-030-85262-7_3"},{"key":"255_CR26","doi-asserted-by":"publisher","unstructured":"C. Yu, S. Royuela, and E. Qui\u00f1ones: a low overhead tasking model for openMP. In: Euro-Par 2021: Parallel Processing Workshops, in Lecture Notes in Computer Science, vol. 13098, pp. 520\u2013524. Cham: Springer International Publishing (2022). https:\/\/doi.org\/10.1007\/978-3-031-06156-1_42","DOI":"10.1007\/978-3-031-06156-1_42"},{"key":"255_CR27","doi-asserted-by":"publisher","unstructured":"C. Yu, S. Royuela, and E. Qui\u00f1ones: Enhancing heterogeneous computing through openMP and GPU graph. In: Proceedings of the 53rd International Conference on Parallel Processing, pp. 534\u2013543. ACM, Gotland Sweden (2024). https:\/\/doi.org\/10.1145\/3673038.3673050","DOI":"10.1145\/3673038.3673050"},{"issue":"6","key":"255_CR28","doi-asserted-by":"publisher","first-page":"1267","DOI":"10.3878\/j.issn.1006-9895.2009.06.13","volume":"33","author":"H Zhang","year":"2009","unstructured":"Zhang, H.: The computational scheme and the test for dynamical framework of IAP AGCM-4. Chin. J. Atmospheric Sci. 33(6), 1267\u20131285 (2009). https:\/\/doi.org\/10.3878\/j.issn.1006-9895.2009.06.13","journal-title":"Chin. J. Atmospheric Sci."}],"container-title":["CCF Transactions on High Performance Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-025-00255-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s42514-025-00255-w","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s42514-025-00255-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,9]],"date-time":"2026-02-09T08:56:09Z","timestamp":1770627369000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s42514-025-00255-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,19]]},"references-count":28,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2026,2]]}},"alternative-id":["255"],"URL":"https:\/\/doi.org\/10.1007\/s42514-025-00255-w","relation":{},"ISSN":["2524-4922","2524-4930"],"issn-type":[{"value":"2524-4922","type":"print"},{"value":"2524-4930","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,19]]},"assertion":[{"value":"25 July 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 September 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 November 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All authors certify that they have no affiliations with or involvement in any organization or entity with any financial interest or non-financial interest in the subject matter or materials discussed in this manuscript.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}}]}}