{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:38:16Z","timestamp":1740123496583,"version":"3.37.3"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T00:00:00Z","timestamp":1629676800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T00:00:00Z","timestamp":1629676800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004410","name":"T\u00fcrkiye Bilimsel ve Teknolojik Ara\u015ftirma Kurumu","doi-asserted-by":"publisher","award":["119E011"],"award-info":[{"award-number":["119E011"]}],"id":[{"id":"10.13039\/501100004410","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"published-print":{"date-parts":[[2022,2]]},"DOI":"10.1007\/s11227-021-04026-6","type":"journal-article","created":{"date-parts":[[2021,8,23]],"date-time":"2021-08-23T15:06:03Z","timestamp":1629731163000},"page":"4095-4130","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Regional soft error vulnerability and error propagation analysis for GPGPU applications"],"prefix":"10.1007","volume":"78","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8310-1143","authenticated-orcid":false,"given":"I\u015f\u0131l","family":"\u00d6z","sequence":"first","affiliation":[]},{"given":"\u00d6mer Faruk","family":"Karada\u015f","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,8,23]]},"reference":[{"key":"4026_CR1","unstructured":"Implementing-graphcoloring-on-gpu (2020). https:\/\/github.com\/cemsakizci\/Implementing-graphColoring-on-GPU"},{"key":"4026_CR2","unstructured":"Nvidia, cuda-gdb (2020). https:\/\/developer.nvidia.com\/cuda-gdb"},{"key":"4026_CR3","doi-asserted-by":"crossref","unstructured":"Aamodt TM, Fung WWL, Rogers TG, Martonosi M (2018) General-purpose graphics processor architecture","DOI":"10.2200\/S00848ED1V01Y201804CAC044"},{"key":"4026_CR4","doi-asserted-by":"crossref","unstructured":"Anwer AR, Li G, Pattabiraman K, Sullivan M, Tsai T, Hari SKS (2020) Gpu-trident: efficient modeling of error propagation in gpu programs. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC \u201920","DOI":"10.1109\/SC41405.2020.00092"},{"key":"4026_CR5","doi-asserted-by":"publisher","unstructured":"Arslan S, Unsal O (2021) Efficient selective replication of critical code regions for sdc mitigation leveraging redundant multithreading. Journal of Supercomputing pp. 1. https:\/\/doi.org\/10.1007\/s11227-021-03804-6","DOI":"10.1007\/s11227-021-03804-6"},{"key":"4026_CR6","doi-asserted-by":"crossref","unstructured":"Bakhoda A, Yuan GL, Fung WWL, Wong H, Aamodt TM (2009) Analyzing cuda workloads using a detailed gpu simulator. In: International Symposium on Performance Analysis of Systems and Software","DOI":"10.1109\/ISPASS.2009.4919648"},{"key":"4026_CR7","doi-asserted-by":"crossref","unstructured":"Borodin D, Juurlink BH (2010) Protective redundancy overhead reduction using instruction vulnerability factor. Proceedings of the 7th ACM International Conference on Computing Frontiers (CF)","DOI":"10.1145\/1787275.1787342"},{"key":"4026_CR8","doi-asserted-by":"publisher","unstructured":"Cini N, Yalcin G (2020) A methodology for comparing the reliability of gpu-based and cpu-based hpcs. ACM Comput Surv 53(1). https:\/\/doi.org\/10.1145\/3372790","DOI":"10.1145\/3372790"},{"issue":"6","key":"4026_CR9","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1109\/2.386985","volume":"28","author":"JA Clark","year":"1995","unstructured":"Clark JA, Pradhan DK (1995) Fault injection: a method for validating computer-system dependability. Computer 28(6):47\u201356","journal-title":"Computer"},{"key":"4026_CR10","doi-asserted-by":"publisher","unstructured":"Cook S (2013) Chapter 9 - optimizing your application. In: S.\u00a0Cook (ed.) CUDA Programming, Applications of GPU Computing Series, pp. 305 \u2013 440. Morgan Kaufmann, Boston. https:\/\/doi.org\/10.1016\/B978-0-12-415933-4.00009-0. http:\/\/www.sciencedirect.com\/science\/article\/pii\/B9780124159334000090","DOI":"10.1016\/B978-0-12-415933-4.00009-0"},{"key":"4026_CR11","doi-asserted-by":"crossref","unstructured":"Davis TA, Hu Y (2011) The university of florida sparse matrix collection. ACM Trans Math Softw 38(1)","DOI":"10.1145\/2049662.2049663"},{"key":"4026_CR12","doi-asserted-by":"crossref","unstructured":"Dimitrov M, Mantor M, Zhou H (2009) Understanding software approaches for gpgpu reliability. In: Proceedings of 2nd Workshop on General Purpose Processing on Graphics Processing Units, GPGPU-2, p. 94\u2013104","DOI":"10.1145\/1513895.1513907"},{"key":"4026_CR13","doi-asserted-by":"crossref","unstructured":"Fang B, Pattabiraman K, Ripeanu M, Gurumurthi S (2014) Gpu-qin: A methodology for evaluating the error resilience of gpgpu applications. IEEE International Symposium on Performance Analysis of Systems and Software (ISPASS)","DOI":"10.1109\/ISPASS.2014.6844486"},{"issue":"12","key":"4026_CR14","doi-asserted-by":"publisher","first-page":"3397","DOI":"10.1109\/TPDS.2016.2517633","volume":"27","author":"B Fang","year":"2016","unstructured":"Fang B, Pattabiraman K, Ripeanu M, Gurumurthi S (2016) A systematic methodology for evaluating the error resilience of gpgpu applications. IEEE Transac Parallel Distrib Syst 27(12):3397\u20133411","journal-title":"IEEE Transac Parallel Distrib Syst"},{"key":"4026_CR15","doi-asserted-by":"crossref","unstructured":"Feng S, Gupta S, Ansari A, Mahlke S (2010) Shoestring: probabilistic soft error reliability on the cheap. Proceedings of the Fifteenth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS), p. 385\u2013396","DOI":"10.1145\/1735971.1736063"},{"key":"4026_CR16","doi-asserted-by":"crossref","unstructured":"Fernandes dos Santos F, Lunardi C, Oliveira D, Libano F, Rech P (2019) Reliability evaluation of mixed-precision architectures. IEEE International Symposium on High Performance Computer Architecture (HPCA)","DOI":"10.1109\/HPCA.2019.00041"},{"key":"4026_CR17","doi-asserted-by":"crossref","unstructured":"Grauer-Gray S, Xu L, Searles R, Ayalasomayajula S, Cavazos J (2012) Auto-tuning a high-level language targeted to gpu codes. 2012 Innovative Parallel Computing (InPar)","DOI":"10.1109\/InPar.2012.6339595"},{"key":"4026_CR18","doi-asserted-by":"publisher","unstructured":"Gupta M, Lowell D, Kalamatianos J, Raasch S, Sridharan V, Tullsen D, Gupta R (2017) Compiler techniques to reduce the synchronization overhead of gpu redundant multithreading. In: 2017 54th ACM\/EDAC\/IEEE Design Automation Conference (DAC), pp. 1\u20136 (2017). https:\/\/doi.org\/10.1145\/3061639.3062212","DOI":"10.1145\/3061639.3062212"},{"key":"4026_CR19","doi-asserted-by":"crossref","unstructured":"Hari SKS, Tsai T, Stephenson M, Keckler SW, Emer J (2017) Sassifi: an architecture-level fault injection tool for gpu application resilience evaluation. In: International Symposium on Performance Analysis of Systems and Software (ISPASS), International Symposium on Performance Analysis of Systems and Software (ISPASS)","DOI":"10.1109\/ISPASS.2017.7975296"},{"key":"4026_CR20","doi-asserted-by":"crossref","unstructured":"Hukerikar S, Teranishi K, Diniz PC, Lucas RF (2018) Redthreads: an interface for application-level fault detection\/correction through adaptive redundant multithreading. International Journal of Parallel Programming 46","DOI":"10.1007\/s10766-017-0492-3"},{"key":"4026_CR21","doi-asserted-by":"crossref","unstructured":"Jauk D, Yang D, Schulz M (2019) Predicting faults in high performance computing systems: an in-depth survey of the state-of-the-practice. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, SC \u201919","DOI":"10.1145\/3295500.3356185"},{"key":"4026_CR22","doi-asserted-by":"crossref","unstructured":"Jeon H, Annavaram M (2012) Warped-dmr: light-weight error detection for gpgpu. In: International Symposium on Microarchitecture (MICRO), International Symposium on Microarchitecture (MICRO)","DOI":"10.1109\/MICRO.2012.13"},{"key":"4026_CR23","doi-asserted-by":"publisher","unstructured":"Kalra C, Previlon F, Li X, Rubin N, Kaeli D (2018) Prism: predicting resilience of gpu applications using statistical methods. In: SC18: International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 866\u2013879 . https:\/\/doi.org\/10.1109\/SC.2018.00072","DOI":"10.1109\/SC.2018.00072"},{"key":"4026_CR24","doi-asserted-by":"publisher","unstructured":"Kalra C, Previlon F, Rubin N, Kaeli D (2020) Armorall: compiler-based resilience targeting gpu applications. ACM Trans. Archit. Code Optim. 17(2). https:\/\/doi.org\/10.1145\/3382132","DOI":"10.1145\/3382132"},{"key":"4026_CR25","doi-asserted-by":"publisher","unstructured":"Kirk DB, mei W Hwu W (2017) Chapter 5 - performance considerations. In: D.B. Kirk, W.\u00a0mei W.\u00a0Hwu (eds.) Programming Massively Parallel Processors (Third Edition), third edition edn., pp. 103 \u2013 130. Morgan Kaufmann . https:\/\/doi.org\/10.1016\/B978-0-12-811986-0.00005-4. http:\/\/www.sciencedirect.com\/science\/article\/pii\/B9780128119860000054","DOI":"10.1016\/B978-0-12-811986-0.00005-4"},{"key":"4026_CR26","doi-asserted-by":"crossref","unstructured":"Leng J, Buyuktosunoglu A, Bertran R, Bose P, Reddi VJ (2015) Safe limits on voltage reduction efficiency in gpus: a direct measurement approach. In: Proceedings of the 48th International Symposium on Microarchitecture, MICRO-48, p. 294\u2013307","DOI":"10.1145\/2830772.2830811"},{"key":"4026_CR27","doi-asserted-by":"crossref","unstructured":"Leveugle R, Calvez A, Maistri P, Vanhauwaert P (2009) Statistical fault injection: quantified error and confidence. Proceedings of the Conference on Design, Automation and Test in Europe (DATE)","DOI":"10.1109\/DATE.2009.5090716"},{"key":"4026_CR28","doi-asserted-by":"publisher","unstructured":"Li G, Pattabiraman K, Cher C, Bose P (2016) Understanding error propagation in gpgpu applications. In: SC \u201916: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis, pp. 240\u2013251. https:\/\/doi.org\/10.1109\/SC.2016.20","DOI":"10.1109\/SC.2016.20"},{"key":"4026_CR29","doi-asserted-by":"crossref","unstructured":"Li T, Ambrose JA, Ragel R, Parameswaran S (2016) Processor design for soft errors: challenges and state of the art. ACM Comput. Surv. 49(3)","DOI":"10.1145\/2996357"},{"key":"4026_CR30","doi-asserted-by":"crossref","unstructured":"Mahmoud A, Hari SKS, Sullivan MB, Tsai T, Keckler SW (2018) Optimizing software-directed instruction replication for gpu error detection. International Conference for High Performance Computing, Networking, Storage and Analysis (SC), pp. 842\u2013854","DOI":"10.1109\/SC.2018.00070"},{"issue":"4","key":"4026_CR31","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1109\/2.585157","volume":"30","author":"Mei-Chen Hsueh","year":"1997","unstructured":"Hsueh Mei-Chen, Tsai TK, Iyer RK (1997) Fault injection techniques and tools. Computer 30(4):75\u201382","journal-title":"Computer"},{"key":"4026_CR32","doi-asserted-by":"crossref","unstructured":"Mittal S (2016) A survey of techniques for approximate computing. ACM Comput Surv 48(4)","DOI":"10.1145\/2893356"},{"key":"4026_CR33","volume-title":"Architecture Design for Soft Errors","author":"S Mukherjee","year":"2008","unstructured":"Mukherjee S (2008) Architecture Design for Soft Errors. Morgan Kaufmann Publishers Inc., San Francisco, CA, USA"},{"key":"4026_CR34","doi-asserted-by":"publisher","unstructured":"Nie B, Tiwari D, Gupta S, Smirni E, Rogers JH (2016) A large-scale study of soft-errors on gpus in the field. In: 2016 IEEE International Symposium on High Performance Computer Architecture (HPCA), pp. 519\u2013530. https:\/\/doi.org\/10.1109\/HPCA.2016.7446091","DOI":"10.1109\/HPCA.2016.7446091"},{"issue":"1","key":"4026_CR35","doi-asserted-by":"publisher","first-page":"80","DOI":"10.1111\/j.1467-8659.2007.01012.x","volume":"26","author":"JD Owens","year":"2007","unstructured":"Owens JD, Luebke D, Govindaraju N, Harris M, Kr\u00fcger J, Lefohn AE, Purcell TJ (2007) A survey of general-purpose computation on graphics hardware. Comput Graphics Forum 26(1):80\u2013113","journal-title":"Comput Graphics Forum"},{"key":"4026_CR36","doi-asserted-by":"crossref","unstructured":"Oz I, Topcuoglu HR, Tosun O (2019) A user-assisted thread-level vulnerability assessment tool. Concurrency and Computation: Practice and Experience 31(13)","DOI":"10.1002\/cpe.5085"},{"key":"4026_CR37","doi-asserted-by":"publisher","unstructured":"Palazzi L, Li G, Fang B, Pattabiraman K (2020) Improving the accuracy of ir-level fault injection. IEEE Transactions on Dependable and Secure Computing pp. 1\u20131. https:\/\/doi.org\/10.1109\/TDSC.2020.2980273","DOI":"10.1109\/TDSC.2020.2980273"},{"key":"4026_CR38","unstructured":"Previlon FG, Kalra C, d.\u00a0tiwari, Kaeli DR (2020) Characterizing and exploiting soft error vulnerability phase behavior in gpu applications. IEEE Transactions on Dependable and Secure Computing pp. 1"},{"key":"4026_CR39","doi-asserted-by":"crossref","unstructured":"Quang Anh Pham N, Fan R (2018) Efficient algorithms for graph coloring on gpu. 2018 IEEE 24th International Conference on Parallel and Distributed Systems (ICPADS)","DOI":"10.1109\/PADSW.2018.8644624"},{"key":"4026_CR40","doi-asserted-by":"crossref","unstructured":"Reis GA, Chang J, Vachharajani N, Rangan R, August DI (2005) Swift: software implemented fault tolerance. International Symposium on Code Generation and Optimization","DOI":"10.1145\/1113841.1113843"},{"issue":"4","key":"4026_CR41","doi-asserted-by":"publisher","first-page":"366","DOI":"10.1145\/1113841.1113843","volume":"2","author":"GA Reis","year":"2005","unstructured":"Reis GA, Chang J, Vachharajani N, Rangan R, August DI, Mukherjee SS (2005) Software-controlled fault tolerance. ACM Trans Archit Code Optim 2(4):366\u2013396. https:\/\/doi.org\/10.1145\/1113841.1113843","journal-title":"ACM Trans Archit Code Optim"},{"key":"4026_CR42","doi-asserted-by":"crossref","unstructured":"dos Santos FF, Carro L, Rech P (2019) Kernel and layer vulnerability factor to evaluate object detection reliability in gpus. IET Computers and Digital Techniques 13","DOI":"10.1049\/iet-cdt.2018.5026"},{"key":"4026_CR43","doi-asserted-by":"publisher","unstructured":"Santos FFd, Hari SKS, Basso PM, Carro L, Rech P (2021) Demystifying gpu reliability: comparing and combining beam experiments, fault simulation, and profiling. In: 2021 IEEE International Parallel and Distributed Processing Symposium (IPDPS), pp. 289\u2013298. https:\/\/doi.org\/10.1109\/IPDPS49936.2021.00037","DOI":"10.1109\/IPDPS49936.2021.00037"},{"key":"4026_CR44","unstructured":"Shivakumar P, Kistler M, Keckler SW, Burger D, Alvisi L (2002) Modeling the effect of technology trends on the soft error rate of combinational logic. In: International Conference on Dependable Systems and Networks (DSN), International Conference on Dependable Systems and Networks (DSN)"},{"key":"4026_CR45","doi-asserted-by":"crossref","unstructured":"Tsai T, Hari SKS, Sullivan MB, Villa O, Keckler SW (2021) Nvbitfi: dynamic fault injection for gpus. In: International Conference on Dependable Systems and Networks, (DSN)","DOI":"10.1109\/DSN48987.2021.00041"},{"key":"4026_CR46","doi-asserted-by":"publisher","unstructured":"Villa O, Stephenson M, Nellans D, Keckler SW (2019) Nvbit: a dynamic binary instrumentation framework for nvidia gpus. In: Proceedings of the 52nd Annual IEEE\/ACM International Symposium on Microarchitecture, MICRO \u201952, p. 372\u2013383. Association for Computing Machinery, New York, NY, USA. https:\/\/doi.org\/10.1145\/3352460.3358307","DOI":"10.1145\/3352460.3358307"},{"key":"4026_CR47","doi-asserted-by":"crossref","unstructured":"Wadden J, Lyashevsky A, Gurumurthi S, Sridharan V, Skadron K (2014) Real-world design and evaluation of compiler-managed gpu redundant multithreading. In: International Symposium on Computer Architecture (ISCA), International Symposium on Computer Architecture (ISCA)","DOI":"10.1109\/ISCA.2014.6853227"},{"key":"4026_CR48","unstructured":"Wang J (2017) Acceleration and optimization of dynamic parallelism for irregular applications on gpus. Ph.D. thesis, Georgia Institute of Technology, Atlanta, GA, USA. http:\/\/hdl.handle.net\/1853\/56294"},{"issue":"1","key":"4026_CR49","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1109\/TC.2020.2980541","volume":"70","author":"L Yang","year":"2021","unstructured":"Yang L, Nie B, Jog A, Smirni E (2021) Practical resilience analysis of gpgpu applications in the presence of single- and multi-bit faults. IEEE Transac Comput 70(1):30\u201344. https:\/\/doi.org\/10.1109\/TC.2020.2980541","journal-title":"IEEE Transac Comput"},{"key":"4026_CR50","doi-asserted-by":"publisher","unstructured":"Yang L, Nie B, Jog A, Smirni E (2021) Sugar: speeding up gpgpu application resilience estimation with input sizing. Proc. ACM Meas. Anal. Comput. Syst. 5(1). https:\/\/doi.org\/10.1145\/3447375","DOI":"10.1145\/3447375"},{"key":"4026_CR51","doi-asserted-by":"crossref","unstructured":"Zamani H, Liu Y, Tripathy D, Bhuyan L, Chen Z (2019) Greenmm: energy efficient gpu matrix multiplication through undervolting. In: Proceedings of the ACM International Conference on Supercomputing, ICS \u201919, p. 308\u2013318","DOI":"10.1145\/3330345.3330373"},{"key":"4026_CR52","doi-asserted-by":"crossref","unstructured":"Zamani H, Tripathy D, Bhuyan L, Chen Z (2020) Saou: safe adaptive overclocking and undervolting for energy-efficient gpu computing. In: Proceedings of the ACM\/IEEE International Symposium on Low Power Electronics and Design, ISLPED \u201920, p. 205\u2013210","DOI":"10.1145\/3370748.3406553"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04026-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-021-04026-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-021-04026-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,2,7]],"date-time":"2022-02-07T13:25:58Z","timestamp":1644240358000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-021-04026-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,23]]},"references-count":52,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2022,2]]}},"alternative-id":["4026"],"URL":"https:\/\/doi.org\/10.1007\/s11227-021-04026-6","relation":{},"ISSN":["0920-8542","1573-0484"],"issn-type":[{"type":"print","value":"0920-8542"},{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2021,8,23]]},"assertion":[{"value":"13 August 2021","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 August 2021","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}