{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,21]],"date-time":"2026-02-21T18:53:37Z","timestamp":1771700017850,"version":"3.50.1"},"reference-count":52,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,12,1]],"date-time":"2022-12-01T00:00:00Z","timestamp":1669852800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2017YFB0202001"],"award-info":[{"award-number":["2017YFB0202001"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2022,12,1]]},"DOI":"10.1109\/tpds.2022.3157690","type":"journal-article","created":{"date-parts":[[2022,3,8]],"date-time":"2022-03-08T21:20:13Z","timestamp":1646774413000},"page":"3491-3504","source":"Crossref","is-referenced-by-count":2,"title":["Jdebug: A Fast, Non-Intrusive and Scalable Fault Locating Tool for Ten-Million-Scale Parallel Applications"],"prefix":"10.1109","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6805-975X","authenticated-orcid":false,"given":"Dajia","family":"Peng","sequence":"first","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1151-5179","authenticated-orcid":false,"given":"Yunlong","family":"Feng","sequence":"additional","affiliation":[{"name":"National Supercomputer Center in Wuxi, Wuxi, China"}]},{"given":"Yong","family":"Liu","sequence":"additional","affiliation":[{"name":"National Research Centre of Parallel Computer Engineering and Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7870-6535","authenticated-orcid":false,"given":"Xin","family":"Liu","sequence":"additional","affiliation":[{"name":"National Research Centre of Parallel Computer Engineering and Technology, Beijing, China"}]},{"given":"Wei","family":"Xue","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"given":"Dexun","family":"Chen","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Tsinghua University, Beijing, China"}]},{"given":"Jiawei","family":"Song","sequence":"additional","affiliation":[{"name":"National Supercomputer Center in Wuxi, Wuxi, China"}]},{"given":"Zuoning","family":"Chen","sequence":"additional","affiliation":[{"name":"National Research Centre of Parallel Computer Engineering and Technology, Beijing, China"}]}],"member":"263","reference":[{"key":"ref39","first-page":"19","article-title":"WiDS checker: Combating bugs in distributed systems","author":"liu","year":"0","journal-title":"Proc 4th USENIX Conf Networked Syst Des Implementation USENIX Assoc"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/32.908957"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/DSN.2010.5544927"},{"key":"ref32","article-title":"Largest particle simulations downgrade the runaway electron risk for iter","author":"liu","year":"2016"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2018.00043"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2016.4"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CCGrid.2011.44"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/581376.581377"},{"key":"ref35","first-page":"5","article-title":"ABHRANTA: Locating bugs that manifest at large system scales","author":"zhou","year":"0","journal-title":"Proc 8th USENIX Conf Hot Topics Syst Dependability USENIX Assoc"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/2063384.2063451"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3126908.3126938"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/2666356.2594336"},{"key":"ref29","article-title":"The landscape of parallel computing research: A view from berkeley","author":"asanovic","year":"2006"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2007.370254"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/2667219"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.15"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-016-5588-7"},{"key":"ref21","first-page":"1","article-title":"Lessons learned at 208 k: Towards debugging millions of cores","author":"lee","year":"0","journal-title":"Proc ACM\/IEEE Conf Supercomputing"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3332466.3374504"},{"key":"ref23","article-title":"ScanWorks platform for embedded instruments","year":"0"},{"key":"ref26","article-title":"Nas parallel benchmarks, multi-zone versions","volume":"2003","author":"wijngaart","year":"2003","journal-title":"Supercomputing"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/235968.233324"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1145\/1542275.1542319"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1177\/1094342007077860"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654104"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ISPDC.2016.53"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872374"},{"key":"ref40","first-page":"423","article-title":"D3S: Debugging deployed distributed systems","author":"liu","year":"0","journal-title":"Proc 5th USENIX Symp Networked Syst Des Implementation USENIX Assoc"},{"key":"ref12","first-page":"425","article-title":"A classification of software faults","volume":"27","author":"grottke","year":"2005","journal-title":"J Rel Eng Assoc Jpn"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1177\/1094342009347767"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2882394"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/1869459.1869481"},{"key":"ref16","first-page":"269","article-title":"AccMon: Automatically detecting memory-related bugs via program counter-based invariants","author":"zhou","year":"0","journal-title":"Proc IEEE 37th Int Symp Microarchit"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/1362622.1362643"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/1806799.1806838"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2010.27"},{"key":"ref4","first-page":"131","article-title":"Wukong: Automatically detecting and localizing bugs that manifest at large system scales","author":"zhou","year":"0","journal-title":"Proc 22nd Int Symp High- Perform Parallel Distrib Comput"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/1996130.1996143"},{"key":"ref6","article-title":"Petascale Debugging","author":"january","year":"2010"},{"key":"ref5","article-title":"GDB: The GNU project debugger","year":"0"},{"key":"ref8","article-title":"Automation assisted debugging on the cray with TotalView","author":"gottbrath","year":"0","journal-title":"Proc Cray User Group"},{"key":"ref7","article-title":"Arm limited, &#x201C;ARM DDT","year":"0"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-11261-4_5"},{"key":"ref9","article-title":"TotalView HPC debugging software","year":"0"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1145\/1145319.1145342"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.705"},{"key":"ref48","doi-asserted-by":"crossref","first-page":"493","DOI":"10.1016\/S0927-5452(04)80063-7","article-title":"MARMOT: An MPI analysis and checking tool","volume":"13","author":"krammer","year":"2003","journal-title":"Advances in Parallel Computing"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2000.10055"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2013.86"},{"key":"ref41","article-title":"Support for debugging automatically parallelized programs","author":"hood","year":"2000"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/1851476.1851491"},{"key":"ref43","first-page":"115","article-title":"Automatic relative debugging of OpenMP programs","author":"matthews","year":"2003","journal-title":"Proc EWOMP"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/71\/9790018\/09730097.pdf?arnumber=9730097","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,18]],"date-time":"2022-07-18T20:25:48Z","timestamp":1658175948000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9730097\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,12,1]]},"references-count":52,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2022.3157690","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,12,1]]}}}