{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T12:13:31Z","timestamp":1763468011231},"publisher-location":"Berlin, Heidelberg","reference-count":22,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783642156458"},{"type":"electronic","value":"9783642156465"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2010]]},"DOI":"10.1007\/978-3-642-15646-5_23","type":"book-chapter","created":{"date-parts":[[2010,9,6]],"date-time":"2010-09-06T07:43:52Z","timestamp":1283759032000},"page":"219-228","source":"Crossref","is-referenced-by-count":5,"title":["Checkpoint\/Restart-Enabled Parallel Debugging"],"prefix":"10.1007","author":[{"given":"Joshua","family":"Hursey","sequence":"first","affiliation":[]},{"given":"Chris","family":"January","sequence":"additional","affiliation":[]},{"given":"Mark","family":"O\u2019Connor","sequence":"additional","affiliation":[]},{"given":"Paul H.","family":"Hargrove","sequence":"additional","affiliation":[]},{"given":"David","family":"Lecomber","sequence":"additional","affiliation":[]},{"given":"Jeffrey M.","family":"Squyres","sequence":"additional","affiliation":[]},{"given":"Andrew","family":"Lumsdaine","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"23_CR1","unstructured":"Message Passing Interface Forum: MPI: A Message Passing Interface. In: Proc.\u00a0of Supercomputing 1993, pp. 878\u2013883 (1993)"},{"key":"23_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/3-540-48158-3_7","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"J. Cownie","year":"1999","unstructured":"Cownie, J., Gropp, W.: A standard interface for debugger access to message queue information in MPI. In: Margalef, T., Dongarra, J., Luque, E. (eds.) PVM\/MPI 1999. LNCS, vol.\u00a01697, pp. 51\u201358. Springer, Heidelberg (1999)"},{"key":"23_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"115","DOI":"10.1007\/11846802_22","volume-title":"Recent Advances in Parallel Virtual Machine and Message Passing Interface","author":"C.L. Gottbrath","year":"2006","unstructured":"Gottbrath, C.L., Barrett, B., Gropp, B., Lusk, E., Squyres, J.: An interface to support the identification of dynamic MPI 2 processes for scalable parallel debugging. In: Mohr, B., Tr\u00e4ff, J.L., Worringen, J., Dongarra, J. (eds.) PVM\/MPI 2006. LNCS, vol.\u00a04192, pp. 115\u2013122. Springer, Heidelberg (2006)"},{"key":"23_CR4","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1145\/568522.568525","volume":"34","author":"E.N.M. Elnozahy","year":"2002","unstructured":"Elnozahy, E.N.M., Alvisi, L., Wang, Y.M., Johnson, D.B.: A survey of rollback-recovery protocols in message-passing systems. ACM Computing Surveys\u00a034, 375\u2013408 (2002)","journal-title":"ACM Computing Surveys"},{"key":"23_CR5","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/214451.214456","volume":"3","author":"K.M. Chandy","year":"1985","unstructured":"Chandy, K.M., Lamport, L.: Distributed snapshots: determining global states of distributed systems. ACM Transactions on Computer Systems\u00a03, 63\u201375 (1985)","journal-title":"ACM Transactions on Computer Systems"},{"key":"23_CR6","doi-asserted-by":"crossref","unstructured":"Hursey, J., Squyres, J.M., Mattox, T.I., Lumsdaine, A.: The design and implementation of checkpoint\/restart process fault tolerance for Open MPI. In: Proceedings of the IEEE International Parallel and Distributed Processing Symposium (2007)","DOI":"10.1109\/IPDPS.2007.370605"},{"key":"23_CR7","unstructured":"Jung, H., Shin, D., Han, H., Kim, J.W., Yeom, H.Y., Lee, J.: Design and implementation of multiple fault-tolerant MPI over Myrinet (M3). In: Proceedings of the ACM\/IEEE Supercomputing Conference (2005)"},{"key":"23_CR8","unstructured":"Gao, Q., Yu, W., Huang, W., Panda, D.K.: Application-transparent checkpoint\/restart for MPI programs over InfiniBand. In: International Conference on Parallel Processing, pp. 471\u2013478 (2006)"},{"key":"23_CR9","doi-asserted-by":"publisher","first-page":"319","DOI":"10.1177\/1094342006067469","volume":"20","author":"A. Bouteiller","year":"2006","unstructured":"Bouteiller, A., et al.: MPICH-V project: A multiprotocol automatic fault-tolerant MPI. International Journal of High Performance Computing Applications\u00a020, 319\u2013333 (2006)","journal-title":"International Journal of High Performance Computing Applications"},{"key":"23_CR10","doi-asserted-by":"crossref","unstructured":"Duell, J., Hargrove, P., Roman, E.: The design and implementation of Berkeley Lab\u2019s Linux Checkpoint\/Restart. Technical Report LBNL-54941, Lawrence Berkeley National Laboratory (2002)","DOI":"10.2172\/793773"},{"key":"23_CR11","doi-asserted-by":"crossref","unstructured":"Hursey, J., Mattox, T.I., Lumsdaine, A.: Interconnect agnostic checkpoint\/restart in Open MPI. In: Proceedings of the 18th ACM International Symposium on High Performance Distributed Computing, pp. 49\u201358 (2009)","DOI":"10.1145\/1551609.1551619"},{"key":"23_CR12","unstructured":"Curtis, B.: Fifteen years of psychology in software engineering: Individual differences and cognitive science. In: Proceedings of the International Conference on Software Engineering, pp. 97\u2013106 (1984)"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Feldman, S.I., Brown, C.B.: IGOR: A system for program debugging via reversible execution. In: Proceedings of the ACM SIGPLAN\/SIGOPS workshop on Parallel and Distributed Debugging, pp. 112\u2013123 (1988)","DOI":"10.1145\/68210.69226"},{"key":"23_CR14","doi-asserted-by":"crossref","unstructured":"Wittie, L.: The Bugnet distributed debugging system. In: Proceedings of the 2nd workshop on Making Distributed Systems Work, pp. 1\u20133 (1986)","DOI":"10.1145\/503956.504005"},{"key":"23_CR15","doi-asserted-by":"crossref","unstructured":"Bouteiller, A., Bosilca, G., Dongarra, J.: Retrospect: Deterministic replay of MPI applications for interactive distributed debugging. In: Recent Advances in Parallel Virtual Machine and Message Passing Interface, pp. 297\u2013306 (2007)","DOI":"10.1007\/978-3-540-75416-9_41"},{"key":"23_CR16","unstructured":"Ronsse, M., Bosschere, K.D., de Kergommeaux, J.C.: Execution replay and debugging. In: Proceedings of the Fourth International Workshop on Automated Debugging, Munich, Germany (2000)"},{"key":"23_CR17","unstructured":"King, S.T., Dunlap, G.W., Chen, P.M.: Debugging operating systems with time-traveling virtual machines. In: Proceedings of the USENIX Annual Technical Conference (2005)"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Pan, D.Z., Linton, M.A.: Supporting reverse execution for parallel programs. In: Proceedings of the ACM SIGPLAN\/SIGOPS workshop on Parallel and Distributed Debugging, pp. 124\u2013129 (1988)","DOI":"10.1145\/68210.69227"},{"issue":"3","key":"23_CR19","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/52.88940","volume":"8","author":"H. Agrawal","year":"1991","unstructured":"Agrawal, H., DeMillo, R.A., Spafford, E.H.: An execution-backtracking approach to debugging. IEEE Software\u00a08(3), 21\u201326 (1991)","journal-title":"IEEE Software"},{"key":"23_CR20","unstructured":"Undo Ltd.: UndoDB - Reversible debugging for Linux (2009)"},{"key":"23_CR21","unstructured":"TotalView Technologies: ReplayEngine (2009)"},{"key":"23_CR22","doi-asserted-by":"publisher","first-page":"123","DOI":"10.1145\/545214.545229","volume":"30","author":"D.J. Sorin","year":"2002","unstructured":"Sorin, D.J., Martin, M.M.K., Hill, M.D., Wood, D.A.: SafetyNet: Improving the availability of shared memory multiprocessors with global checkpoint\/recovery. SIGARCH Computer Architecture News\u00a030, 123\u2013134 (2002)","journal-title":"SIGARCH Computer Architecture News"}],"container-title":["Lecture Notes in Computer Science","Recent Advances in the Message Passing Interface"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-642-15646-5_23.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2020,11,24]],"date-time":"2020-11-24T03:11:03Z","timestamp":1606187463000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-642-15646-5_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2010]]},"ISBN":["9783642156458","9783642156465"],"references-count":22,"URL":"https:\/\/doi.org\/10.1007\/978-3-642-15646-5_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2010]]}}}