{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,20]],"date-time":"2025-02-20T05:19:12Z","timestamp":1740028752034,"version":"3.37.3"},"publisher-location":"Berlin, Heidelberg","reference-count":30,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783540008521"},{"type":"electronic","value":"9783540365693"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2003]]},"DOI":"10.1007\/3-540-36569-9_45","type":"book-chapter","created":{"date-parts":[[2010,3,29]],"date-time":"2010-03-29T21:12:05Z","timestamp":1269897125000},"page":"664-678","source":"Crossref","is-referenced-by-count":1,"title":["ROS: The Rollback-One-Step Method to Minimize the Waiting Time during Debugging Long-Running Parallel Programs"],"prefix":"10.1007","author":[{"given":"Nam","family":"Thoai","sequence":"first","affiliation":[]},{"given":"Dieter","family":"Kranzlm\u00fcller","sequence":"additional","affiliation":[]},{"given":"Jens","family":"Volkert","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2003,4,15]]},"reference":[{"key":"45_CR1","doi-asserted-by":"publisher","first-page":"63","DOI":"10.1145\/214451.214456","volume":"3","author":"K. M. Chandy","year":"1985","unstructured":"Chandy, K. M., and Lamport, L. \u201cDistributed Snapshots: Determining Global States of Distributed Systems\u201d, ACM Transactions on Computer Systems 3 (1985), pp. 63\u201375.","journal-title":"ACM Transactions on Computer Systems"},{"key":"45_CR2","unstructured":"Cunha, J. C., and Lourenco, J. \u201cAn Integrated Testing and Debugging Environment for Parallel and Distributed Programs\u201d, Proc. of the 23rd EUROMICRO Conference, IEEE Computer Society Budapest, Hungary (1997), pp. 291\u2013298."},{"key":"45_CR3","first-page":"547","volume":"19","author":"C. R. Dow","year":"2000","unstructured":"Dow, C. R., and Lin, C. M. \u201cAdaptive Distributed Breakpoint Detection and Checkpoint Space Reduction in Message Massing Programs\u201d, Computers and Artificial Intelligence (2000), Vol. 19, pp. 547\u2013568.","journal-title":"Computers and Artificial Intelligence"},{"key":"45_CR4","unstructured":"Elnozahy, E. N., Johnson, D. B., and Wang, Y. M. \u201cA Survey of Rollback-Recovery Protocols in Message-Passing Systems\u201d, Technical Report CMU-CS, Carnegie Mellon University, (October 1996), pp. 96\u2013181."},{"key":"45_CR5","doi-asserted-by":"crossref","unstructured":"Feldman, S.I., Brown, Ch. B. \u201cIgor: A System for Program Debugging via Reversible Execution\u201d, Proc. of the ACM SIGPLAN and SIGOPS Workshop on Parallel and Distributed Debugging (May 5\u20136, 1988), University of Wisconsin, Madison, Wisconsin, USA, SIGPLAN Notices (January 1989), Vol. 24, No. 1, pp. 112-123.","DOI":"10.1145\/69215.69226"},{"key":"45_CR6","doi-asserted-by":"crossref","unstructured":"Fowler, J., and Zwaenepoel, W. \u201cCausal Distributed Breakpoints\u201d, Proc. of the 10th International Conference on Distributed Computing Systems (ICDCS) (1990), pp. 134\u2013141.","DOI":"10.1109\/ICDCS.1990.89277"},{"key":"45_CR7","doi-asserted-by":"crossref","unstructured":"Garcia, I. C., and Buzato. L. E. \u201cProgressive Construction of Consistent Global Checkpoints\u201d, In 19th IEEE International Conference on Distributed Computing Systems (ICDCS\u201999), Austin, Texas, EUA (June 1999).","DOI":"10.1109\/ICDCS.1999.776506"},{"key":"45_CR8","doi-asserted-by":"crossref","unstructured":"Haban, D., and Weigel, W. \u201cGlobal Events and Global Breakpoints in Distributed Systems\u201d, Proc. of the 21st Annual Hawaii International Conference on System Sciences, Software Track, IEEE Computer Society (January 1988), Vol. 2, pp. 166\u2013175.","DOI":"10.1109\/HICSS.1988.11802"},{"key":"45_CR9","doi-asserted-by":"crossref","unstructured":"H\u00e9lary, J. M., Mostefaoui, A., and Raynal., M. \u201cCommunication-Induced Determination of Consistent Snapshot\u201d, IEEE Transaction on Parallel and Distributed Systems (September 1999), Vol. 10, No. 9.","DOI":"10.1109\/71.798312"},{"key":"45_CR10","doi-asserted-by":"crossref","unstructured":"Kacsuk, P., \u201cSystematic Macrostep Debugging of Message Passing Parallel Programs\u201d, In: Kacsuk, P., Kotsis, G., \u201cDistributed and Parallel Systems (DAPSYS\u201998)\u201d, Future Generation Computer Systems, North-Holland (April 2000), Vol. 16, No. 6, pp. 597\u2013607.","DOI":"10.1016\/S0167-739X(99)00074-6"},{"key":"45_CR11","unstructured":"Kranzlm\u00fcller, D. \u201cEvent Graph Analysis for Debugging Massively Parallel Programs\u201d, PhD Thesis, GUP Linz, Johannes Kepler University Linz, Austria (September 2000), http:\/\/www.gup.uni-linz.ac.at\/~dk\/thesis\/thesis.php ."},{"issue":"7","key":"45_CR12","doi-asserted-by":"publisher","first-page":"558","DOI":"10.1145\/359545.359563","volume":"21","author":"L. Lamport","year":"1978","unstructured":"Lamport, L. \u201cTime, Clocks, and the Ordering of Events in a Distributed System\u201d, Communications of the ACM (July 1978), Vol. 21, No. 7, pp. 558\u2013565.","journal-title":"Communications of the ACM"},{"key":"45_CR13","doi-asserted-by":"crossref","unstructured":"Manivannan, D. and Singhal, M. \u201cA Low Overhead Recovery Technique Using Quasi-Synchronous Checkpointing\u201d, Proc. 16th IEEE International Conference on Distributed Computing Systems, Hong-Kong (1996), pp. 100\u2013107.","DOI":"10.1109\/ICDCS.1996.507906"},{"issue":"4","key":"45_CR14","doi-asserted-by":"publisher","first-page":"32","DOI":"10.1109\/88.260289","volume":"1","author":"R. Netzer","year":"1993","unstructured":"Netzer, R. H. B., and Xu, J. \u201cAdaptive Message Logging for Incremental Program Replay\u201d, IEEE Parallel & Distributed Technology (November 1993), Vol. 1, No. 4, pp. 32\u201340.","journal-title":"IEEE Parallel & Distributed Technology"},{"key":"45_CR15","doi-asserted-by":"crossref","unstructured":"Netzer, R. H. B., Subramanian, S., and Xu, J. \u201cCritical-Path-Based Message Logging for Incremental Replay of Message-Passing Programs\u201d, In 14th International Conference on Distributed Computing Systems, Poznan, Poland (June 1994).","DOI":"10.1109\/ICDCS.1994.302444"},{"key":"45_CR16","unstructured":"Netzer, R. H. B., and Xu, J. \u201cSender-Based Message Logging for Reducing Rollback Propagation\u201d, Proc. of the 7th IEEE Symposium on Parallel and Distributed Processing (SPDP\u2019 95)."},{"key":"45_CR17","doi-asserted-by":"crossref","unstructured":"Pan, D.Z., and Linton, M.A. \u201cSupporting Reverse Execution of Parallel Programs\u201d, Proc. of the ACM SIGPLAN and SIGOPS Workshop on Parallel and Distributed Debugging (May 5\u20136, 1988), University of Wisconsin, Madison, Wisconsin, USA, SIGPLAN Notices (January 1989), Vol. 24, No. 1, pp. 124\u2013129.","DOI":"10.1145\/69215.69227"},{"key":"45_CR18","unstructured":"Plank, J. S. \u201cAn Overview of Checkpointing in Uniprocessor and Distributed Systems, Focusing on Implementation and Performance\u201d, Technical Report of University of Tennessee, UT-CS-97-372 (July 1997)."},{"issue":"2","key":"45_CR19","first-page":"221","volume":"1","author":"B. Randel","year":"1975","unstructured":"Randel, B. \u201cSystem Structure for Software Fault Tolerance\u201d, IEEE Transactions on Software Engineering TSE (June 1975), Vol. 1, No. 2, pp. 221\u2013232.","journal-title":"IEEE Transactions on Software Engineering TSE"},{"key":"45_CR20","unstructured":"Raynal, M., and Singhal, M. \u201cLogical Time: A Way to Capture Causality in Distributed Systems\u201d, IRISA (January 1995)."},{"key":"45_CR21","series-title":"Lect Notes Comput Sci","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1007\/BFb0020445","volume-title":"Cheaper Matrix Clocks","author":"F. Ruget","year":"1994","unstructured":"Ruget, F. \u201cCheaper Matrix Clocks\u201d, Proc. of the 8th International Workshop on Distributed Algorithms, Springer-Verlag LNCS 857 (G. Tel and P. Vityani Eds) (1994), pp. 355\u2013369."},{"key":"45_CR22","doi-asserted-by":"crossref","unstructured":"Wang, Y. M., and Fuchs, W. K. \u201cOptimistic Message Logging for Independent Checkpointing in Message Passing Systems\u201d, Proc. of the 11th Symposium on Reliable Distributed Systems, (October 1992), pp. 147\u2013154.","DOI":"10.1109\/RELDIS.1992.235132"},{"key":"45_CR23","unstructured":"Wang, Y. M., and Fuchs, W. K. \u201cLazy Checkpoint Coordination for Bounding Rollback Propagation\u201d, Proc. of the 12th Symposium on Reliable Distributed Systems (1993), pp. 78\u201385."},{"key":"45_CR24","doi-asserted-by":"crossref","unstructured":"Wang, Y. M. \u201cThe Maximum and Minimum Consistent Global Checkpoints and Their Applications\u201d, Proc. IEEE Symposium Reliable Distributed Systems (September 1995), pp. 86\u201395.","DOI":"10.1109\/RELDIS.1995.526216"},{"key":"45_CR25","doi-asserted-by":"crossref","unstructured":"Wang, Y. M., and Fuchs, W. K. \u201cOptimal Message Log Reclamation for Uncoordinated Checkpointing\u201d, Fault-Tolerant Parallel and Distributed Systems, IEEE Computer Society Press (1995), pp. 24\u201329.","DOI":"10.1109\/FTPDS.1994.494470"},{"issue":"4","key":"45_CR26","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1109\/12.588059","volume":"46","author":"Y. M. Wang","year":"1997","unstructured":"Wang, Y. M. \u201cConsistent Global Checkpoints That Contains a Set of Local Checkpoints\u201d, IEEE Transactions on Computers (1997), Vol. 46, No. 4, pp. 456\u2013468.","journal-title":"IEEE Transactions on Computers"},{"key":"45_CR27","unstructured":"Yang, Z., and Marsland, T. \u201cGlobal Snapshots for Distributed Debugging\u201d, Technical Report TR 92-03, Laboratory for Distributed and Parallel Computing, Computing Science Department, University of Alberta, Edmonton, Canada T6G 2H1 (1992)."},{"key":"45_CR28","doi-asserted-by":"crossref","unstructured":"Zambonelli, F. \u201cOn the Effectiveness of Distributed Checkpoint Algorithms for Domino-Free Recovery\u201d, In 7th IEEE Symposium on High-Performance Distributed Computing (July 1998).","DOI":"10.1109\/HPDC.1998.709964"},{"key":"45_CR29","doi-asserted-by":"crossref","unstructured":"Zambonelli, F., and Netzer, R. H. B. \u201cAn Efficient Logging Algorithm for Incremental Replay of Message-Passing Applications\u201d, Proc. of the 13th International Parallel Processing Symposium and 10th Symposium on Parallel and Distributed Processing (1999).","DOI":"10.1109\/IPPS.1999.760506"},{"key":"45_CR30","doi-asserted-by":"publisher","first-page":"667","DOI":"10.1006\/jpdc.2001.1703","volume":"61","author":"F. Zambonelli","year":"2001","unstructured":"Zambonelli, F., and Netzer, R. H. B. \u201cDeadlock-Free Incremental Replay of Message-Passing Programs\u201d, Journal of Parallel and Distributed Computing 61 (2001), pp. 667\u2013678.","journal-title":"Journal of Parallel and Distributed Computing"}],"container-title":["Lecture Notes in Computer Science","High Performance Computing for Computational Science \u2014 VECPAR 2002"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/3-540-36569-9_45","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,19]],"date-time":"2025-02-19T19:12:25Z","timestamp":1739992345000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/3-540-36569-9_45"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2003]]},"ISBN":["9783540008521","9783540365693"],"references-count":30,"URL":"https:\/\/doi.org\/10.1007\/3-540-36569-9_45","relation":{},"ISSN":["0302-9743"],"issn-type":[{"type":"print","value":"0302-9743"}],"subject":[],"published":{"date-parts":[[2003]]}}}