{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,30]],"date-time":"2025-10-30T06:46:12Z","timestamp":1761806772628,"version":"3.30.1"},"reference-count":42,"publisher":"Elsevier BV","issue":"5-6","license":[{"start":{"date-parts":[[1999,10,1]],"date-time":"1999-10-01T00:00:00Z","timestamp":938736000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["Future Generation Computer Systems"],"published-print":{"date-parts":[[1999,10]]},"DOI":"10.1016\/s0167-739x(99)00024-2","type":"journal-article","created":{"date-parts":[[2002,7,26]],"date-time":"2002-07-26T02:32:32Z","timestamp":1027650752000},"page":"745-755","source":"Crossref","is-referenced-by-count":15,"title":["Deploying fault tolerance and taks migration with NetSolve"],"prefix":"10.1016","volume":"15","author":[{"given":"James S.","family":"Plank","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Henri","family":"Casanova","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Micah","family":"Beck","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jack J.","family":"Dongarra","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/S0167-739X(99)00024-2_BIB1","unstructured":"The Math Works, matlab Reference Guide, 1992."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB2","unstructured":"S. Wolfram, The mathematica Book, 3rd ed., Wolfram Median, Inc. and Cambridge University Press, Cambridge, 1996."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB3","unstructured":"E. Anderson, Z. Bai, C. Bischof, J. Demmel, J. Dongarra, J. Du Croz, A. Greenbaum, S. Hammarling, A. McKenney, S. Ostrouchov, D. Sorensen, lapack Users\u2019 Guide, 2nd ed., SIAM, Philadelphia. PA, 1995."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB4","doi-asserted-by":"crossref","unstructured":"L.S. Blackford, J. Choi, A. Cleary, E. D\u2019Azevedo, J. Demmel, I. Dhillon, J. Dongarra, S. Hammarling, G. Henry, A. Petitet, K. Stanley, D. Walker, R.C. Whaley, ScaLAPACK Users\u2019 Guide, SIAM, Philadelphia. PA, 1997.","DOI":"10.1137\/1.9780898719642"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB5","doi-asserted-by":"crossref","unstructured":"M. Litzkow, M. Livny, Experience with the condor distributed batch system, in: Proc. IEEE Workshop on Experimental Distributed Systems, IEEE Computer Society Press, 1990, pp. 97\u2013101.","DOI":"10.1109\/EDS.1990.138057"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB6","unstructured":"H. Casanova, J. Dongarra, NetSolve\u2019s network enabled server: examples and applications, IEEE Computational Sci. Eng., submitted for publication."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB7","unstructured":"J. Casas, D.L. Clark, P.S. Galbiati, R. Konuru, S.W. Otto, R.M. Prouty, J. Walpole, MIST: PVM with transparent migration and checkpointing, in: 3rd Ann. PVM Users\u2019 Group Meeting, Pittsburgh, PA, May 1995."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB8","doi-asserted-by":"crossref","unstructured":"Y. Chen, J.S. Plank, K. Li, CLIP: a checkpointing tool for message-passing parallel programs, in: SC \u201897: High Performance Networking and Computing, San Jose, November 1997.","DOI":"10.1145\/509593.509626"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB9","doi-asserted-by":"crossref","unstructured":"G. Stellner, CoCheck: checkpointing, process migration for MPI, in: 10th Int. Parallel Processing Symp., April 1996, IEEE Computer Society, pp. 526\u2013531.","DOI":"10.1109\/IPPS.1996.508106"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB10","doi-asserted-by":"crossref","unstructured":"V.K. Naik, S.P. Midkiff, J.E. Moreira, A checkpointing strategy for scalable recovery on distributed Parallel systems, in: SC \u201897: High Performance Networking and Computing, San Jose, November 1997.","DOI":"10.1145\/509593.509625"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB11","doi-asserted-by":"crossref","unstructured":"L.M. Silva, J.G. Silva, S. Chapple, L. Clarke, Portable checkpointing and recovery, in: Proc. HPDC-4, High-performance distributed computing, Washington. DC, August 1995, pp. 188\u2013195.","DOI":"10.1109\/HPDC.1995.518709"},{"issue":"2","key":"10.1016\/S0167-739X(99)00024-2_BIB12","doi-asserted-by":"crossref","first-page":"302","DOI":"10.1109\/12.364541","article-title":"Floating point fault tolerance with backward error assertions","volume":"44","author":"Boley","year":"1995","journal-title":"IEEE Trans. Computers 44"},{"issue":"6","key":"10.1016\/S0167-739X(99)00024-2_BIB13","doi-asserted-by":"crossref","first-page":"518","DOI":"10.1109\/TC.1984.1676475","article-title":"Algorithm-based fault tolerance for matrix operations","volume":"C-33","author":"Huang","year":"1984","journal-title":"IEEE Trans. Computers"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB14","doi-asserted-by":"crossref","first-page":"125","DOI":"10.1006\/jpdc.1997.1336","article-title":"Fault tolerant matrix operations for networks of workstations using diskless checkpointing","volume":"43","author":"Plank","year":"1997","journal-title":"J. Parallel and Distributed Computing"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB15","unstructured":"I. Foster, C. Kesselman, C. Lee, G. von Laszewski, P. Stelling, A fault detection service for wide area distributed computations, in: Proc. High Performance Distributed Computing Conf., submitted for publication."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB16","doi-asserted-by":"crossref","unstructured":"R. Wolski, Dynamically forecasting network performance to support dynamic scheduling using the network weather service, in: 6th High-Performance Distributed Computing Conf., August 1997, pp. 316\u2013325.","DOI":"10.1109\/HPDC.1997.626437"},{"issue":"8","key":"10.1016\/S0167-739X(99)00024-2_BIB17","doi-asserted-by":"crossref","first-page":"942","DOI":"10.1109\/12.609281","article-title":"Impact of checkpoint latency on overhead ratio of a checkpointing scheme","volume":"46","author":"Vaidya","year":"1997","journal-title":"IEEE Trans. Computers"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB18","unstructured":"Y. Kim, J.S. Plank, J. Dongarra, Fault tolerant matrix operations using checksum and reverse computation, in: 6th Symp. on the frontiers of massively parallel computation, October 1996, 70\u201377."},{"issue":"2","key":"10.1016\/S0167-739X(99)00024-2_BIB19","doi-asserted-by":"crossref","first-page":"18","DOI":"10.1109\/2.485843","article-title":"TreadMarks: shared memory computing on networks of workstations","volume":"29","author":"Amza","year":"1996","journal-title":"IEEE Computer"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB20","doi-asserted-by":"crossref","unstructured":"K.L. Johnson, M.F. Kaashoek, D.A. Wallach, CRL: High-performance all-software distributed shared memory, in: 15th Symp. on Operating Systems Principles, ACM, New York, December 1995, pp. 213\u2013228.","DOI":"10.1145\/224056.224073"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB21","doi-asserted-by":"crossref","unstructured":"G. Cabillic, G. Muller, I. Puaut, The performance of consistent checkpointing in distributed shared memory systems, in: Proc. 14th Symposium on Reliable Distributed Systems, Sep. 1995, pp. 96\u2013105.","DOI":"10.1109\/RELDIS.1995.526217"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB22","doi-asserted-by":"crossref","unstructured":"N. Neves, M. Castro, P. Guedes, A checkpoint protocol for an entry consistent shared memory system, in: 13th ACM Symp. on Principles of Distributed Computing, Los Angeles, CA, August 1994, pp. 121\u2013129.","DOI":"10.1145\/197917.197973"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB23","doi-asserted-by":"crossref","unstructured":"G. Janakiraman, Y. Tamir, Coordinated checkpointing\u2013rollback error recovery for distributed shared memory multicomputers, in: 13th Symp. on Reliable Distributed Systems, October 1994, pp. 42\u201351.","DOI":"10.1109\/RELDIS.1994.336910"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB24","doi-asserted-by":"crossref","unstructured":"G. Suri, B. Janssens, W.K. Fuchs, Reduced overhead logging for rollback recovery in distributed shared memory, in: 24th Int. Symp. on Fault-Tolerant Computing, June 1994, pp. 279\u2013288.","DOI":"10.1109\/FTCS.1995.466971"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB25","doi-asserted-by":"crossref","first-page":"147","DOI":"10.1006\/jpdc.1997.1338","article-title":"Application level fault tolerance in heterogeneous networks of workstations","volume":"43","author":"Beguelin","year":"1997","journal-title":"J. Parallel and Distributed Computing"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB26","unstructured":"D.J. Scales, M.S. Lam, Transparent fault tolerance for parallel applications on networks of workstations, in: Usenix 1996 Technical Conf. on UNIX and Advanced Computing Systems, San Diego, January 1996."},{"issue":"3","key":"10.1016\/S0167-739X(99)00024-2_BIB27","doi-asserted-by":"crossref","first-page":"287","DOI":"10.1109\/71.372777","article-title":"Supporting fault-tolerant parallel programming in Linda","volume":"6","author":"Bakken","year":"1995","journal-title":"IEEE Trans. Parallel and Distributed Systems"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB28","unstructured":"L.M. Silva, B. Veer, J.G. Silva, Checkpointing SPMD applications on transputer networks, in: Scalable High Performance Computing Conf., Knoxville, TN, May 1994, pp. 694\u2013701."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB29","doi-asserted-by":"crossref","unstructured":"A. Baratloo, P. Dasgupta, Z.M. Kedem, Calypso: a novel software system for fault-tolerant parallel processing on distributed platforms, in: 4th IEEE Int. Symp. on High Performance Distributed Computing, August 1995, pp. 122\u2013129.","DOI":"10.1109\/HPDC.1995.518702"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB30","doi-asserted-by":"crossref","unstructured":"D. Cummings, L. Alkalaj, Checkpoint\/Rollback in a distributed system using coarse-grained dataflow, in: 24th Int. Symp. on Fault-Tolerant Computing, Austin, TX, June 1994, pp. 424\u2013433.","DOI":"10.1109\/FTCS.1994.315619"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB31","doi-asserted-by":"crossref","unstructured":"M.J. Feeley, W.E. Morgan, F.H. Pighin, A.R. Karlin, H.M. Levy, Implementing global memory management in a workstation cluster, in: 15th Symp. on Operating Systems Principles, ACM, New York, December 1995, pp. 201\u2013212.","DOI":"10.1145\/224056.224072"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB32","doi-asserted-by":"crossref","unstructured":"B. Ramkumar, V. Strumpen, Portable checkpointing and recovery in heterogeneous environments, in: 27th Int. Symp. on Fault-Tolerant Computing, June 1997, pp. 58\u201367.","DOI":"10.1109\/FTCS.1997.614078"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB33","doi-asserted-by":"crossref","unstructured":"B. Steensgaard, E. Jul, Object and native code thread mobility among heterogeneous computers, in: 15th Symp. on Operating Systems Principles, ACM, New York, December 1995, pp. 68\u201378.","DOI":"10.1145\/224057.224063"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB34","doi-asserted-by":"crossref","unstructured":"P.E. Chung, Y. Huang, S. Yajnik, G. Fowler, K.P. Vo, Y.M. Wang, Checkpointing in CosMiC: a user-level process migration environment, in: Pacific Rim Int. Symp. on Fault-Tolerant Systems, December 1997.","DOI":"10.1109\/PRFTS.1997.640146"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB35","doi-asserted-by":"crossref","unstructured":"M.W. Mutka, M. Livny, The available capacity of a privately owned workstation environment, Perfomance Evaluation, July 1991, Vol. 12, pp. 269\u2013284.","DOI":"10.1016\/0166-5316(91)90005-N"},{"issue":"5","key":"10.1016\/S0167-739X(99)00024-2_BIB36","doi-asserted-by":"crossref","first-page":"5","DOI":"10.1145\/37499.37502","article-title":"Using idle workstations in a shared computing environment","volume":"21","author":"Nichols","year":"1987","journal-title":"Operating Systems Review: Proceedings of SOSP-11"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB37","doi-asserted-by":"crossref","unstructured":"J. Pruyne, M. Livny, Parallel processing on dynamic resources with CARMI, in: First IPPS Workshop on Job Scheduling Strategies for Parallel Processing, April 1995.","DOI":"10.1007\/3-540-60153-8_33"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB38","doi-asserted-by":"crossref","unstructured":"I. Foster, K. Kesselman, Globus: a metacomputing infrastructure toolkit, in: Proc. Workshop on Environments and Tools, SIAM, Philadelphia. PA, submitted for publication.","DOI":"10.1177\/109434209701100205"},{"key":"10.1016\/S0167-739X(99)00024-2_BIB39","unstructured":"R. Orfali, D. Harkey, Client\/Server Programming with Java and CORBA, Wiley, New York, 1997."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB40","unstructured":"S. Sekiguchi, M. Sato, H. Nakada, S. Matsuoka, U. Nagashima, Ninf: network based information library for globally high performance computing, in: Proc. Parallel Object-Oriented Methods and Applications (POOMA), Santa Fe, 1996, pp. 39\u201348."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB41","unstructured":"J. Czyzyk, M. Mesnier, J. Mor\u00e9, NEOS: the network-enabled optimization system. Technical Report MCS-P615-1096, Mathematics and Computer Science Division, Argonne National Laboratory, 1996."},{"key":"10.1016\/S0167-739X(99)00024-2_BIB42","unstructured":"A. Grimshaw, W. Wulf, J. French, A. Weaver, P. Reynolds Jr., A Synopsis of the Legion Project. Technical Report CS-94-20, Department of Computer Science, University of Virginia, 1994."}],"container-title":["Future Generation Computer Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X99000242?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0167739X99000242?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2024,12,6]],"date-time":"2024-12-06T14:56:01Z","timestamp":1733496961000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X99000242"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[1999,10]]},"references-count":42,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[1999,10]]}},"alternative-id":["S0167739X99000242"],"URL":"https:\/\/doi.org\/10.1016\/s0167-739x(99)00024-2","relation":{},"ISSN":["0167-739X"],"issn-type":[{"type":"print","value":"0167-739X"}],"subject":[],"published":{"date-parts":[[1999,10]]}}}