{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,15]],"date-time":"2025-11-15T10:19:07Z","timestamp":1763201947863,"version":"3.41.0"},"reference-count":33,"publisher":"Oxford University Press (OUP)","issue":"8","funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61572215 and 61332001"],"award-info":[{"award-number":["61572215 and 61332001"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Foundation","doi-asserted-by":"publisher","award":["2013M531696"],"award-info":[{"award-number":["2013M531696"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002858","name":"China Postdoctoral Science Special Foundation","doi-asserted-by":"crossref","award":["2015T80802"],"award-info":[{"award-number":["2015T80802"]}],"id":[{"id":"10.13039\/501100002858","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["The Computer Journal"],"published-print":{"date-parts":[[2016,8]]},"DOI":"10.1093\/comjnl\/bxv125","type":"journal-article","created":{"date-parts":[[2016,1,15]],"date-time":"2016-01-15T12:48:14Z","timestamp":1452862094000},"page":"1155-1173","source":"Crossref","is-referenced-by-count":2,"title":["DO<sub>cyclical<\/sub>: A Latency-Resistant Cyclic Multi-Threading Approach for Automatic Program Parallelization"],"prefix":"10.1093","volume":"59","author":[{"given":"Hairong","family":"Yu","sequence":"first","affiliation":[]},{"given":"Guohui","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jianjun","family":"Li","sequence":"additional","affiliation":[]},{"given":"Lihchyun","family":"Shu","sequence":"additional","affiliation":[]}],"member":"286","published-online":{"date-parts":[[2016,1,14]]},"reference":[{"key":"2016083005472012000_59.8.1155.1","doi-asserted-by":"crossref","unstructured":"Olukotun K. , Hammond L. and Laudon J. (2007) Chip Multiprocessor Architecture: Techniques to Improve Throughput and Latency. Morgan & Claypool Press, Madison.","DOI":"10.2200\/S00093ED1V01Y200707CAC003"},{"key":"2016083005472012000_59.8.1155.2","doi-asserted-by":"crossref","unstructured":"Ottoni G. and August D.I. (2007) Global Multi-Threaded Instruction Scheduling. Proc. MICRO07, Chicago, IL, December 1\u20135, pp. 56\u201368. IEEE, Washington.","DOI":"10.1109\/MICRO.2007.32"},{"key":"2016083005472012000_59.8.1155.3","unstructured":"Vachharajani N.A. (2008) Intelligent Speculation for Pipelined Multithreading. Princeton University, Princeton, NJ."},{"key":"2016083005472012000_59.8.1155.4","unstructured":"Bridges M.J. (2008) The Velocity Compiler: Extracting Efficient Multicore Execution from Legacy Sequential Codes. Princeton University, Princeton, NJ."},{"key":"2016083005472012000_59.8.1155.5","unstructured":"Allen R. and Kennedy K. (2002) Optimizing Compilers for Modern Architectures. Morgan Kaufmann, San Francisco."},{"key":"2016083005472012000_59.8.1155.6","doi-asserted-by":"crossref","unstructured":"Ottoni G. , Rangan R. , Stoler A. and August D.I. (2005) Automatic Thread Extraction with Decoupled Software Pipelining. Proc. MICRO05, Barcelona, ES, November 12\u201316, pp. 105\u2013116. IEEE, Washington.","DOI":"10.1109\/MICRO.2005.13"},{"key":"2016083005472012000_59.8.1155.7","doi-asserted-by":"crossref","unstructured":"Raman E. , Ottoni G. , Raman A. , Bridges M.J. and August D.I. (2008) Parallel-Stage Decoupled Software Pipelining. Proc. CGO08, Boston, MA, April 6\u20139, pp. 114\u2013123. ACM, New York.","DOI":"10.1145\/1356058.1356074"},{"key":"2016083005472012000_59.8.1155.8","unstructured":"Cytron R. (1986) Doacross: Beyond Vectorization for Multiprocessors. Proc. ICPP86, St. Charles, IL, August 5\u201310, pp. 114\u2013123. IEEE, Washington."},{"key":"2016083005472012000_59.8.1155.9","doi-asserted-by":"crossref","unstructured":"Campanoni S. , Jones T. , Holloway G. , Reddi V.J. , Wei G.-Y. and Brooks D. (2012) Helix: Automatic Parallelization of Irregular Programs for Chip Multiprocessing. Proc. CGO12, San Jose, CA, March 31\u2013April 4, pp. 84\u201393. ACM, New York.","DOI":"10.1145\/2259016.2259028"},{"key":"2016083005472012000_59.8.1155.10","unstructured":"Hennessy J.L. and Patterson D.A. (2012) Computer Architecture: A Quantitative Approach. Elsevier, Atlanta."},{"key":"2016083005472012000_59.8.1155.11","doi-asserted-by":"crossref","unstructured":"Ferrante J. , Ottenstein K.J. and Warren J.D. The program dependence graph and its use in optimization. ACM Trans. Prog. Lang. Syst., 9, 319\u2013349.","DOI":"10.1145\/24039.24041"},{"key":"2016083005472012000_59.8.1155.12","unstructured":"Tarjan R. (1971) Depth-First Search and Linear Grajh Algorithms. Proc. SWAT71, Kingston, CA, October 13\u201315, pp. 114\u2013121. IEEE, Washington."},{"key":"2016083005472012000_59.8.1155.13","unstructured":"Butenhof D.R. (1997) Programming with POSIX Threads. Addison-Wesley, Boston."},{"key":"2016083005472012000_59.8.1155.14","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/1186736.1186737","article-title":"Spec cpu2006 benchmark descriptions","volume":"34","author":"Henning","year":"2006","journal-title":"ACM SIGARCH Comp. Arch. News"},{"key":"2016083005472012000_59.8.1155.15","doi-asserted-by":"crossref","unstructured":"Thies W. and Amarasinghe S. (2010) An Empirical Characterization of Stream Programs and its Implications for Language and Compiler Design. Proc. PACT10, Vienna, AT, September 11\u201315, pp. 365\u2013376. ACM, New York.","DOI":"10.1145\/1854273.1854319"},{"key":"2016083005472012000_59.8.1155.16","doi-asserted-by":"crossref","unstructured":"Lattner C. and Adve V. (2004) Llvm: A Compilation Framework for Lifelong Program Analysis & Transformation. Proc. CGO04, Palo Alto, CA, March 21\u201324, pp. 75\u201386. IEEE, Washington.","DOI":"10.1109\/CGO.2004.1281665"},{"key":"2016083005472012000_59.8.1155.17","unstructured":"Haiek D.A.P. (1979) Multiprocessors: Discussion of Some Theoretical and Practical Problems. University of Illinois, Champaign."},{"key":"2016083005472012000_59.8.1155.18","doi-asserted-by":"crossref","unstructured":"Bridges M. , Vachharajani N. , Zhang Y. , Jablin T. and August D. (2007) Revisiting the Sequential Programming Model for Multi-core. Proc. MICRO07, Chicago, IL, December 1\u20135, pp. 69\u201384. IEEE, Washington.","DOI":"10.1109\/MICRO.2007.20"},{"key":"2016083005472012000_59.8.1155.19","unstructured":"Lundstrom S.F. and Barnes G.H. (1980) A Controllable Mimd Architecture. Proc. ICPP80, Washington, DC, August 5\u20139, pp. 19\u201327. IEEE, Washington."},{"key":"2016083005472012000_59.8.1155.20","doi-asserted-by":"crossref","unstructured":"Rangan R. , Vachharajani N. , Vachharajani M. and August D.I. (2004) Decoupled Software Pipelining with the Synchronization Array. Proc. PACT04, Juan-les-pins, FR, September 29\u2013October 3, pp. 177\u2013188. IEEE, Washington.","DOI":"10.1109\/PACT.2004.1342552"},{"key":"2016083005472012000_59.8.1155.21","doi-asserted-by":"crossref","unstructured":"Gordon M.I. , Thies W. and Amarasinghe S. (2006) Exploiting Coarse-Grained Task, Data, and Pipeline Parallelism in Stream Programs. Proc. ASPLOS06, San Jose, CA, October 21\u201325, pp. 177\u2013188. ACM, New York.","DOI":"10.1145\/1168857.1168877"},{"key":"2016083005472012000_59.8.1155.22","doi-asserted-by":"crossref","unstructured":"Vachharajani N. , Rangan R. , Raman E. , Bridges M.J. , Ottoni G. and August D.I. (2007) Speculative Decoupled Software Pipelining. Proc. PACT07, Washington, DC, September 15\u201319, pp. 49\u201359. IEEE, Washington.","DOI":"10.1109\/PACT.2007.4336199"},{"key":"2016083005472012000_59.8.1155.23","doi-asserted-by":"crossref","unstructured":"Ottoni G. and August D.I. (2008) Communication Optimizations for Global Multi-Threaded Instruction Scheduling. Proc. ASPLOS08, Seattle, WA, March 1\u20135, pp. 222\u2013232. ACM, New York.","DOI":"10.1145\/1346281.1346310"},{"key":"2016083005472012000_59.8.1155.24","doi-asserted-by":"crossref","unstructured":"Zhong H. , Mehrara M. , Lieberman S. and Mahlke S. (2008) Uncovering Hidden Loop Level Parallelism in Sequential Applications. Proc. HPCA08, Salt Lake City, UT, February 16\u201320, pp. 290\u2013301. IEEE, Washington.","DOI":"10.1109\/HPCA.2008.4658647"},{"key":"2016083005472012000_59.8.1155.25","doi-asserted-by":"crossref","unstructured":"Rul S. , Vandierendonck H. and De Bosschere K. (2008) Extracting Coarse-Grain Parallelism in General-Purpose Programs. Proc. PPoPP08, Salt Lake City, UT, February 20\u201323, pp. 281\u2013282. ACM, New York.","DOI":"10.1145\/1345206.1345256"},{"key":"2016083005472012000_59.8.1155.26","doi-asserted-by":"publisher","DOI":"10.1016\/j.parco.2010.05.006"},{"key":"2016083005472012000_59.8.1155.27","doi-asserted-by":"crossref","unstructured":"Vandierendonck H. , Rul S. and De Bosschere K. (2010) The Paralax Infrastructure: Automatic Parallelization with a Helping Hand. Proc. PACT10, Vienna, AT, September 11\u201315, pp. 389\u2013400. ACM, New York.","DOI":"10.1145\/1854273.1854322"},{"key":"2016083005472012000_59.8.1155.28","doi-asserted-by":"crossref","unstructured":"Tournavitis G. and Franke B. (2010) Semi-Automatic Extraction and Exploitation of Hierarchical Pipeline Parallelism Using Profiling Information. Proc. PACT10, Vienna, AT, September 11\u201315, pp. 377\u2013388. ACM, New York.","DOI":"10.1145\/1854273.1854321"},{"key":"2016083005472012000_59.8.1155.29","doi-asserted-by":"crossref","unstructured":"Huang J. , Raman A. , Jablin T.B. , Zhang Y. , Hung T.-H. and August D.I. (2010) Decoupled Software Pipelining Creates Parallelization Opportunities. Proc. CGO10, Toronto, ON, April 24\u201328, pp. 121\u2013130. ACM, New York.","DOI":"10.1145\/1772954.1772973"},{"key":"2016083005472012000_59.8.1155.30","doi-asserted-by":"crossref","unstructured":"Gutierrez E. , Plata O. and Zapata E.L. (2001) Improving Parallel Irregular Reductions Using Partial Array Expansion. Proc. ICS01, New York, NY, November 10\u201316, pp. 56\u201356. IEEE, Washington.","DOI":"10.1145\/582034.582072"},{"key":"2016083005472012000_59.8.1155.31","doi-asserted-by":"crossref","unstructured":"Huang J. , Jablin T.B. , Beard S.R. , Johnson N.P. and August D.I. (2013) Automatically Exploiting Cross-Invocation Parallelism Using Runtime Information. Proc. CGO13, Shenzhen, CA, February 23\u201327, pp. 1\u201311. IEEE, Washington.","DOI":"10.1109\/CGO.2013.6495001"},{"key":"2016083005472012000_59.8.1155.32","doi-asserted-by":"crossref","unstructured":"Campanoni S. , Brownell K. , Kanev S. , Jones T.M. , Wei G.-Y. and Brooks D. (2014) Helix-rc: An Architecture-Compiler Co-Design for Automatic Parallelization of Irregular Programs. Proc. ISCA14, Minneapolis, USA, January 14\u201318, pp. 217\u2013228. IEEE, Washington.","DOI":"10.1145\/2678373.2665705"},{"key":"2016083005472012000_59.8.1155.33","doi-asserted-by":"crossref","unstructured":"Campanoni S. , Holloway G. , Wei G.-Y. and Brooks D. (2015) Helix-up: Relaxing Program Semantics to Unleash Parallelization. Proc. CGO15, San Francisco, USA, February 7\u201311, pp. 235\u2013245. IEEE, Washington.","DOI":"10.1109\/CGO.2015.7054203"}],"container-title":["The Computer Journal"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/academic.oup.com\/comjnl\/article-pdf\/59\/8\/1155\/8039290\/bxv125.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T04:20:50Z","timestamp":1748751650000},"score":1,"resource":{"primary":{"URL":"https:\/\/academic.oup.com\/comjnl\/article-lookup\/doi\/10.1093\/comjnl\/bxv125"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2016,1,14]]},"references-count":33,"journal-issue":{"issue":"8","published-online":{"date-parts":[[2016,8,30]]},"published-print":{"date-parts":[[2016,8]]}},"alternative-id":["10.1093\/comjnl\/bxv125"],"URL":"https:\/\/doi.org\/10.1093\/comjnl\/bxv125","relation":{},"ISSN":["0010-4620","1460-2067"],"issn-type":[{"type":"print","value":"0010-4620"},{"type":"electronic","value":"1460-2067"}],"subject":[],"published":{"date-parts":[[2016,1,14]]}}}