{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,9,8]],"date-time":"2024-09-08T12:26:47Z","timestamp":1725798407181},"publisher-location":"Cham","reference-count":24,"publisher":"Springer International Publishing","isbn-type":[{"type":"print","value":"9783319111933"},{"type":"electronic","value":"9783319111940"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-319-11194-0_14","type":"book-chapter","created":{"date-parts":[[2014,8,12]],"date-time":"2014-08-12T10:25:55Z","timestamp":1407839155000},"page":"178-191","source":"Crossref","is-referenced-by-count":9,"title":["C2CU : A CUDA C Program Generator for Bulk Execution of a Sequential Algorithm"],"prefix":"10.1007","author":[{"given":"Daisuke","family":"Takafuji","sequence":"first","affiliation":[]},{"given":"Koji","family":"Nakano","sequence":"additional","affiliation":[]},{"given":"Yasuaki","family":"Ito","sequence":"additional","affiliation":[]}],"member":"297","reference":[{"key":"14_CR1","unstructured":"Hwu, W.W.: GPU Computing Gems Emerald Edition. Morgan Kaufmann (2011)"},{"key":"14_CR2","doi-asserted-by":"crossref","unstructured":"Man, D., Uda, K., Ito, Y., Nakano, K.: A GPU implementation of computing Euclidean distance map with efficient memory access. In: Proc. of International Conference on Networking and Computing, pp. 68\u201376 (December 2011)","DOI":"10.1109\/ICNC.2011.19"},{"key":"14_CR3","doi-asserted-by":"crossref","unstructured":"Uchida, A., Ito, Y., Nakano, K.: Fast and accurate template matching using pixel rearrangement on the GPU. In: Proc. of International Conference on Networking and Computing, pp. 153\u2013159. CS Press (December 2011)","DOI":"10.1109\/ICNC.2011.30"},{"key":"14_CR4","doi-asserted-by":"crossref","unstructured":"Ogawa, K., Ito, Y., Nakano, K.: Efficient Canny edge detection using a GPU. In: Proc. of International Conference on Networking and Computing, pp. 279\u2013280. IEEE CS Press (November 2010)","DOI":"10.1109\/IC-NC.2010.13"},{"key":"14_CR5","doi-asserted-by":"crossref","unstructured":"Nishida, K., Ito, Y., Nakano, K.: Accelerating the dynamic programming for the matrix chain product on the GPU. In: Proc. of International Conference on Networking and Computing, pp. 320\u2013326 (December 2011)","DOI":"10.1109\/ICNC.2011.62"},{"key":"14_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/978-3-642-33078-0_1","volume-title":"Algorithms and Architectures for Parallel Processing","author":"K. Nishida","year":"2012","unstructured":"Nishida, K., Nakano, K., Ito, Y.: Accelerating the dynamic programming for the optial poygon triangulation on the GPU. In: Xiang, Y., Stojmenovic, I., Apduhan, B.O., Wang, G., Nakano, K., Zomaya, A. (eds.) ICA3PP 2012, Part I. LNCS, vol.\u00a07439, pp. 1\u201315. Springer, Heidelberg (2012)"},{"key":"14_CR7","doi-asserted-by":"crossref","unstructured":"Uchida, A., Ito, Y., Nakano, K.: An efficient GPU implementation of ant colony optimization for the traveling salesman problem. In: Proc. of International Conference on Networking and Computing, pp. 94\u2013102. IEEE CS Press (December 2012)","DOI":"10.1109\/ICNC.2012.22"},{"key":"14_CR8","unstructured":"NVIDIA Corporation: NVIDIA CUDA C programming guide version 5.0 (2012)"},{"issue":"2","key":"14_CR9","doi-asserted-by":"crossref","first-page":"260","DOI":"10.15803\/ijnc.1.2_260","volume":"1","author":"D. Man","year":"2011","unstructured":"Man, D., Uda, K., Ueyama, H., Ito, Y., Nakano, K.: Implementations of a parallel algorithm for computing euclidean distance map in multicore processors and GPUs. International Journal of Networking and Computing\u00a01(2), 260\u2013276 (2011)","journal-title":"International Journal of Networking and Computing"},{"key":"14_CR10","unstructured":"NVIDIA Corporation: NVIDIA CUDA C best practice guide version 3.1 (2010)"},{"key":"14_CR11","doi-asserted-by":"crossref","unstructured":"Tani, K., Takafuji, D., Nakano, K., Ito, Y.: Bulk execution of oblivious algorithms on the unified memory machine, with gpu implementation. In: Proc. of International Parallel and Distributed Processing Symposium Workshops, pp. 586\u2013595 (May 2014)","DOI":"10.1109\/IPDPSW.2014.69"},{"key":"14_CR12","unstructured":"Batcher, K.E.: Sorting networks and their applications. In: Proc. AFIPS Spring Joint Comput. Conf., vol.\u00a032, pp. 307\u2013314 (1968)"},{"key":"14_CR13","unstructured":"Akl, S.G.: Parallel Sorting Algorithms. Academic Press (1985)"},{"issue":"6","key":"14_CR14","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1145\/367766.368168","volume":"5","author":"R.W. Floyd","year":"1962","unstructured":"Floyd, R.W.: Algorithm 97: Shortest path. Communications of the ACM\u00a05(6), 345 (1962)","journal-title":"Communications of the ACM"},{"issue":"1","key":"14_CR15","doi-asserted-by":"publisher","first-page":"11","DOI":"10.1145\/321105.321107","volume":"9","author":"S. Warshall","year":"1962","unstructured":"Warshall, S.: A theorem on boolean matrices. Journal of the ACM\u00a09(1), 11\u201312 (1962)","journal-title":"Journal of the ACM"},{"key":"14_CR16","unstructured":"Cormen, T.H., Leiserson, C.E., Rivest, R.L.: Introduction to Algorithms. MIT Press (1990)"},{"issue":"170","key":"14_CR17","doi-asserted-by":"publisher","first-page":"519","DOI":"10.1090\/S0025-5718-1985-0777282-X","volume":"44","author":"P.L. Montgomery","year":"1985","unstructured":"Montgomery, P.L.: Modular multiplication without trial division. Mathematics of Computation\u00a044(170), 519\u2013521 (1985)","journal-title":"Mathematics of Computation"},{"key":"14_CR18","doi-asserted-by":"crossref","unstructured":"Shigemoto, K., Kawakami, K., Nakano, K.: Accelerating montgomery modulo multiplication for redundant radix-64k number system on the FPGA using dual-port block RAMs. In: Proc. of International Conference on Embedded and Ubiquitous Computing (EUC), pp. 44\u201351 (2008)","DOI":"10.1109\/EUC.2008.30"},{"issue":"2","key":"14_CR19","doi-asserted-by":"crossref","first-page":"277","DOI":"10.15803\/ijnc.1.2_277","volume":"1","author":"S. Bo","year":"2011","unstructured":"Bo, S., Kawakami, K., Nakano, K., Ito, Y.: An RSA encryption hardware algorithm using a single DSP block and a single block RAM on the fpga. International Journal of Networking and Computing\u00a01(2), 277\u2013289 (2011)","journal-title":"International Journal of Networking and Computing"},{"issue":"1","key":"14_CR20","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1080\/17445760.2012.731507","volume":"29","author":"K. Nakano","year":"2014","unstructured":"Nakano, K.: Simple memory machine models for GPUs. International Journal of Parallel, Emergent and Distributed Systems\u00a029(1), 17\u201337 (2014)","journal-title":"International Journal of Parallel, Emergent and Distributed Systems"},{"key":"14_CR21","doi-asserted-by":"crossref","unstructured":"Nakano, K.: Sequential memory access on the unified memory machine with application to the dynamic programming. In: Proc. of International Symposium on Computing and Networking, pp. 85\u201394 (December 2013)","DOI":"10.1109\/CANDAR.2013.20"},{"key":"14_CR22","unstructured":"Aho, A.V., Ullman, J.D., Hopcroft, J.E.: Data Structures and Algorithms. Addison Wesley (1983)"},{"key":"14_CR23","doi-asserted-by":"publisher","first-page":"948","DOI":"10.1109\/TC.1972.5009071","volume":"21","author":"M.J. Flynn","year":"1972","unstructured":"Flynn, M.J.: Some computer organizations and their effectiveness. IEEE Transactions on Computers\u00a021, 948\u2013960 (1972)","journal-title":"IEEE Transactions on Computers"},{"issue":"7","key":"14_CR24","doi-asserted-by":"publisher","first-page":"759","DOI":"10.1109\/12.936241","volume":"50","author":"T. Blum","year":"2001","unstructured":"Blum, T., Paar, C.: High-radix montgomery modular exponentiation on reconfigurable hardware. IEEE Trans. on Computers\u00a050(7), 759\u2013764 (2001)","journal-title":"IEEE Trans. on Computers"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-319-11194-0_14","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,5,27]],"date-time":"2019-05-27T11:32:17Z","timestamp":1558956737000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-3-319-11194-0_14"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783319111933","9783319111940"],"references-count":24,"URL":"https:\/\/doi.org\/10.1007\/978-3-319-11194-0_14","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]}}}