{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T02:26:08Z","timestamp":1768530368176,"version":"3.49.0"},"publisher-location":"Cham","reference-count":50,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032081865","type":"print"},{"value":"9783032081872","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,11,25]],"date-time":"2025-11-25T00:00:00Z","timestamp":1764028800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-08187-2_4","type":"book-chapter","created":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T05:43:11Z","timestamp":1763962991000},"page":"51-79","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["GPU Implementations for\u00a0Midsize Integer Addition and\u00a0Multiplication"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5421-6876","authenticated-orcid":false,"given":"Cosmin E.","family":"Oancea","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8303-4983","authenticated-orcid":false,"given":"Stephen M.","family":"Watt","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,11,25]]},"reference":[{"key":"4_CR1","unstructured":"Abadi, M., et\u00a0al.: TensorFlow: a system for large-scale machine learning. In: 12th USENIX Symposium on Operating Systems Design and Implementation (OSDI 16), pp. 265\u2013283 (2016)"},{"key":"4_CR2","unstructured":"Bantikyan, H.: Big integer multiplication with CUDA FFT(CUFFT) library. Int. J. Innovative Res. Comput. Commun. Eng. 2, 6317\u20136325 (2014). https:\/\/api.semanticscholar.org\/CorpusID:14759606"},{"key":"4_CR3","unstructured":"Bernardin, L., et al.: Maple Programming Guide, Maplesoft, a division of Waterloo Maple Inc. (1996\u20132023)"},{"issue":"11","key":"4_CR4","doi-asserted-by":"publisher","first-page":"1526","DOI":"10.1109\/12.42122","volume":"38","author":"GE Blelloch","year":"1989","unstructured":"Blelloch, G.E.: Scans as primitive parallel operations. IEEE Trans. Comput. 38(11), 1526\u20131538 (1989)","journal-title":"IEEE Trans. Comput."},{"key":"4_CR5","unstructured":"Blelloch, G.E.: Vector Models for Data-Parallel Computing, vol.\u00a075. MIT Press Cambridge (1990)"},{"issue":"3","key":"4_CR6","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1145\/227234.227246","volume":"39","author":"GE Blelloch","year":"1996","unstructured":"Blelloch, G.E.: Programming parallel algorithms. Commun. ACM (CACM) 39(3), 85\u201397 (1996)","journal-title":"Commun. ACM (CACM)"},{"issue":"1","key":"4_CR7","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1006\/jpdc.1994.1038","volume":"21","author":"GE Blelloch","year":"1994","unstructured":"Blelloch, G.E., Hardwick, J.C., Sipelstein, J., Zagha, M., Chatterjee, S.: Implementation of a portable nested data-parallel language. J. Parallel Distrib. Comput. 21(1), 4\u201314 (1994)","journal-title":"J. Parallel Distrib. Comput."},{"key":"4_CR8","doi-asserted-by":"publisher","unstructured":"Bruun, L.M., Larsen, U.S., Hinnerskov, N.H., Oancea, C.E.: Reverse-mode ad of multi-reduce and scan in futhark. In: Proceedings of the 35th Symposium on Implementation and Application of Functional Languages, IFL 2023. Association for Computing Machinery, New York, NY, USA (2024). https:\/\/doi.org\/10.1145\/3652561.3652575","DOI":"10.1145\/3652561.3652575"},{"key":"4_CR9","doi-asserted-by":"publisher","first-page":"31","DOI":"10.1016\/S0747-7171(89)80004-5","volume":"7","author":"BW Char","year":"1989","unstructured":"Char, B.W., Geddes, K.O., Gonnet, G.H.: GCDHEU: heuristic polynomial GCD algorithm based on integer GCD computation. J. Symb. Comput. 7, 31\u201348 (1989)","journal-title":"J. Symb. Comput."},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Chen, L., Covanov, S., Mohajerani, D., Moreno\u00a0Maza, M.: Big prime field FFT on the GPU. In: Proceedings 2017 International Symposium on Symbolic and Algebraic Computation (ISSAC 2017), pp. 85\u201392. ACM Press (2017)","DOI":"10.1145\/3087604.3087657"},{"key":"4_CR11","unstructured":"Chicha, Y., Lloyd, M., Oancea, C., Watt, S.M.: Parametric polymorphism for computer algebra software components. In: Proceedings 6th International Symposium on Symbolic and Numeric Algorithms for Scientific Computing, pp. 119\u2013130. Mirton Publishing House (2004)"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Cooley, J.W., Tukey, J.W.: An algorithm for the machine calculation of complex Fourier series. Math. Comput. 19(90), 297\u2013301 (1965). http:\/\/www.jstor.org\/stable\/2003354","DOI":"10.1090\/S0025-5718-1965-0178586-1"},{"issue":"3","key":"4_CR13","doi-asserted-by":"publisher","first-page":"356","DOI":"10.1177\/10943420221077964","volume":"36","author":"AP Dieguez","year":"2022","unstructured":"Dieguez, A.P., Amor, M., Doallo, R., Nukada, A., Matsuoka, S.: Efficient high-precision integer multiplication on the GPU. Int. J. High Performance Comput. Appl. 36(3), 356\u2013369 (2022). https:\/\/doi.org\/10.1177\/10943420221077964","journal-title":"Int. J. High Performance Comput. Appl."},{"key":"4_CR14","doi-asserted-by":"publisher","unstructured":"Emmart, N., Weems, C.: High precision integer addition, subtraction and multiplication with a graphics processing unit. Parallel Process. Lett. 20, 293\u2013306 (2010). https:\/\/doi.org\/10.1142\/S0129626410000259","DOI":"10.1142\/S0129626410000259"},{"key":"4_CR15","unstructured":"Frostig, R., Johnson, M.J., Leary, C.: Compiling machine learning programs via high-level tracing. Syst. Mach. Learn., 23\u201324 (2018)"},{"key":"4_CR16","doi-asserted-by":"publisher","unstructured":"Gieseke, F., Rosca, S., Henriksen, T., Verbesselt, J., Oancea, C.E.: Massively-parallel change detection for satellite time series data with missing values. In: 2020 IEEE 36th International Conference on Data Engineering (ICDE), pp. 385\u2013396 (2020). https:\/\/doi.org\/10.1109\/ICDE48307.2020.00040","DOI":"10.1109\/ICDE48307.2020.00040"},{"key":"4_CR17","doi-asserted-by":"publisher","first-page":"274","DOI":"10.1007\/3-540-61756-6_91","volume-title":"Programming Languages: Implementations, Logics, and Programs","author":"S Gorlatch","year":"1996","unstructured":"Gorlatch, S.: Systematic extraction and implementation of divide-and-conquer parallelism. In: Kuchen, H., Doaitse Swierstra, S. (eds.) Programming Languages: Implementations, Logics, and Programs, pp. 274\u2013288. Springer, Heidelberg (1996)"},{"key":"4_CR18","doi-asserted-by":"publisher","unstructured":"Henriksen, T., Serup, N.G.W., Elsman, M., Henglein, F., Oancea, C.E.: Futhark: purely functional GPU-programming with nested parallelism and in-place array updates. In: Proceedings of the 38th ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2017, pp. 556\u2013571. ACM, New York, NY, USA (2017). https:\/\/doi.org\/10.1145\/3062341.3062354","DOI":"10.1145\/3062341.3062354"},{"key":"4_CR19","doi-asserted-by":"publisher","unstructured":"Henriksen, T., Thor\u00f8e, F., Elsman, M., Oancea, C.: Incremental flattening for nested data parallelism. In: Proceedings of the 24th Symposium on Principles and Practice of Parallel Programming, PPoPP 2019, pp. 53\u201367. ACM, New York, NY, USA (2019). https:\/\/doi.org\/10.1145\/3293883.3295707","DOI":"10.1145\/3293883.3295707"},{"key":"4_CR20","unstructured":"Wolfram Research, Inc.: Mathematica, Version 14.0, Champaign, IL (2024). https:\/\/www.wolfram.com\/mathematica"},{"key":"4_CR21","doi-asserted-by":"publisher","first-page":"58603","DOI":"10.1109\/ACCESS.2020.2982365","volume":"8","author":"K Isupov","year":"2020","unstructured":"Isupov, K.: Using floating-point intervals for non-modular computations in residue number system. IEEE Access 8, 58603\u201358619 (2020)","journal-title":"IEEE Access"},{"key":"4_CR22","doi-asserted-by":"crossref","unstructured":"Joldes, M., Muller, J., Popescu, V., Tucker, W.: CAMPARY: CUDA multiple precision arithmetic library and applications. In: Greuel, G., Koch, T., Paule, P., Sommese, A. (eds.) Mathematical Software \u2013 ICMS 2016. LNCS 9725, pp. 232\u2013240. Springer Cham (2016)","DOI":"10.1007\/978-3-319-42432-3_29"},{"key":"4_CR23","unstructured":"Joldes, M., Muller, J., Popescu, V., Tucker, W.: CAMPARY library (2017). https:\/\/homepages.laas.fr\/mmjoldes\/campary\/"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"van Loan, C.: Computational Frameworks for the Fast Fourier Transform. SIAM (1992)","DOI":"10.1137\/1.9781611970999"},{"key":"4_CR25","doi-asserted-by":"publisher","unstructured":"Lu, B., Mellor-Crummey, J.: Compiler optimization of implicit reductions for distributed memory multiprocessors. In: Proceedings of the First Merged International Parallel Processing Symposium and Symposium on Parallel and Distributed Processing, pp. 42\u201351 (1998). https:\/\/doi.org\/10.1109\/IPPS.1998.669887","DOI":"10.1109\/IPPS.1998.669887"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Lu, M., He, B., Luo, Q.: Supporting extended precision on Grahics processors. In: Proceedings Sixth International Workshop on Data Management on New Hardware (DaMoN 2010), pp. 19\u201326. ACM (2010)","DOI":"10.1145\/1869389.1869392"},{"key":"4_CR27","first-page":"114","volume":"44","author":"RJ McEliece","year":"1978","unstructured":"McEliece, R.J.: A public-key cryptosystem based on algebraic coding theory. DSN Progress Rep. 44, 114\u2013116 (1978)","journal-title":"DSN Progress Rep."},{"key":"4_CR28","unstructured":"Merrill, D., Garland, M.: Single-pass parallel prefix scan with decoupled lookback. Technical report NVR-2016-002, NVIDIA Corporation, March 2016. https:\/\/research.nvidia.com\/sites\/default\/files\/pubs\/2016-03_Single-pass-Parallel-Prefix\/nvr-2016-002.pdf"},{"key":"4_CR29","unstructured":"Munksgaard, P.: Static and dynamic analyses for efficient GPU execution. Ph.D. thesis, Department of Computer Science, Faculty of Science, University of Copenhagen (2023). https:\/\/di.ku.dk\/english\/research\/phd\/phd-theses\/2023\/Philip_Munksgaard_Thesis.pdf"},{"key":"4_CR30","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-83978-9_1","volume-title":"Trends in Functional Programming","author":"P Munksgaard","year":"2021","unstructured":"Munksgaard, P., Breddam, S.L., Henriksen, T., Gieseke, F.C., Oancea, C.: Dataset sensitive autotuning of multi-versioned code based on monotonic properties. In: Zs\u00f3k, V., Hughes, J. (eds.) Trends in Functional Programming, pp. 3\u201323. Springer International Publishing, Cham (2021)"},{"key":"4_CR31","doi-asserted-by":"publisher","unstructured":"Munksgaard, P., Henriksen, T., Sadayappan, P., Oancea, C.: Memory optimizations in an array language. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2022. IEEE Press (2022). https:\/\/doi.org\/10.1109\/SC41404.2022.00036","DOI":"10.1109\/SC41404.2022.00036"},{"key":"4_CR32","doi-asserted-by":"crossref","unstructured":"Nakayama, T., Takahashi, D.: Implementation of multiple-precision floating-point arithmetic for GPU computing. In: Proceedings 23rd IASTED International Conference on Parallel and Distributed Computing and Systems (PDCS 2011), pp. 343\u2013349. IASTED (2011)","DOI":"10.2316\/P.2011.757-041"},{"key":"4_CR33","unstructured":"Nakayama, T.: CUMP library (2017). https:\/\/github.com\/skystar0227\/CUMP"},{"key":"4_CR34","unstructured":"NVlabs: Cooperative Groups Big Numbers (CGBN) Library (2018). https:\/\/github.com\/NVlabs\/CGBN"},{"key":"4_CR35","doi-asserted-by":"publisher","unstructured":"Oancea, C.E., Andreetta, C., Berthold, J., Frisch, A., Henglein, F.: Financial software on GPUs: between Haskell and Fortran. In: Proceedings of the 1st ACM SIGPLAN Workshop on Functional High-performance Computing, FHPC 2012, pp. 61\u201372. ACM, New York, NY, USA (2012). https:\/\/doi.org\/10.1145\/2364474.2364484","DOI":"10.1145\/2364474.2364484"},{"key":"4_CR36","doi-asserted-by":"publisher","first-page":"156","DOI":"10.1007\/978-3-540-89740-8_11","volume-title":"Languages and Compilers for Parallel Computing","author":"CE Oancea","year":"2008","unstructured":"Oancea, C.E., Mycroft, A.: Set-congruence dynamic analysis for thread-level speculation (TLS). In: Amaral, J.N. (ed.) Languages and Compilers for Parallel Computing, pp. 156\u2013171. Springer, Heidelberg (2008)"},{"key":"4_CR37","doi-asserted-by":"crossref","unstructured":"Oancea, C.E., Mycroft, A., Watt, S.M.: A new approach to parallelising tracing algorithms. In: Proceedings 2009 International Symposium on Memory Management (ISMM 2009), pp. 10\u201319. ACM Press (2009)","DOI":"10.1145\/1542431.1542434"},{"key":"4_CR38","doi-asserted-by":"publisher","unstructured":"Oancea, C.E., Rauchwerger, L.: Logical inference techniques for loop parallelization. In: Proceedings of the 33rd ACM SIGPLAN Conference on Programming Language Design and Implementation, PLDI 2012, pp. 509\u2013520. ACM, New York, NY, USA (2012). https:\/\/doi.org\/10.1145\/2254064.2254124","DOI":"10.1145\/2254064.2254124"},{"key":"4_CR39","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/978-3-642-36036-7_5","volume-title":"Languages and Compilers for Parallel Computing","author":"CE Oancea","year":"2013","unstructured":"Oancea, C.E., Rauchwerger, L.: A hybrid approach to proving memory reference monotonicity. In: Rajopadhye, S., Mills Strout, M. (eds.) Languages and Compilers for Parallel Computing, pp. 61\u201375. Springer, Heidelberg (2013)"},{"key":"4_CR40","doi-asserted-by":"crossref","unstructured":"Oancea, C.E., Rauchwerger, L.: Scalable conditional induction variables (CIV) analysis. In: Proceedings of the 13th Annual IEEE\/ACM International Symposium on Code Generation and Optimization, CGO 2015, pp. 213\u2013224. IEEE Computer Society, Washington, DC, USA (2015). http:\/\/dl.acm.org\/citation.cfm?id=2738600.2738627","DOI":"10.1109\/CGO.2015.7054201"},{"key":"4_CR41","doi-asserted-by":"publisher","unstructured":"Oancea, C.E., Watt, S.M.: Domains and expressions: an interface between two approaches to computer algebra. In: Proceedings of the 2005 International Symposium on Symbolic and Algebraic Computation, ISSAC 2005, pp. 261\u2013268. Association for Computing Machinery, New York, NY, USA (2005).https:\/\/doi.org\/10.1145\/1073884.1073921","DOI":"10.1145\/1073884.1073921"},{"key":"4_CR42","doi-asserted-by":"publisher","unstructured":"Oancea, C.E., Watt, S.M.: Parametric polymorphism for software component architectures. In: Proceedings of the 20th Annual ACM SIGPLAN Conference on Object-Oriented Programming, Systems, Languages, and Applications, OOPSLA 2005, pp. 147\u2013166. Association for Computing Machinery, New York, NY, USA (2005). https:\/\/doi.org\/10.1145\/1094811.1094823","DOI":"10.1145\/1094811.1094823"},{"key":"4_CR43","doi-asserted-by":"publisher","unstructured":"Oancea, C.E., Robroek, T., Gieseke, F.: Approximate nearest-neighbour fields via massively-parallel propagation-assisted K-D trees. In: 2020 IEEE International Conference on Big Data (Big Data), pp. 5172\u20135181 (2020). https:\/\/doi.org\/10.1109\/BigData50022.2020.9378426","DOI":"10.1109\/BigData50022.2020.9378426"},{"key":"4_CR44","unstructured":"O\u2019Malley, D., E.\u00a0Santos, J., Lubbers, N.: Interlingual automatic differentiation: software 2.0 between PyTorch and Julia. In: Association for the Advancement of Artificial Intelligence (2022)"},{"key":"4_CR45","doi-asserted-by":"publisher","unstructured":"Pascual, V., Hasco\u00ebt, L.: Mixed-language automatic differentiation. Optim. Methods Softw. 33(4-6), 1192\u20131206 (2018). https:\/\/doi.org\/10.1080\/10556788.2018.1435650","DOI":"10.1080\/10556788.2018.1435650"},{"key":"4_CR46","unstructured":"Paszke, A., et al.: PyTorch: an imperative style, high-performance deep learning library. In: Advances in Neural Information Processing Systems, vol. 32, pp. 8026\u20138037 (2019)"},{"key":"4_CR47","doi-asserted-by":"publisher","unstructured":"Schenck, R., R\u00f8nning, O., Henriksen, T., Oancea, C.E.: AD for an array language with nested parallelism. In: Proceedings of the International Conference on High Performance Computing, Networking, Storage and Analysis, SC 2022. IEEE Press (2022). https:\/\/doi.org\/10.1109\/SC41404.2022.00063","DOI":"10.1109\/SC41404.2022.00063"},{"key":"4_CR48","doi-asserted-by":"publisher","unstructured":"Serykh, D., et al.: Seasonal-trend time series decomposition on graphics processing units. In: 2023 IEEE International Conference on Big Data (BigData), pp. 5914\u20135923 (2023). https:\/\/doi.org\/10.1109\/BigData59044.2023.10386208","DOI":"10.1109\/BigData59044.2023.10386208"},{"issue":"3\u20134","key":"4_CR49","first-page":"281","volume":"7","author":"V Strassen","year":"1971","unstructured":"Strassen, V., Sch\u00f6nhage, A.: Schnelle multiplikation gro\u00dfer zahlen. Computing 7(3\u20134), 281\u2013292 (1971)","journal-title":"Computing"},{"key":"4_CR50","unstructured":"Topalovic, A., Restelli-Nielsen, W., Olesen, K.: Multiple-precision integer arithmetic. Final project of the \u201cData Parallel Programming\u201d MSc-level course, Department of Computer Science, University of Copenhagen (2022). https:\/\/futhark-lang.org\/student-projects\/dpp21-mpint.pdf"}],"container-title":["Lecture Notes in Computer Science","Languages, Compilers, Analysis - From Beautiful Theory to Useful Practice"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-08187-2_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,24]],"date-time":"2025-11-24T05:43:16Z","timestamp":1763962996000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-08187-2_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,25]]},"ISBN":["9783032081865","9783032081872"],"references-count":50,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-08187-2_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,11,25]]},"assertion":[{"value":"25 November 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no conflicts of interest to declare.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}}]}}