{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T05:02:27Z","timestamp":1743051747527,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":18,"publisher":"Springer Singapore","isbn-type":[{"type":"print","value":"9789811600098"},{"type":"electronic","value":"9789811600104"}],"license":[{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,1,1]],"date-time":"2021-01-01T00:00:00Z","timestamp":1609459200000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021]]},"DOI":"10.1007\/978-981-16-0010-4_34","type":"book-chapter","created":{"date-parts":[[2021,2,9]],"date-time":"2021-02-09T00:19:49Z","timestamp":1612829989000},"page":"390-404","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Automatic Thread Block Size Selection Strategy in GPU Parallel Code Generation"],"prefix":"10.1007","author":[{"given":"Weifang","family":"Hu","sequence":"first","affiliation":[]},{"given":"Lin","family":"Han","sequence":"additional","affiliation":[]},{"given":"Pu","family":"Han","sequence":"additional","affiliation":[]},{"given":"Jiandong","family":"Shang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2021,2,7]]},"reference":[{"key":"34_CR1","unstructured":"Chen, T., Moreau, T., et al.: TVM: an automated end-to-end optimizing compiler for deep learning. In: OSDI'18: Proceedings of the 13th USENIX conference on Operating Systems Design and Implementation, pp. 579\u2013594. ACM Press, New York (2018)"},{"key":"34_CR2","unstructured":"Vasilache, N., Zinenko, O., Theodoridis, T., et al.: Tensor comprehensions: framework-agnostic high-performance machine learning abstractions (2018)"},{"key":"34_CR3","doi-asserted-by":"crossref","unstructured":"Baghdadi, R., Ray, J., Romdhane, M.B., et al.: Tiramisu: a polyhedral compiler for expressing fast and portable code (2018)","DOI":"10.1109\/CGO.2019.8661197"},{"key":"34_CR4","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1007\/978-3-642-11970-5_14","volume-title":"Compiler Construction","author":"MM Baskaran","year":"2010","unstructured":"Baskaran, M.M., Ramanujam, J., Sadayappan, P.: Automatic C-to-CUDA code generation for affine programs. In: Gupta, R. (ed.) Compiler Construction. Lecture Notes in Computer Science, vol. 6011, pp. 244\u2013263. Springer, Berlin (2010). https:\/\/doi.org\/10.1007\/978-3-642-11970-5_14"},{"key":"34_CR5","unstructured":"Rudy, G.: CUDA-CHiLL: a programming language interface for GPGPU optimizations and code generation. Dissertations & Theses \u2013 Gradworks (2010)"},{"key":"34_CR6","unstructured":"Verdoolaege, S., Juega, J., Cohen, A., Gomez, J.I., Tenllado, C., Catthoor, F.: Polyhedral parallel code generation for CUDA. ACM Trans. Archit. Code Optim. (TACO) 9 (2013). Article no. 54"},{"issue":"8","key":"34_CR7","first-page":"2371","volume":"29","author":"J Zhao","year":"2018","unstructured":"Zhao, J., Li, Y.Y., Zhao, R.C.: \u201cBlack magic\u201d of polyhedral compilation. J. Softw. 29(8), 2371\u20132396 (2018). (in Chinese)","journal-title":"J. Softw."},{"key":"34_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"132","DOI":"10.1007\/978-3-540-78791-4_9","volume-title":"Compiler Construction","author":"U Bondhugula","year":"2008","unstructured":"Bondhugula, U., Baskaran, M., Krishnamoorthy, S., Ramanujam, J., Rountev, A., Sadayappan, P.: Automatic transformations for communication-minimized parallelization and locality optimization in the polyhedral model. In: Hendren, L. (ed.) CC 2008. LNCS, vol. 4959, pp. 132\u2013146. Springer, Heidelberg (2008). https:\/\/doi.org\/10.1007\/978-3-540-78791-4_9"},{"key":"34_CR9","doi-asserted-by":"crossref","unstructured":"Leung, A., Vasilache, N., Meister, B., et al.: A mapping path for multi-GPGPU accelerated computers from a portable high level programming abstraction, p. 51 (2010)","DOI":"10.1145\/1735688.1735698"},{"key":"34_CR10","unstructured":"Pouchet, L.N., Grlinger, A., Simb\u00fcrger, A., et al.: Polly-polyhedral optimization in LLVM. In: International Workshop on Polyhedral Compilation Techniques (IMPACT) (2011)"},{"key":"34_CR11","doi-asserted-by":"crossref","unstructured":"Grosser, T., Hoefler, T.: Polly-ACC transparent compilation to heterogeneous hardware. In: International Conference on Supercomputing. ACM (2016)","DOI":"10.1145\/2925426.2926286"},{"key":"34_CR12","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"237","DOI":"10.1007\/978-3-642-36036-7_16","volume-title":"Languages and Compilers for Parallel Computing","author":"M Amini","year":"2013","unstructured":"Amini, M., Coelho, F., Irigoin, F., Keryell, R.: Static compilation analysis for host-accelerator communication optimization. In: Rajopadhye, S., Mills Strout, M. (eds.) LCPC 2011. LNCS, vol. 7146, pp. 237\u2013251. Springer, Heidelberg (2013). https:\/\/doi.org\/10.1007\/978-3-642-36036-7_16"},{"key":"34_CR13","doi-asserted-by":"crossref","unstructured":"Shobaki, G., Kerbow, A., Mekhanoshin, S.: Optimizing occupancy and ILP on the GPU using a combinatorial approach. In: Proceedings of the 18th ACM\/IEEE International Symposium on Code Generation and Optimization (CGO 2020), pp. 133\u2013144. Association for Computing Machinery, New York (2020)","DOI":"10.1145\/3368826.3377918"},{"key":"34_CR14","doi-asserted-by":"publisher","unstructured":"Nickolls, J.: Scalable parallel programming with CUDA introduction. In: 2008 IEEE Hot Chips 20 Symposium (HCS), Stanford, CA, pp. 1\u20139 (2008). https:\/\/doi.org\/10.1109\/HOTCHIPS.2008.7476518","DOI":"10.1109\/HOTCHIPS.2008.7476518"},{"issue":"6","key":"34_CR15","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1007\/BF01379404","volume":"21","author":"P Feautrier","year":"1997","unstructured":"Feautrier, P.: Some efficient solutions to the affine scheduling problem. Part II .Multidimensional time. Int. J. Parallel Prog. 21(6), 389\u2013420 (1997)","journal-title":"Int. J. Parallel Prog."},{"issue":"4","key":"34_CR16","doi-asserted-by":"publisher","first-page":"12","DOI":"10.1145\/2743016","volume":"37","author":"T Grosser","year":"2015","unstructured":"Grosser, T., Verdoolaege, S., Cohen, A.: Polyhedral AST generation is more than scanning polyhedra. ACM Trans. Program. Lang. Syst. 37(4), 12 (2015)","journal-title":"ACM Trans. Program. Lang. Syst."},{"key":"34_CR17","doi-asserted-by":"publisher","unstructured":"Hayes, A., Li, L., Chavarr\u00eda-Miranda, D., Song, S., Zhang, E.: Orion: A Framework for GPU Occupancy Tuning, pp. 1\u201313 (2016). https:\/\/doi.org\/10.1145\/2988336.2988355","DOI":"10.1145\/2988336.2988355"},{"key":"34_CR18","doi-asserted-by":"crossref","unstructured":"Fauzia, N., Pouchet, L.-N., Sadayappan, P.: Characterizing and enhancing global memory data coalescing on GPUs. In: Proceedings of the 13th Annual IEEE\/ACM International Symposium on Code Generation and Optimization (CGO 2015), pp. 12\u201322. IEEE Computer Society, USA (2015)","DOI":"10.1109\/CGO.2015.7054183"}],"container-title":["Communications in Computer and Information Science","Parallel Architectures, Algorithms and Programming"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-16-0010-4_34","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,4,1]],"date-time":"2021-04-01T14:41:19Z","timestamp":1617288079000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/978-981-16-0010-4_34"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021]]},"ISBN":["9789811600098","9789811600104"],"references-count":18,"URL":"https:\/\/doi.org\/10.1007\/978-981-16-0010-4_34","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"type":"print","value":"1865-0929"},{"type":"electronic","value":"1865-0937"}],"subject":[],"published":{"date-parts":[[2021]]},"assertion":[{"value":"7 February 2021","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PAAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Symposium on Parallel Architectures, Algorithms and Programming","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenzhen","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2020","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 December 2020","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 December 2020","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"paap2020","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Single-blind","order":1,"name":"type","label":"Type","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"OCS","order":2,"name":"conference_management_system","label":"Conference Management System","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"75","order":3,"name":"number_of_submissions_sent_for_review","label":"Number of Submissions Sent for Review","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"37","order":4,"name":"number_of_full_papers_accepted","label":"Number of Full Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"0","order":5,"name":"number_of_short_papers_accepted","label":"Number of Short Papers Accepted","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"49% - The value is computed by the equation \"Number of Full Papers Accepted \/ Number of Submissions Sent for Review * 100\" and then rounded to a whole number.","order":6,"name":"acceptance_rate_of_full_papers","label":"Acceptance Rate of Full Papers","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"3","order":7,"name":"average_number_of_reviews_per_paper","label":"Average Number of Reviews per Paper","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"6","order":8,"name":"average_number_of_papers_per_reviewer","label":"Average Number of Papers per Reviewer","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}},{"value":"Yes","order":9,"name":"external_reviewers_involved","label":"External Reviewers Involved","group":{"name":"ConfEventPeerReviewInformation","label":"Peer Review Information (provided by the conference organizers)"}}]}}