{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:42:35Z","timestamp":1773193355291,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":68,"publisher":"ACM","license":[{"start":{"date-parts":[[2017,3,25]],"date-time":"2017-03-25T00:00:00Z","timestamp":1490400000000},"content-version":"vor","delay-in-days":365,"URL":"http:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-CSR-1419243"],"award-info":[{"award-number":["CNS-CSR-1419243"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2016,3,25]]},"DOI":"10.1145\/2872362.2872368","type":"proceedings-article","created":{"date-parts":[[2016,3,28]],"date-time":"2016-03-28T09:24:30Z","timestamp":1459157070000},"page":"681-696","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":79,"title":["Baymax"],"prefix":"10.1145","author":[{"given":"Quan","family":"Chen","sequence":"first","affiliation":[{"name":"Shanghai Jiao Tong University, Ann Arbor, MI, USA"}]},{"given":"Hailong","family":"Yang","sequence":"additional","affiliation":[{"name":"Beihang University, Ann Arbor, USA"}]},{"given":"Jason","family":"Mars","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, USA"}]},{"given":"Lingjia","family":"Tang","sequence":"additional","affiliation":[{"name":"University of Michigan, Ann Arbor, USA"}]}],"member":"320","published-online":{"date-parts":[[2016,3,25]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"185","volume-title":"Real-time Continuous Speech Recognition System for Hand-Held Devices. In International Conference on Acoustics, Speech and Signal Processing (ICASSP)","volume":"1","author":"Huggins-Daines David","year":"2006","unstructured":"David Huggins-Daines, Mohit Kumar, Arthur Chan, Alan W Black, Mosur Ravishankar, and Alex I Rudnicky. Pocketsphinx: A Free, Real-time Continuous Speech Recognition System for Hand-Held Devices. In International Conference on Acoustics, Speech and Signal Processing (ICASSP), volume 1, pages 185--188. IEEE, 2006."},{"key":"e_1_3_2_1_2_1","volume-title":"The Kaldi Speech Recognition Toolkit","author":"Povey Daniel","year":"2011","unstructured":"Daniel Povey, Arnab Ghoshal, Gilles Boulianne, Luk\u00e1s Burget, Ondrej Glembek, Nagendra Goel, Mirko Hannemann, Petr Motl\u0131cek, Yanmin Qian, Petr Schwarz, et al. The Kaldi Speech Recognition Toolkit. 2011."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/11744023_32"},{"key":"e_1_3_2_1_4_1","unstructured":"Qualcomm Acquires Kooaba Visual Recognition Company. http:\/\/mobilemarketingmagazine.com\/qualcomm-acquires-kooaba-visual-recognition-company."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.3115\/1117601.1117631"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/2018396.2018414"},{"key":"e_1_3_2_1_7_1","unstructured":"Google's Google Now. http:\/\/www.google.com\/landing\/now\/."},{"key":"e_1_3_2_1_8_1","unstructured":"Microsoft's Cortana. http:\/\/www.windowsphone.com\/en-us\/features-8--1."},{"key":"e_1_3_2_1_9_1","unstructured":"Apple Siri. https:\/\/www.apple.com\/ios\/siri\/."},{"key":"e_1_3_2_1_10_1","unstructured":"Baidu YuYin. http:\/\/yuyin.baidu.com\/."},{"key":"e_1_3_2_1_11_1","unstructured":"Johann Hauswald Michael A. Laurenzano Yunqi Zhang Cheng Li Austin Rovinski Arjun Khurana Ron Dreslinski Trevor Mudge Vinicius Petrucci Lingjia Tang and Jason Mars. Sirius: An Open End-to-End Voice and Vision Personal Assistant and Its Implications for Future Warehouse Scale Computers. In the 20th International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS). ACM 2015."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2014.6853195"},{"key":"e_1_3_2_1_13_1","volume-title":"The Learning Machines","author":"Jones Nicola","year":"2014","unstructured":"Nicola Jones. The Learning Machines, 2014."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.5555\/2534500"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/2408776.2408794"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/2259016.2259018"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/2485922.2485975"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2155620.2155650"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISPASS.2012.6189222"},{"key":"e_1_3_2_1_20_1","first-page":"607","volume-title":"Lingjia Tang. Bubble-flux: Precise Online QoS Management for Increased Utilization in Warehouse Scale Computers. In the 40th Annual International Symposium on Computer Architecture (ISCA)","author":"Yang Hailong","year":"2013","unstructured":"Hailong Yang, Alex Breslow, Jason Mars, and Lingjia Tang. Bubble-flux: Precise Online QoS Management for Increased Utilization in Warehouse Scale Computers. In the 40th Annual International Symposium on Computer Architecture (ISCA), pages 607--618. ACM\/IEEE, 2013."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541941"},{"key":"e_1_3_2_1_22_1","first-page":"450","volume-title":"the 42nd International Symposium on Computer Architecture (ISCA)","author":"Lo David","year":"2015","unstructured":"David Lo, Liqun Cheng, Rama Govindaraju, Parthasarathy Ranganathan, and Christos Kozyrakis. Heracles: Improving Resource Efficiency at Scale. In the 42nd International Symposium on Computer Architecture (ISCA), pages 450--462. ACM\/IEEE, 2015."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.53"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2749472"},{"key":"e_1_3_2_1_25_1","first-page":"246","volume-title":"Lingjia Tang. Octopus-Man: QoS-driven Task Management for Heterogeneous Multicores in Warehouse-Scale Computers. In the 21st International Symposium on High Performance Computer Architecture (HPCA)","author":"Petrucci Vinicius","year":"2015","unstructured":"Vinicius Petrucci, Michael Laurenzano, John Doherty, Yunqi Zhang, Daniel Mosse, Jason Mars, and Lingjia Tang. Octopus-Man: QoS-driven Task Management for Heterogeneous Multicores in Warehouse-Scale Computers. In the 21st International Symposium on High Performance Computer Architecture (HPCA), pages 246--258. IEEE, 2015."},{"key":"e_1_3_2_1_26_1","unstructured":"Nvidia Multi-Process Service. https:\/\/docs.nvidia.com\/deploy\/pdf\/CUDA\\_Multi\\_Process\\_Service\\_Overview.pdf."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2008.8"},{"key":"e_1_3_2_1_28_1","first-page":"17","volume-title":"Yutaka Ishikawa. TimeGraph: GPU Scheduling for Real-time Multi-tasking Environments. In USENIX Annual Technical Conference (ATC)","author":"Kato Shinpei","year":"2011","unstructured":"Shinpei Kato, Karthik Lakshmanan, Raj Rajkumar, and Yutaka Ishikawa. TimeGraph: GPU Scheduling for Real-time Multi-tasking Environments. In USENIX Annual Technical Conference (ATC), pages 17--30. USENIX, 2011."},{"key":"e_1_3_2_1_29_1","first-page":"33","volume-title":"James H Anderson. GPUSync: A Framework for Real-time GPU Management. In the 34th Real-Time Systems Symposium (RTSS)","author":"Elliott Glenn","year":"2013","unstructured":"Glenn Elliott, Bryan C Ward, and James H Anderson. GPUSync: A Framework for Real-time GPU Management. In the 34th Real-Time Systems Symposium (RTSS), pages 33--44. IEEE, 2013."},{"key":"e_1_3_2_1_30_1","unstructured":"Profiler User's Guide. http:\/\/docs.nvidia.com\/cuda\/profiler-users-guide."},{"key":"e_1_3_2_1_31_1","volume-title":"cuDNN: Efficient Primitives for Deep Learning. arXiv preprint arXiv:1410.0759","author":"Chetlur Sharan","year":"2014","unstructured":"Sharan Chetlur, Cliff Woolley, Philippe Vandermersch, Jonathan Cohen, John Tran, Bryan Catanzaro, and Evan Shelhamer. cuDNN: Efficient Primitives for Deep Learning. arXiv preprint arXiv:1410.0759, 2014."},{"key":"e_1_3_2_1_32_1","volume-title":"cuBLAS library","author":"Nvidia CUDA","year":"2008","unstructured":"CUDA Nvidia. cuBLAS library. Nvidia Corporation, Santa Clara, California, 15, 2008."},{"key":"e_1_3_2_1_33_1","first-page":"103","volume-title":"Nvidia CUDA Software and GPU Parallel Computing Architecture. In the 6th International Symposium on Memory Management (ISMM)","volume":"7","author":"David","year":"2007","unstructured":"David Kirk et al. Nvidia CUDA Software and GPU Parallel Computing Architecture. In the 6th International Symposium on Memory Management (ISMM), volume 7, pages 103--104. ACM, 2007."},{"key":"e_1_3_2_1_34_1","volume-title":"Linear Regression Analysis","author":"Seber George AF","year":"2012","unstructured":"George AF Seber and Alan J Lee. Linear Regression Analysis, volume 936. John Wiley & Sons, 2012."},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/293347.293348"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4614-7138-7"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/1961189.1961199"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10707-009-0076-5"},{"key":"e_1_3_2_1_39_1","first-page":"1","volume-title":"GPU. In Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","author":"Garcia Vincent","year":"2008","unstructured":"Vincent Garcia, Eric Debreuve, and Michel Barlaud. Fast K Nearest Neighbor Search using GPU. In Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pages 1--6. IEEE, 2008."},{"key":"e_1_3_2_1_40_1","first-page":"4","article-title":"Understanding Performance of PCI Express Systems. Xilinx WP350","author":"Goldhammer Alex","year":"2008","unstructured":"Alex Goldhammer and John Ayer Jr. Understanding Performance of PCI Express Systems. Xilinx WP350, Sept, 4, 2008.","journal-title":"Sept"},{"key":"e_1_3_2_1_41_1","first-page":"44","volume-title":"Kevin Skadron. Rodinia: A Benchmark Suite for Heterogeneous Computing. In IEEE International Symposium on Workload Characterization (IISWC)","author":"Che Shuai","year":"2009","unstructured":"Shuai Che, Michael Boyer, Jiayuan Meng, David Tarjan, Jeremy W Sheaffer, Sang-Ha Lee, and Kevin Skadron. Rodinia: A Benchmark Suite for Heterogeneous Computing. In IEEE International Symposium on Workload Characterization (IISWC), pages 44--54. IEEE, 2009."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2008.4536485"},{"key":"e_1_3_2_1_43_1","volume-title":"The Hungarian Method for the Assignment Problem. Naval research logistics quarterly, 2(1--2):83--97","author":"Kuhn Harold W","year":"1955","unstructured":"Harold W Kuhn. The Hungarian Method for the Assignment Problem. Naval research logistics quarterly, 2(1--2):83--97, 1955."},{"key":"e_1_3_2_1_44_1","first-page":"2","article-title":"Workload Management with Loadleveler","volume":"2","author":"Kannan Subramanian","year":"2001","unstructured":"Subramanian Kannan, Mark Roberts, Peter Mayes, Dave Brelsford, and Joseph F Skovira. Workload Management with Loadleveler. IBM Redbooks, 2:2, 2001.","journal-title":"IBM Redbooks"},{"key":"e_1_3_2_1_45_1","first-page":"1","volume-title":"Automation and Test in Europe Conference and Exhibition (DATE)","author":"Lee Haeseung","year":"2014","unstructured":"Haeseung Lee, Al Faruque, and Mohammad Abdullah. GPU-EvR: Run-time Event based Real-time Scheduling Framework on GPGPU Platform. In Design, Automation and Test in Europe Conference and Exhibition (DATE), pages 1--6. IEEE, 2014."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.21"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-45540-X_6"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.932708"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/321738.321743"},{"issue":"1","key":"e_1_3_2_1_50_1","first-page":"68","article-title":"Generalized Rate-Monotonic Scheduling Theory","volume":"82","author":"Sha Lui","year":"1994","unstructured":"Lui Sha, Ragunathan Rajkumar, and Shirish S Sathaye. Generalized Rate-Monotonic Scheduling Theory: A Framework for Developing Real-time Systems. Proceedings of the IEEE, 82(1):68--82, 1994.","journal-title":"A Framework for Developing Real-time Systems. Proceedings of the IEEE"},{"key":"e_1_3_2_1_51_1","volume-title":"Citeseer","author":"Audsley Neil C","year":"1990","unstructured":"Neil C Audsley, Alan Burns, MF Richardson, and AJ Wellings. Deadline Monotonic Scheduling. Citeseer, 1990."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1109\/71.798317"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11241-011-9140-y"},{"key":"e_1_3_2_1_54_1","first-page":"726","volume-title":"Nam Sung Kim. QoS-aware Dynamic Resource Allocation for Spatial-multitasking GPUs. In the 19th Asia and South Pacific Design Automation Conference (ASP-DAC)","author":"Aguilera Pedro","year":"2014","unstructured":"Pedro Aguilera, Katherine Morrow, and Nam Sung Kim. QoS-aware Dynamic Resource Allocation for Spatial-multitasking GPUs. In the 19th Asia and South Pacific Design Automation Conference (ASP-DAC), pages 726--731. IEEE, 2014."},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1145\/2694344.2694346"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/2493123.2462911"},{"key":"e_1_3_2_1_57_1","first-page":"193","volume-title":"Mateo Valero. Enabling Preemptive Multiprogramming on GPUs. In the 41st International Symposium on Computer Architecuture (ISCA)","author":"Tanasic Ivan","year":"2014","unstructured":"Ivan Tanasic, Isaac Gelado, Javier Cabezas, Alex Ramirez, Nacho Navarro, and Mateo Valero. Enabling Preemptive Multiprogramming on GPUs. In the 41st International Symposium on Computer Architecuture (ISCA), pages 193--204. ACM\/IEEE, 2014."},{"key":"e_1_3_2_1_58_1","first-page":"354","volume-title":"Thomas F Wenisch. Unlocking Bandwidth for GPUs in CC-NUMA Systems. In the 21st International Symposium on High Performance Computer Architecture (HPCA)","author":"Agarwal Neha","year":"2015","unstructured":"Neha Agarwal, David Nellans, Mike O'Connor, Stephen W Keckler, and Thomas F Wenisch. Unlocking Bandwidth for GPUs in CC-NUMA Systems. In the 21st International Symposium on High Performance Computer Architecture (HPCA), pages 354--365. IEEE, 2015."},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2011.193"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2014.6835939"},{"issue":"4","key":"e_1_3_2_1_61_1","first-page":"16","article-title":"Enabling Portable Optimizations of Data Placement on GPU","volume":"35","author":"Chen Guoyang","year":"2015","unstructured":"Guoyang Chen, Bo Wu, Dong Li, and Xipeng Shen. Enabling Portable Optimizations of Data Placement on GPU. Micro, 35(4):16--24, July 2015.","journal-title":"Micro"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2013.6522332"},{"key":"e_1_3_2_1_63_1","first-page":"647","volume-title":"Sethia and Scott Mahlke. Equalizer: Dynamic Tuning of GPU Resources for Efficient Execution. In the 47th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)","author":"Ankit","year":"2014","unstructured":"Ankit Sethia and Scott Mahlke. Equalizer: Dynamic Tuning of GPU Resources for Efficient Execution. In the 47th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pages 647--658. IEEE\/ACM, 2014."},{"key":"e_1_3_2_1_64_1","first-page":"70","volume-title":"Dollinger and Vincent Loechner. Adaptive Runtime Selection for GPU. In the 42nd International Conference on Parallel Processing (ICPP)","year":"2013","unstructured":"J-F Dollinger and Vincent Loechner. Adaptive Runtime Selection for GPU. In the 42nd International Conference on Parallel Processing (ICPP), pages 70--79. IEEE, 2013."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/1996130.1996160"},{"key":"e_1_3_2_1_66_1","first-page":"947","volume-title":"Mohamed Hefeeda. Dynamic Sharing of GPUs in Cloud Systems. In the 27th International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","author":"Diab Khaled M","year":"2013","unstructured":"Khaled M Diab, M Mustafa Rafique, and Mohamed Hefeeda. Dynamic Sharing of GPUs in Cloud Systems. In the 27th International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pages 947--954. IEEE, 2013."},{"key":"e_1_3_2_1_67_1","first-page":"114","volume-title":"Chita R Das. Managing GPU Concurrency in Heterogeneous Architectures. In the 47th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO)","author":"Kayiran Onur","year":"2014","unstructured":"Onur Kayiran, Nachiappan Chidambaram Nachiappan, Adwait Jog, Rachata Ausavarungnirun, Mahmut T Kandemir, Gabriel H Loh, Onur Mutlu, and Chita R Das. Managing GPU Concurrency in Heterogeneous Architectures. In the 47th Annual IEEE\/ACM International Symposium on Microarchitecture (MICRO), pages 114--126. IEEE\/ACM, 2014."},{"key":"e_1_3_2_1_68_1","first-page":"151","volume-title":"Keshav Pingali. Adaptive Heterogeneous Scheduling for Integrated GPUs. In the 23rd International Conference on Parallel Architectures and Compilation Techniques (PACT)","author":"Kaleem Rashid","year":"2014","unstructured":"Rashid Kaleem, Rajkishore Barik, Tatiana Shpeisman, Brian T Lewis, Chunling Hu, and Keshav Pingali. Adaptive Heterogeneous Scheduling for Integrated GPUs. In the 23rd International Conference on Parallel Architectures and Compilation Techniques (PACT), pages 151--162. ACM, 2014."}],"event":{"name":"ASPLOS '16: Architectural Support for Programming Languages and Operating Systems","location":"Atlanta Georgia USA","acronym":"ASPLOS '16","sponsor":["SIGPLAN ACM Special Interest Group on Programming Languages","SIGOPS ACM Special Interest Group on Operating Systems","SIGARCH ACM Special Interest Group on Computer Architecture","SIGBED ACM Special Interest Group on Embedded Systems"]},"container-title":["Proceedings of the Twenty-First International Conference on Architectural Support for Programming Languages and Operating Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2872362.2872368","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2872362.2872368","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/2872362.2872368","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,18]],"date-time":"2025-11-18T09:40:26Z","timestamp":1763458826000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/2872362.2872368"}},"subtitle":["QoS Awareness and Increased Utilization for Non-Preemptive Accelerators in Warehouse Scale Computers"],"short-title":[],"issued":{"date-parts":[[2016,3,25]]},"references-count":68,"alternative-id":["10.1145\/2872362.2872368","10.1145\/2872362"],"URL":"https:\/\/doi.org\/10.1145\/2872362.2872368","relation":{"is-identical-to":[{"id-type":"doi","id":"10.1145\/2954679.2872368","asserted-by":"object"},{"id-type":"doi","id":"10.1145\/2980024.2872368","asserted-by":"object"}]},"subject":[],"published":{"date-parts":[[2016,3,25]]},"assertion":[{"value":"2016-03-25","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}