{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:15:40Z","timestamp":1767323740474,"version":"3.48.0"},"publisher-location":"Singapore","reference-count":40,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556984","type":"print"},{"value":"9789819556991","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5699-1_28","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:13:42Z","timestamp":1767323622000},"page":"406-419","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Inference-Time Scaling for\u00a0Visual AutoRegressive Modeling by\u00a0Searching Representative Samples"],"prefix":"10.1007","author":[{"given":"Weidong","family":"Tang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyan","family":"Wan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Siyu","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiumei","family":"Wang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"28_CR1","doi-asserted-by":"crossref","unstructured":"Borzunov, A., et al.: Distributed inference and fine-tuning of large language models over the Internet. In: NeurIPS (2023)","DOI":"10.18653\/v1\/2023.acl-demo.54"},{"key":"28_CR2","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TCIAIG.2012.2186810","volume":"4","author":"C Browne","year":"2012","unstructured":"Browne, C., et al.: A survey of Monte Carlo tree search methods. IEEE Trans. Comput. Intell. AI Games 4, 1\u201343 (2012)","journal-title":"IEEE Trans. Comput. Intell. AI Games"},{"key":"28_CR3","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: ImageNet: a large-scale hierarchical image database. In: CVPR, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"28_CR4","unstructured":"Dhariwal, P., Nichol, A.Q.: Diffusion models beat GANs on image synthesis. In: NeurIPS, pp. 8780\u20138794 (2021)"},{"key":"28_CR5","unstructured":"Esser, P., Rombach, R., Blattmann, A., Ommer, B.: Imagebart: bidirectional context with multinomial diffusion for autoregressive image synthesis. In: NeurIPS, pp. 3518\u20133532 (2021)"},{"key":"28_CR6","doi-asserted-by":"crossref","unstructured":"Esser, P., Rombach, R., Ommer, B.: Taming transformers for high-resolution image synthesis. In: CVPR, pp. 12873\u201312883 (2021)","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"28_CR7","doi-asserted-by":"crossref","unstructured":"Fei, H., Li, B., Liu, Q., Bing, L., Li, F., Chua, T.-S.: Reasoning implicit sentiment with chain-of-thought prompting. In: ACL, pp. 1171\u20131182 (2023)","DOI":"10.18653\/v1\/2023.acl-short.101"},{"key":"28_CR8","unstructured":"Geiping, J., et al.: Scaling Up Test-Time Compute with Latent Reasoning: A Recurrent Depth Approach. arXiv preprint arXiv:2502.05171 (2025)"},{"key":"28_CR9","unstructured":"Ghosh, D., Hajishirzi, H., Schmidt, L.: An object-focused framework for evaluating text-to-image alignment. In: NeurIPS. GenEval (2023)"},{"key":"28_CR10","doi-asserted-by":"crossref","unstructured":"Han, J., et al.: Infinity: scaling bitwise autoregressive modeling for high-resolution image synthesis. In: CVPR, pp. 15733\u201315744 (2025)","DOI":"10.1109\/CVPR52734.2025.01467"},{"key":"28_CR11","unstructured":"Heusel, M., Ramsauer, H., Unterthiner, T., Nessler, B., Hochreiter, S.: GANs trained by a two time-scale update rule converge to a local Nash equilibrium. In: NeurIPS, pp. 6626\u20136637 (2017)"},{"key":"28_CR12","unstructured":"Ho, J., Jain, A., Abbeel, P.: Denoising diffusion probabilistic models. In: NeurIPS (2020)"},{"key":"28_CR13","unstructured":"Jiao, S., et al.: Flexvar: Flexible Visual Autoregressive Modeling without Residual Prediction. arXiv preprint arXiv:2502.20313 (2025)"},{"key":"28_CR14","unstructured":"Kojima, T., Gu, S.S., Reid, M., Matsuo, Y., Iwasawa, Y.: Large language models are zero-shot reasoners. In: NeurIPS (2022)"},{"key":"28_CR15","doi-asserted-by":"crossref","unstructured":"Lee, D., Kim, C., Kim, S., Cho, M., Han, W.-S.: Autoregressive image generation using residual quantization. In: CVPR, pp. 11513\u201311522 (2022)","DOI":"10.1109\/CVPR52688.2022.01123"},{"key":"28_CR16","unstructured":"Lee, H., et al.: RLAIF vs. RLHF: scaling reinforcement learning from human feedback with AI feedback. In: ICML (2024)"},{"key":"28_CR17","unstructured":"Leviathan, Y., Kalman, M., Matias, Y.: Fast inference from transformers via speculative decoding. In: ICML, pp. 19274\u201319286 (2023)"},{"key":"28_CR18","unstructured":"Lightman, H., et al.: Let\u2019s verify step by step. In: ICLR (2024)"},{"key":"28_CR19","unstructured":"Ma, N., et al.: Inference-Time Scaling for Diffusion Models beyond Scaling Denoising Steps. arXiv preprint arXiv:2501.09732 (2025)"},{"key":"28_CR20","unstructured":"Ma, Q., et al.: Let\u2019s reward step by step: step-level reward model as the navigators for reasoning. In: NeurIPS, vol. 36, pp. 23145\u201323158 (2023)"},{"key":"28_CR21","doi-asserted-by":"crossref","unstructured":"Peebles, W., Xie, S.: Scalable diffusion models with transformers. In: ICCV, pp. 4172\u20134182 (2023)","DOI":"10.1109\/ICCV51070.2023.00387"},{"key":"28_CR22","unstructured":"Pope, R., et al.: Efficiently scaling transformer inference. In: MLSys (2023)"},{"key":"28_CR23","doi-asserted-by":"crossref","unstructured":"Pryzant, R., Iter, D., Li, J., Lee, Y.T., Zhu, C., Zeng, M.: Automatic prompt optimization with \"Gradient Descent\" and beam search. In: EMNLP, pp. 7957\u20137968 (2023)","DOI":"10.18653\/v1\/2023.emnlp-main.494"},{"key":"28_CR24","unstructured":"Qi, Z., Bai, L., Xiong, H., Xie, Z.: Not All Noises Are Created Equally: Diffusion Noise Selection and Optimization. CoRR, abs\/2407.14041 (2024)"},{"key":"28_CR25","unstructured":"Salimans, T., Goodfellow, I.J., Zaremba, W., Cheung, V., Radford, A., Chen, X.: Improved techniques for training GANs. In: NeurIPS, pp. 2226\u20132234 (2016)"},{"key":"28_CR26","doi-asserted-by":"crossref","unstructured":"Scott, D.W.: Multivariate Density Estimation: Theory, Practice, and Visualization. Wiley (1992)","DOI":"10.1002\/9780470316849"},{"key":"28_CR27","unstructured":"Snell, C., Lee, J., Xu, K., Kumar, A.: Scaling LLM Test-Time Compute Optimally Can be More Effective than Scaling Model Parameters. arXiv preprint arXiv:2408.03314 (2024)"},{"key":"28_CR28","unstructured":"Song, J., Meng, C., Ermon, S.: Denoising diffusion implicit models. In: CLR (2021)"},{"key":"28_CR29","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1137\/0201010","volume":"1","author":"RE Tarjan","year":"1972","unstructured":"Tarjan, R.E.: Depth-first search and linear graph algorithms. SIAM J. Comput. 1, 146\u2013160 (1972)","journal-title":"SIAM J. Comput."},{"key":"28_CR30","unstructured":"Thaker, D., Goyal, A., Vidal, R.: Frequency-Guided Posterior Sampling for Diffusion-Based Image Restoration. arXiv preprint arXiv:2411.15295 (2024)"},{"key":"28_CR31","doi-asserted-by":"crossref","unstructured":"Tian, K., Jiang, Y., Yuan, Z., Peng, B., Wang, L.: Visual autoregressive modeling: scalable image generation via next-scale prediction. In: NeurIPS (2024)","DOI":"10.52202\/079017-2694"},{"key":"28_CR32","unstructured":"van\u00a0den Oord, A., Vinyals, O., Kavukcuoglu, K.: Neural discrete representation learning. In: NeurIPS, pp. 6306\u20136315 (2017)"},{"key":"28_CR33","unstructured":"Wang, E.Z., et al.: Planning in natural language improves LLM search for code generation. In: ICLR (2025)"},{"key":"28_CR34","unstructured":"Wang, X., et al.: Self-consistency improves chain of thought reasoning in language models. In: ICLR (2023)"},{"key":"28_CR35","unstructured":"Wei, J., et al.: Chain-of-thought prompting elicits reasoning in large language models. In: NeurIPS (2022)"},{"key":"28_CR36","unstructured":"Wu, Y., Sun, Z., Li, S., Welleck, S., Yang, Y.: Inference scaling laws: an empirical analysis of compute-optimal inference for LLM problem-solving. In: ICLR (2025)"},{"key":"28_CR37","unstructured":"Xie, Y., et al.: Self-evaluation guided beam search for reasoning. In: NeurIPS (2023)"},{"key":"28_CR38","unstructured":"Yao, S., et al.: Tree of thoughts: deliberate problem solving with large language models. In: NeurIPS (2023)"},{"key":"28_CR39","unstructured":"Yu, Z., He, L., Wu, Z., Dai, X., Chen, J.: Towards Better Chain-of-Thought Prompting Strategies: A Survey. arXiv preprint arXiv:2310.04959 (2023)"},{"key":"28_CR40","unstructured":"Zhao, Y., Xiong, Y., Kr\u00e4henb\u00fchl, P.: Image and Video Tokenization with Binary Spherical Quantization. arXiv preprint arXiv:2406.07548 (2024)"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5699-1_28","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T03:13:48Z","timestamp":1767323628000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5699-1_28"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556984","9789819556991"],"references-count":40,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5699-1_28","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}