{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T18:50:56Z","timestamp":1776106256780,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":90,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T00:00:00Z","timestamp":1655683200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,6,21]]},"DOI":"10.1145\/3531146.3533229","type":"proceedings-article","created":{"date-parts":[[2022,6,20]],"date-time":"2022-06-20T14:27:10Z","timestamp":1655735230000},"page":"1747-1764","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":136,"title":["Predictability and Surprise in Large Generative Models"],"prefix":"10.1145","author":[{"given":"Deep","family":"Ganguli","sequence":"first","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Danny","family":"Hernandez","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Liane","family":"Lovitt","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Amanda","family":"Askell","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Yuntao","family":"Bai","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Anna","family":"Chen","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Tom","family":"Conerly","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Nova","family":"Dassarma","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Dawn","family":"Drain","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Nelson","family":"Elhage","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Sheer","family":"El Showk","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Stanislav","family":"Fort","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Zac","family":"Hatfield-Dodds","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Tom","family":"Henighan","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Scott","family":"Johnston","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Andy","family":"Jones","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Nicholas","family":"Joseph","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Jackson","family":"Kernian","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Shauna","family":"Kravec","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Ben","family":"Mann","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Neel","family":"Nanda","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Kamal","family":"Ndousse","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Catherine","family":"Olsson","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Daniela","family":"Amodei","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Tom","family":"Brown","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Jared","family":"Kaplan","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Sam","family":"McCandlish","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Christopher","family":"Olah","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Dario","family":"Amodei","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]},{"given":"Jack","family":"Clark","sequence":"additional","affiliation":[{"name":"Anthropic, USA"}]}],"member":"320","published-online":{"date-parts":[[2022,6,20]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"RAFT: A Real-World Few-Shot Text Classification Benchmark. arXiv:2109.14076 [cs] (Nov.","author":"Alex Neel","year":"2021","unstructured":"Neel Alex, Eli Lifland, Lewis Tunstall, Abhishek Thakur, Pegah Maham, C.\u00a0Jess Riedel, Emmie Hine, Carolyn Ashurst, Paul Sedille, Alexis Carlier, Michael Noetel, and Andreas Stuhlm\u00fcller. 2021. RAFT: A Real-World Few-Shot Text Classification Benchmark. arXiv:2109.14076 [cs] (Nov. 2021). http:\/\/arxiv.org\/abs\/2109.14076 arXiv:2109.14076."},{"key":"e_1_3_2_1_2_1","unstructured":"Dario Amodei Danny Hernandez Girish Sastry Jack Clark Greg Brockman and Ilya Sutskever. 2018. AI and Compute. https:\/\/openai.com\/blog\/ai-and-compute\/"},{"key":"e_1_3_2_1_3_1","unstructured":"Julia Angwin Jeff Larson Surya Mattu and Lauren Kirchner. 2016. Machine bias: There\u2019s software used across the country to predict future criminals. and it\u2019s biased against blacks. https:\/\/www.propublica.org\/article\/machine-bias-risk-assessments-in-criminal-sentencing"},{"key":"e_1_3_2_1_4_1","volume-title":"A General Language Assistant as a Laboratory for Alignment. arXiv:2112.00861 [cs] (Dec","author":"Askell Amanda","year":"2021","unstructured":"Amanda Askell, Yuntao Bai, Anna Chen, Dawn Drain, Deep Ganguli, Tom Henighan, Andy Jones, Nicholas Joseph, Ben Mann, Nova DasSarma, Nelson Elhage, Zac Hatfield-Dodds, Danny Hernandez, Jackson Kernion, Kamal Ndousse, Catherine Olsson, Dario Amodei, Tom Brown, Jack Clark, Sam McCandlish, Chris Olah, and Jared Kaplan. 2021. A General Language Assistant as a Laboratory for Alignment. arXiv:2112.00861 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.00861 arXiv:2112.00861."},{"key":"e_1_3_2_1_5_1","volume-title":"Program Synthesis with Large Language Models. arXiv:2108.07732 [cs] (Aug","author":"Austin Jacob","year":"2021","unstructured":"Jacob Austin, Augustus Odena, Maxwell Nye, Maarten Bosma, Henryk Michalewski, David Dohan, Ellen Jiang, Carrie Cai, Michael Terry, Quoc Le, and Charles Sutton. 2021. Program Synthesis with Large Language Models. arXiv:2108.07732 [cs] (Aug. 2021). http:\/\/arxiv.org\/abs\/2108.07732 arXiv:2108.07732."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.abi7176"},{"key":"e_1_3_2_1_7_1","volume-title":"It\u2019s COMPASlicated: The Messy Relationship between RAI Datasets and Algorithmic Fairness Benchmarks. arXiv:2106.05498 [cs] (June","author":"Bao Michelle","year":"2021","unstructured":"Michelle Bao, Angela Zhou, Samantha Zottola, Brian Brubach, Sarah Desmarais, Aaron Horowitz, Kristian Lum, and Suresh Venkatasubramanian. 2021. It\u2019s COMPASlicated: The Messy Relationship between RAI Datasets and Algorithmic Fairness Benchmarks. arXiv:2106.05498 [cs] (June 2021). http:\/\/arxiv.org\/abs\/2106.05498 arXiv:2106.05498."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"e_1_3_2_1_9_1","volume-title":"NLP. arXiv:2005.14050 [cs] (May","author":"Blodgett Su\u00a0Lin","year":"2020","unstructured":"Su\u00a0Lin Blodgett, Solon Barocas, Hal Daum\u00e9\u00a0III, and Hanna Wallach. 2020. Language (Technology) is Power: A Critical Survey of \u201dBias\u201d in NLP. arXiv:2005.14050 [cs] (May 2020). http:\/\/arxiv.org\/abs\/2005.14050 arXiv:2005.14050."},{"key":"e_1_3_2_1_10_1","volume-title":"On the Opportunities and Risks of Foundation Models. arXiv:2108.07258 [cs] (Aug","author":"Bommasani Rishi","year":"2021","unstructured":"Rishi Bommasani, Drew\u00a0A. Hudson, Ehsan Adeli, Russ Altman, Simran Arora, Sydney von Arx, Michael\u00a0S. Bernstein, Jeannette Bohg, Antoine Bosselut, Emma Brunskill, Erik Brynjolfsson, Shyamal Buch, Dallas Card, Rodrigo Castellon, Niladri Chatterji, Annie Chen, Kathleen Creel, Jared\u00a0Quincy Davis, Dora Demszky, Chris Donahue, Moussa Doumbouya, Esin Durmus, Stefano Ermon, John Etchemendy, Kawin Ethayarajh, Li Fei-Fei, Chelsea Finn, Trevor Gale, Lauren Gillespie, Karan Goel, Noah Goodman, Shelby Grossman, Neel Guha, Tatsunori Hashimoto, Peter Henderson, John Hewitt, Daniel\u00a0E. Ho, Jenny Hong, Kyle Hsu, Jing Huang, Thomas Icard, Saahil Jain, Dan Jurafsky, Pratyusha Kalluri, Siddharth Karamcheti, Geoff Keeling, Fereshte Khani, Omar Khattab, Pang\u00a0Wei Koh, Mark Krass, Ranjay Krishna, Rohith Kuditipudi, Ananya Kumar, Faisal Ladhak, Mina Lee, Tony Lee, Jure Leskovec, Isabelle Levent, Xiang\u00a0Lisa Li, Xuechen Li, Tengyu Ma, Ali Malik, Christopher\u00a0D. Manning, Suvir Mirchandani, Eric Mitchell, Zanele Munyikwa, Suraj Nair, Avanika Narayan, Deepak Narayanan, Ben Newman, Allen Nie, Juan\u00a0Carlos Niebles, Hamed Nilforoshan, Julian Nyarko, Giray Ogut, Laurel Orr, Isabel Papadimitriou, Joon\u00a0Sung Park, Chris Piech, Eva Portelance, Christopher Potts, Aditi Raghunathan, Rob Reich, Hongyu Ren, Frieda Rong, Yusuf Roohani, Camilo Ruiz, Jack Ryan, Christopher R\u00e9, Dorsa Sadigh, Shiori Sagawa, Keshav Santhanam, Andy Shih, Krishnan Srinivasan, Alex Tamkin, Rohan Taori, Armin\u00a0W. Thomas, Florian Tram\u00e8r, Rose\u00a0E. Wang, William Wang, Bohan Wu, Jiajun Wu, Yuhuai Wu, Sang\u00a0Michael Xie, Michihiro Yasunaga, Jiaxuan You, Matei Zaharia, Michael Zhang, Tianyi Zhang, Xikun Zhang, Yuhui Zhang, Lucia Zheng, Kaitlyn Zhou, and Percy Liang. 2021. On the Opportunities and Risks of Foundation Models. arXiv:2108.07258 [cs] (Aug. 2021). http:\/\/arxiv.org\/abs\/2108.07258 arXiv:2108.07258."},{"key":"e_1_3_2_1_11_1","unstructured":"Sebastian Borgeaud Arthur Mensch Jordan Hoffmann Trevor Cai Eliza Rutherford Katie Millican George van\u00a0den Driessche Jean-Baptiste Lespiau Bogdan Damoc Aidan Clark Diego de\u00a0Las Casas Aurelia Guy Jacob Menick Roman Ring Tom Hennigan Saffron Huang Loren Maggiore Chris Jones Albin Cassirer Andy Brock Michela Paganini Geoffrey Irving Oriol Vinyals Simon Osindero Karen Simonyan Jack\u00a0W. Rae Erich Elsen and Laurent Sifre. 2021. Improving language models by retrieving from trillions of tokens. arXiv:2112.04426 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.04426 arXiv:2112.04426."},{"key":"e_1_3_2_1_12_1","volume-title":"Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.\u00a0F. Balcan, and H.\u00a0Lin (Eds.). Vol.\u00a033. Curran Associates","author":"Brown Tom","year":"1877","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel Ziegler, Jeffrey Wu, Clemens Winter, Chris Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. In Advances in Neural Information Processing Systems, H.\u00a0Larochelle, M.\u00a0Ranzato, R.\u00a0Hadsell, M.\u00a0F. Balcan, and H.\u00a0Lin (Eds.). Vol.\u00a033. Curran Associates, Inc., 1877\u20131901. https:\/\/proceedings.neurips.cc\/paper\/2020\/file\/1457c0d6bfcb4967418bfb8ac142f64a-Paper.pdf"},{"key":"e_1_3_2_1_13_1","unstructured":"Miles Brundage Shahar Avin Jasmine Wang Haydn Belfield Gretchen Krueger Gillian Hadfield Heidy Khlaaf Jingying Yang Helen Toner Ruth Fong Tegan Maharaj Pang\u00a0Wei Koh Sara Hooker Jade Leung Andrew Trask Emma Bluemke Jonathan Lebensold Cullen O\u2019Keefe Mark Koren Th\u00e9o Ryffel J.\u00a0B. Rubinovitz Tamay Besiroglu Federica Carugati Jack Clark Peter Eckersley Sarah de Haas Maritza Johnson Ben Laurie Alex Ingerman Igor Krawczuk Amanda Askell Rosario Cammarota Andrew Lohn David Krueger Charlotte Stix Peter Henderson Logan Graham Carina Prunkl Bianca Martin Elizabeth Seger Noa Zilberman Se\u00e1n\u00a0O h\u00c9igeartaigh Frens Kroeger Girish Sastry Rebecca Kagan Adrian Weller Brian Tse Elizabeth Barnes Allan Dafoe Paul Scharre Ariel Herbert-Voss Martijn Rasser Shagun Sodhani Carrick Flynn Thomas\u00a0Krendl Gilbert Lisa Dyer Saif Khan Yoshua Bengio and Markus Anderljung. 2020. Toward Trustworthy AI Development: Mechanisms for Supporting Verifiable Claims. arXiv:2004.07213 [cs] (April 2020). http:\/\/arxiv.org\/abs\/2004.07213 arXiv:2004.07213."},{"key":"e_1_3_2_1_14_1","unstructured":"Ben Buchanan Andrew Lohn Micha Musser and Katerina Sedova. 2021. Truth Lies and Automation. https:\/\/cset.georgetown.edu\/publication\/truth-lies-and-automation\/"},{"key":"e_1_3_2_1_15_1","volume-title":"Extracting Training Data from Large Language Models. arXiv:2012.07805 [cs] (June","author":"Carlini Nicholas","year":"2021","unstructured":"Nicholas Carlini, Florian Tramer, Eric Wallace, Matthew Jagielski, Ariel Herbert-Voss, Katherine Lee, Adam Roberts, Tom Brown, Dawn Song, Ulfar Erlingsson, Alina Oprea, and Colin Raffel. 2021. Extracting Training Data from Large Language Models. arXiv:2012.07805 [cs] (June 2021). http:\/\/arxiv.org\/abs\/2012.07805 arXiv:2012.07805."},{"key":"e_1_3_2_1_16_1","volume-title":"Square Kilometre Array hit with further cost hike and delay. Physics World (Aug","author":"Cartlidge Edwin","year":"2019","unstructured":"Edwin Cartlidge. 2019. Square Kilometre Array hit with further cost hike and delay. Physics World (Aug. 2019). https:\/\/physicsworld.com\/a\/square-kilometre-array-hit-with-further-cost-hike-and-delay\/"},{"key":"e_1_3_2_1_17_1","unstructured":"Mark Chen Jerry Tworek Heewoo Jun Qiming Yuan Henrique Ponde de\u00a0Oliveira Pinto Jared Kaplan Harri Edwards Yuri Burda Nicholas Joseph Greg Brockman Alex Ray Raul Puri Gretchen Krueger Michael Petrov Heidy Khlaaf Girish Sastry Pamela Mishkin Brooke Chan Scott Gray Nick Ryder Mikhail Pavlov Alethea Power Lukasz Kaiser Mohammad Bavarian Clemens Winter Philippe Tillet Felipe\u00a0Petroski Such Dave Cummings Matthias Plappert Fotios Chantzis Elizabeth Barnes Ariel Herbert-Voss William\u00a0Hebgen Guss Alex Nichol Alex Paino Nikolas Tezak Jie Tang Igor Babuschkin Suchir Balaji Shantanu Jain William Saunders Christopher Hesse Andrew\u00a0N. Carr Jan Leike Josh Achiam Vedant Misra Evan Morikawa Alec Radford Matthew Knight Miles Brundage Mira Murati Katie Mayer Peter Welinder Bob McGrew Dario Amodei Sam McCandlish Ilya Sutskever and Wojciech Zaremba. 2021. Evaluating Large Language Models Trained on Code. arXiv:2107.03374 [cs] (July 2021). http:\/\/arxiv.org\/abs\/2107.03374 arXiv:2107.03374."},{"key":"e_1_3_2_1_18_1","volume-title":"What Does BERT Look At? An Analysis of BERT\u2019s Attention. arXiv:1906.04341 [cs] (June","author":"Clark Kevin","year":"2019","unstructured":"Kevin Clark, Urvashi Khandelwal, Omer Levy, and Christopher\u00a0D. Manning. 2019. What Does BERT Look At? An Analysis of BERT\u2019s Attention. arXiv:1906.04341 [cs] (June 2019). http:\/\/arxiv.org\/abs\/1906.04341 arXiv:1906.04341."},{"key":"e_1_3_2_1_19_1","volume-title":"Let\u2019s Play Again: Variability of Deep Reinforcement Learning Agents in Atari Environments. arXiv:1904.06312 [cs, stat] (April","author":"Clary Kaleigh","year":"2019","unstructured":"Kaleigh Clary, Emma Tosch, John Foley, and David Jensen. 2019. Let\u2019s Play Again: Variability of Deep Reinforcement Learning Agents in Atari Environments. arXiv:1904.06312 [cs, stat] (April 2019). http:\/\/arxiv.org\/abs\/1904.06312 arXiv:1904.06312."},{"key":"e_1_3_2_1_20_1","volume-title":"Atlas of AI","author":"Crawford Kate","year":"2095","unstructured":"Kate Crawford. 2021. Atlas of AI. Yale University Press. https:\/\/yalebooks.yale.edu\/book\/9780300209570\/atlas-ai"},{"key":"e_1_3_2_1_21_1","volume-title":"Hilary Nicole, and Morgan\u00a0Klaus Scheuerman.","author":"Denton Emily","year":"2020","unstructured":"Emily Denton, Alex Hanna, Razvan Amironesei, Andrew Smart, Hilary Nicole, and Morgan\u00a0Klaus Scheuerman. 2020. Bringing the People Back In: Contesting Benchmark Machine Learning Datasets. arXiv:2007.07399 [cs] (July 2020). http:\/\/arxiv.org\/abs\/2007.07399 arXiv:2007.07399."},{"key":"e_1_3_2_1_22_1","volume-title":"Anticipating Safety Issues in E2E Conversational AI: Framework and Tooling. arXiv:2107.03451 [cs] (July","author":"Dinan Emily","year":"2021","unstructured":"Emily Dinan, Gavin Abercrombie, A.\u00a0Stevie Bergman, Shannon Spruit, Dirk Hovy, Y.-Lan Boureau, and Verena Rieser. 2021. Anticipating Safety Issues in E2E Conversational AI: Framework and Tooling. arXiv:2107.03451 [cs] (July 2021). http:\/\/arxiv.org\/abs\/2107.03451 arXiv:2107.03451."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1126\/sciadv.aao5580"},{"key":"e_1_3_2_1_24_1","volume-title":"Scaling Laws for Acoustic Models. arXiv:2106.09488 [cs, eess] (June","author":"Droppo Jasha","year":"2021","unstructured":"Jasha Droppo and Oguz Elibol. 2021. Scaling Laws for Acoustic Models. arXiv:2106.09488 [cs, eess] (June 2021). http:\/\/arxiv.org\/abs\/2106.09488 arXiv:2106.09488."},{"key":"e_1_3_2_1_25_1","volume-title":"GLaM: Efficient Scaling of Language Models with Mixture-of-Experts. arXiv:2112.06905 [cs] (Dec","author":"Du Nan","year":"2021","unstructured":"Nan Du, Yanping Huang, Andrew\u00a0M. Dai, Simon Tong, Dmitry Lepikhin, Yuanzhong Xu, Maxim Krikun, Yanqi Zhou, Adams\u00a0Wei Yu, Orhan Firat, Barret Zoph, Liam Fedus, Maarten Bosma, Zongwei Zhou, Tao Wang, Yu\u00a0Emma Wang, Kellie Webster, Marie Pellat, Kevin Robinson, Kathy Meier-Hellstern, Toju Duke, Lucas Dixon, Kun Zhang, Quoc\u00a0V. Le, Yonghui Wu, Zhifeng Chen, and Claire Cui. 2021. GLaM: Efficient Scaling of Language Models with Mixture-of-Experts. arXiv:2112.06905 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.06905 arXiv:2112.06905."},{"key":"e_1_3_2_1_26_1","unstructured":"Nelson Elhage Neel Nanda Catherine Olsson Tom Henighan Nicholas Joseph Ben Mann Amanda Askell Yuntao Bai Anna Chen Tom Conerly Nova DasSarma Dawn Drain Deep Ganguli Zac Hatfield-Dodds Danny Hernandez Andy Jones Jackson Kernion Liane Lovitt Kamal Ndousse Dario Amodei Tom Brown Jack Clark Jared Kaplan Sam McCandlish and Chris Olah. 2021. A Mathematical Framework for Transformer Circuits."},{"key":"e_1_3_2_1_27_1","volume-title":"On the (im)possibility of fairness. arXiv:1609.07236 [cs, stat] (Sept","author":"Friedler A.","year":"2016","unstructured":"Sorelle\u00a0A. Friedler, Carlos Scheidegger, and Suresh Venkatasubramanian. 2016. On the (im)possibility of fairness. arXiv:1609.07236 [cs, stat] (Sept. 2016). http:\/\/arxiv.org\/abs\/1609.07236 arXiv:1609.07236."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","unstructured":"Leo Gao Jonathan Tow Stella Biderman Sid Black Anthony DiPofi Charles Foster Laurence Golding Jeffrey Hsu Kyle McDonell Niklas Muennighoff Jason Phang Laria Reynolds Eric Tang Anish Thite Ben Wang Kevin Wang and Andy Zou. 2021. A framework for few-shot language model evaluation. https:\/\/doi.org\/10.5281\/zenodo.5371628","DOI":"10.5281\/zenodo.5371628"},{"key":"e_1_3_2_1_29_1","volume-title":"Datasheets for Datasets. arXiv:1803.09010 [cs] (Dec","author":"Gebru Timnit","year":"2021","unstructured":"Timnit Gebru, Jamie Morgenstern, Briana Vecchione, Jennifer\u00a0Wortman Vaughan, Hanna Wallach, Hal Daum\u00e9\u00a0III, and Kate Crawford. 2021. Datasheets for Datasets. arXiv:1803.09010 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/1803.09010 arXiv:1803.09010."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"crossref","unstructured":"Samuel Gehman Suchin Gururangan Maarten Sap Yejin Choi and Noah\u00a0A. Smith. 2020. RealToxicityPrompts: Evaluating Neural Toxic Degeneration in Language Models. ArXiv abs\/2009.11462(2020).","DOI":"10.18653\/v1\/2020.findings-emnlp.301"},{"key":"e_1_3_2_1_31_1","unstructured":"Mary Gray and Siddharth Suri. 2019. Ghost Work. Mariner Books. https:\/\/ghostwork.info\/"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3482112"},{"key":"e_1_3_2_1_33_1","unstructured":"Laura Hanu and Unitary team. 2020. Detoxify. Published: Github. https:\/\/github.com\/unitaryai\/detoxify."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/2827872"},{"key":"e_1_3_2_1_35_1","volume-title":"Measuring Massive Multitask Language Understanding. arXiv:2009.03300 [cs] (Jan","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and Jacob Steinhardt. 2021. Measuring Massive Multitask Language Understanding. arXiv:2009.03300 [cs] (Jan. 2021). http:\/\/arxiv.org\/abs\/2009.03300 arXiv:2009.03300."},{"key":"e_1_3_2_1_36_1","volume-title":"Unsolved Problems in ML Safety. arXiv:2109.13916 [cs] (Dec","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Nicholas Carlini, John Schulman, and Jacob Steinhardt. 2021. Unsolved Problems in ML Safety. arXiv:2109.13916 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2109.13916 arXiv:2109.13916."},{"key":"e_1_3_2_1_37_1","volume-title":"Natural Adversarial Examples. arXiv:1907.07174 [cs, stat] (March","author":"Hendrycks Dan","year":"2021","unstructured":"Dan Hendrycks, Kevin Zhao, Steven Basart, Jacob Steinhardt, and Dawn Song. 2021. Natural Adversarial Examples. arXiv:1907.07174 [cs, stat] (March 2021). http:\/\/arxiv.org\/abs\/1907.07174 arXiv:1907.07174."},{"key":"e_1_3_2_1_38_1","volume-title":"Scaling Laws for Autoregressive Generative Modeling. arXiv:2010.14701 [cs] (Nov","author":"Henighan Tom","year":"2020","unstructured":"Tom Henighan, Jared Kaplan, Mor Katz, Mark Chen, Christopher Hesse, Jacob Jackson, Heewoo Jun, Tom\u00a0B. Brown, Prafulla Dhariwal, Scott Gray, Chris Hallacy, Benjamin Mann, Alec Radford, Aditya Ramesh, Nick Ryder, Daniel\u00a0M. Ziegler, John Schulman, Dario Amodei, and Sam McCandlish. 2020. Scaling Laws for Autoregressive Generative Modeling. arXiv:2010.14701 [cs] (Nov. 2020). http:\/\/arxiv.org\/abs\/2010.14701 arXiv:2010.14701."},{"key":"e_1_3_2_1_39_1","volume-title":"Scaling Laws for Transfer. arXiv:2102.01293 [cs] (Feb","author":"Hernandez Danny","year":"2021","unstructured":"Danny Hernandez, Jared Kaplan, Tom Henighan, and Sam McCandlish. 2021. Scaling Laws for Transfer. arXiv:2102.01293 [cs] (Feb. 2021). http:\/\/arxiv.org\/abs\/2102.01293 arXiv:2102.01293."},{"key":"e_1_3_2_1_40_1","volume-title":"Empirically. arXiv:1712.00409 [cs, stat] (Dec.","author":"Hestness Joel","year":"2017","unstructured":"Joel Hestness, Sharan Narang, Newsha Ardalani, Gregory Diamos, Heewoo Jun, Hassan Kianinejad, Md\u00a0Mostofa\u00a0Ali Patwary, Yang Yang, and Yanqi Zhou. 2017. Deep Learning Scaling is Predictable, Empirically. arXiv:1712.00409 [cs, stat] (Dec. 2017). http:\/\/arxiv.org\/abs\/1712.00409 arXiv:1712.00409."},{"key":"e_1_3_2_1_41_1","volume-title":"Building a National AI Research Resource. White Paper","author":"Ho Daniel","unstructured":"Daniel Ho, Jennifer King, Russell Wald, and Christopher Wan. 2021. Building a National AI Research Resource. White Paper. Stanford University. https:\/\/hai.stanford.edu\/white-paper-building-national-ai-research-resource"},{"key":"e_1_3_2_1_42_1","unstructured":"Erik Hoel. 2021. Big Tech is replacing human artists with AI. https:\/\/erikhoel.substack.com\/p\/big-tech-is-replacing-human-artists"},{"key":"e_1_3_2_1_43_1","unstructured":"AI\u00a0Now Institute. 2021. Democratize AI? How the proposed National AI Research Resource falls short. https:\/\/medium.com\/@AINowInstitute\/democratize-ai-how-the-proposed-national-ai-research-resource-falls-short-96ae5f67ccfa"},{"key":"e_1_3_2_1_44_1","volume-title":"Scaling Laws for Neural Language Models. arXiv:2001.08361 [cs, stat] (Jan","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom\u00a0B. Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling Laws for Neural Language Models. arXiv:2001.08361 [cs, stat] (Jan. 2020). http:\/\/arxiv.org\/abs\/2001.08361 arXiv:2001.08361."},{"key":"e_1_3_2_1_45_1","volume-title":"Alignment of Language Agents. arXiv:2103.14659 [cs] (March","author":"Kenton Zachary","year":"2021","unstructured":"Zachary Kenton, Tom Everitt, Laura Weidinger, Iason Gabriel, Vladimir Mikulik, and Geoffrey Irving. 2021. Alignment of Language Agents. arXiv:2103.14659 [cs] (March 2021). http:\/\/arxiv.org\/abs\/2103.14659 arXiv:2103.14659."},{"key":"e_1_3_2_1_46_1","unstructured":"Josh Kenway Francois Camille Sasha Costanza-Chock Deborah Raji Inioluwa and Joy Buolamwini. 2022. Bug Bounties For Algorithmic Harms?Technical Report. Algorithmic Justice League. https:\/\/www.ajl.org\/bugs"},{"key":"e_1_3_2_1_47_1","volume-title":"What Changes Can Large-scale Language Models Bring? Intensive Study on HyperCLOVA: Billions-scale Korean Generative Pretrained Transformers. arXiv:2109.04650 [cs] (Nov","author":"Kim Boseop","year":"2021","unstructured":"Boseop Kim, HyoungSeok Kim, Sang-Woo Lee, Gichang Lee, Donghyun Kwak, Dong\u00a0Hyeon Jeon, Sunghyun Park, Sungju Kim, Seonhoon Kim, Dongpil Seo, Heungsub Lee, Minyoung Jeong, Sungjae Lee, Minsub Kim, Suk\u00a0Hyun Ko, Seokhun Kim, Taeyong Park, Jinuk Kim, Soyoung Kang, Na-Hyeon Ryu, Kang\u00a0Min Yoo, Minsuk Chang, Soobin Suh, Sookyo In, Jinseong Park, Kyungduk Kim, Hiun Kim, Jisu Jeong, Yong\u00a0Goo Yeo, Donghoon Ham, Dongju Park, Min\u00a0Young Lee, Jaewook Kang, Inho Kang, Jung-Woo Ha, Woomyoung Park, and Nako Sung. 2021. What Changes Can Large-scale Language Models Bring? Intensive Study on HyperCLOVA: Billions-scale Korean Generative Pretrained Transformers. arXiv:2109.04650 [cs] (Nov. 2021). http:\/\/arxiv.org\/abs\/2109.04650 arXiv:2109.04650."},{"key":"e_1_3_2_1_48_1","volume-title":"How Much Does It Cost To Find A Higgs Boson?Forbes (June","author":"Knapp Alex","year":"2012","unstructured":"Alex Knapp. 2012. How Much Does It Cost To Find A Higgs Boson?Forbes (June 2012). https:\/\/www.forbes.com\/sites\/alexknapp\/2012\/07\/05\/how-much-does-it-cost-to-find-a-higgs-boson\/ Section: Tech."},{"key":"e_1_3_2_1_49_1","volume-title":"Advances in Neural Information Processing Systems, F.\u00a0Pereira, C.\u00a0J.\u00a0C. Burges, L.\u00a0Bottou, and K.\u00a0Q","author":"Krizhevsky Alex","year":"2012","unstructured":"Alex Krizhevsky, Ilya Sutskever, and Geoffrey\u00a0E Hinton. 2012. ImageNet Classification with Deep Convolutional Neural Networks. In Advances in Neural Information Processing Systems, F.\u00a0Pereira, C.\u00a0J.\u00a0C. Burges, L.\u00a0Bottou, and K.\u00a0Q. Weinberger (Eds.). Vol.\u00a025. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2012\/file\/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf"},{"key":"e_1_3_2_1_50_1","unstructured":"Connor Leahy. 2022. Announcing GPT-NeoX-20B. https:\/\/blog.eleuther.ai\/announcing-20b\/"},{"key":"e_1_3_2_1_51_1","unstructured":"Chuan Li. 2020. OpenAI\u2019s GPT-3 Language Model: A Technical Overview. https:\/\/lambdalabs.com\/blog\/demystifying-gpt-3\/"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.51593\/2021CA009"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3287560.3287596"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1007\/s13347-020-00405-8"},{"key":"e_1_3_2_1_56_1","unstructured":"Nick Walton [@nickwalton00]. 2020. I\u2019ve noticed a number of people using AI Dungeon to test GPT-3\u2019s abilities. While it\u2019s a great way to see how GPT-3 can power an interesting application. It\u2019s a poor test of GPT-3\u2019s abilities in general. The first generation of any custom prompt is actually GPT-2.https:\/\/twitter.com\/nickwalton00\/status\/1289946861478936577"},{"key":"e_1_3_2_1_57_1","volume-title":"Carbon Emissions and Large Neural Network Training. arXiv:2104.10350 [cs] (April","author":"Patterson David","year":"2021","unstructured":"David Patterson, Joseph Gonzalez, Quoc Le, Chen Liang, Lluis-Miquel Munguia, Daniel Rothchild, David So, Maud Texier, and Jeff Dean. 2021. Carbon Emissions and Large Neural Network Training. arXiv:2104.10350 [cs] (April 2021). http:\/\/arxiv.org\/abs\/2104.10350 arXiv:2104.10350."},{"key":"e_1_3_2_1_58_1","volume-title":"Red Teaming Language Models with Language Models. arXiv:2202.03286 [cs] (Feb","author":"Perez Ethan","year":"2022","unstructured":"Ethan Perez, Saffron Huang, Francis Song, Trevor Cai, Roman Ring, John Aslanides, Amelia Glaese, Nat McAleese, and Geoffrey Irving. 2022. Red Teaming Language Models with Language Models. arXiv:2202.03286 [cs] (Feb. 2022). http:\/\/arxiv.org\/abs\/2202.03286 arXiv:2202.03286."},{"key":"e_1_3_2_1_59_1","volume-title":"Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets. arXiv:2201.02177 [cs] (Jan.","author":"Power Alethea","year":"2022","unstructured":"Alethea Power, Yuri Burda, Harri Edwards, Igor Babuschkin, and Vedant Misra. 2022. Grokking: Generalization Beyond Overfitting on Small Algorithmic Datasets. arXiv:2201.02177 [cs] (Jan. 2022). http:\/\/arxiv.org\/abs\/2201.02177 arXiv:2201.02177."},{"key":"e_1_3_2_1_60_1","volume-title":"Few-shot Instruction Prompts for Pretrained Language Models to Detect Social Biases. arXiv:2112.07868 [cs] (Dec","author":"Prabhumoye Shrimai","year":"2021","unstructured":"Shrimai Prabhumoye, Rafal Kocielnik, Mohammad Shoeybi, Anima Anandkumar, and Bryan Catanzaro. 2021. Few-shot Instruction Prompts for Pretrained Language Models to Detect Social Biases. arXiv:2112.07868 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.07868 arXiv:2112.07868."},{"key":"e_1_3_2_1_61_1","volume-title":"Scaling Laws for the Few-Shot Adaptation of Pre-trained Image Classifiers. arXiv:2110.06990 [cs] (Oct","author":"Prato Gabriele","year":"2021","unstructured":"Gabriele Prato, Simon Guiroy, Ethan Caballero, Irina Rish, and Sarath Chandar. 2021. Scaling Laws for the Few-Shot Adaptation of Pre-trained Image Classifiers. arXiv:2110.06990 [cs] (Oct. 2021). http:\/\/arxiv.org\/abs\/2110.06990 arXiv:2110.06990."},{"key":"e_1_3_2_1_62_1","volume-title":"Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs] (Feb","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. arXiv:2103.00020 [cs] (Feb. 2021). http:\/\/arxiv.org\/abs\/2103.00020 arXiv:2103.00020."},{"key":"e_1_3_2_1_63_1","unstructured":"Jack\u00a0W. Rae Sebastian Borgeaud Trevor Cai Katie Millican Jordan Hoffmann Francis Song John Aslanides Sarah Henderson Roman Ring Susannah Young Eliza Rutherford Tom Hennigan Jacob Menick Albin Cassirer Richard Powell George van\u00a0den Driessche Lisa\u00a0Anne Hendricks Maribeth Rauh Po-Sen Huang Amelia Glaese Johannes Welbl Sumanth Dathathri Saffron Huang Jonathan Uesato John Mellor Irina Higgins Antonia Creswell Nat McAleese Amy Wu Erich Elsen Siddhant Jayakumar Elena Buchatskaya David Budden Esme Sutherland Karen Simonyan Michela Paganini Laurent Sifre Lena Martens Xiang\u00a0Lorraine Li Adhiguna Kuncoro Aida Nematzadeh Elena Gribovskaya Domenic Donato Angeliki Lazaridou Arthur Mensch Jean-Baptiste Lespiau Maria Tsimpoukelli Nikolai Grigorev Doug Fritz Thibault Sottiaux Mantas Pajarskas Toby Pohlen Zhitao Gong Daniel Toyama Cyprien de\u00a0Masson d\u2019Autume Yujia Li Tayfun Terzi Vladimir Mikulik Igor Babuschkin Aidan Clark Diego de\u00a0Las Casas Aurelia Guy Chris Jones James Bradbury Matthew Johnson Blake Hechtman Laura Weidinger Iason Gabriel William Isaac Ed Lockhart Simon Osindero Laura Rimell Chris Dyer Oriol Vinyals Kareem Ayoub Jeff Stanway Lorrayne Bennett Demis Hassabis Koray Kavukcuoglu and Geoffrey Irving. 2021. Scaling Language Models: Methods Analysis & Insights from Training Gopher. arXiv:2112.11446 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.11446 arXiv:2112.11446."},{"key":"e_1_3_2_1_64_1","volume-title":"AI and the Everything in the Whole Wide World Benchmark. arXiv:2111.15366 [cs] (Nov","author":"Raji Inioluwa\u00a0Deborah","year":"2021","unstructured":"Inioluwa\u00a0Deborah Raji, Emily\u00a0M. Bender, Amandalynne Paullada, Emily Denton, and Alex Hanna. 2021. AI and the Everything in the Whole Wide World Benchmark. arXiv:2111.15366 [cs] (Nov. 2021). http:\/\/arxiv.org\/abs\/2111.15366 arXiv:2111.15366."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1145\/3306618.3314244"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3351095.3372873"},{"key":"e_1_3_2_1_67_1","volume-title":"A Constructive Prediction of the Generalization Error Across Scales. arXiv:1909.12673 [cs, stat] (Dec","author":"Rosenfeld S.","year":"2019","unstructured":"Jonathan\u00a0S. Rosenfeld, Amir Rosenfeld, Yonatan Belinkov, and Nir Shavit. 2019. A Constructive Prediction of the Generalization Error Across Scales. arXiv:1909.12673 [cs, stat] (Dec. 2019). http:\/\/arxiv.org\/abs\/1909.12673 arXiv:1909.12673."},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1162\/99608f92.6ed64b30"},{"key":"e_1_3_2_1_69_1","unstructured":"Victor Sanh Albert Webson Colin Raffel Stephen\u00a0H. Bach Lintang Sutawika Zaid Alyafeai Antoine Chaffin Arnaud Stiegler Teven\u00a0Le Scao Arun Raja Manan Dey M.\u00a0Saiful Bari Canwen Xu Urmish Thakker Shanya\u00a0Sharma Sharma Eliza Szczechla Taewoon Kim Gunjan Chhablani Nihal Nayak Debajyoti Datta Jonathan Chang Mike Tian-Jian Jiang Han Wang Matteo Manica Sheng Shen Zheng\u00a0Xin Yong Harshit Pandey Rachel Bawden Thomas Wang Trishala Neeraj Jos Rozen Abheesht Sharma Andrea Santilli Thibault Fevry Jason\u00a0Alan Fries Ryan Teehan Stella Biderman Leo Gao Tali Bers Thomas Wolf and Alexander\u00a0M. Rush. 2021. Multitask Prompted Training Enables Zero-Shot Task Generalization. arXiv:2110.08207 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2110.08207 arXiv:2110.08207."},{"key":"e_1_3_2_1_70_1","doi-asserted-by":"publisher","DOI":"10.1145\/3381831"},{"key":"e_1_3_2_1_71_1","unstructured":"Jaime Sevilla Pablo Villalobos Juan\u00a0Felipe Cer\u00f3n Matthew Burtell Lennart Heim Amogh\u00a0B Nanjajjar Anson Ho Tamay Besiroglu Marius Hobbhahn and Jean-Stanislas Denain. 2021. Parameter Compute and Data Trends in Machine Learning. https:\/\/docs.google.com\/spreadsheets\/d\/1AAIebjNsnJj_uKALHbXNfn3_YsT6sHXtCU0q7OIPuc4\/"},{"key":"e_1_3_2_1_72_1","volume-title":"What Really Happened When Google Ousted Timnit Gebru. Wired (June","author":"Simonite Tom","year":"2021","unstructured":"Tom Simonite. 2021. What Really Happened When Google Ousted Timnit Gebru. Wired (June 2021). https:\/\/www.wired.com\/story\/google-timnit-gebru-ai-what-really-happened\/ Section: tags."},{"key":"e_1_3_2_1_73_1","volume-title":"A Large-Scale Generative Language Model. arXiv:2201.11990 [cs] (Feb.","author":"Smith Shaden","year":"2022","unstructured":"Shaden Smith, Mostofa Patwary, Brandon Norick, Patrick LeGresley, Samyam Rajbhandari, Jared Casper, Zhun Liu, Shrimai Prabhumoye, George Zerveas, Vijay Korthikanti, Elton Zhang, Rewon Child, Reza\u00a0Yazdani Aminabadi, Julie Bernauer, Xia Song, Mohammad Shoeybi, Yuxiong He, Michael Houston, Saurabh Tiwary, and Bryan Catanzaro. 2022. Using DeepSpeed and Megatron to Train Megatron-Turing NLG 530B, A Large-Scale Generative Language Model. arXiv:2201.11990 [cs] (Feb. 2022). http:\/\/arxiv.org\/abs\/2201.11990 arXiv:2201.11990."},{"key":"e_1_3_2_1_74_1","volume-title":"Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets. arXiv:2106.10328 [cs] (Nov","author":"Solaiman Irene","year":"2021","unstructured":"Irene Solaiman and Christy Dennison. 2021. Process for Adapting Language Models to Society (PALMS) with Values-Targeted Datasets. arXiv:2106.10328 [cs] (Nov. 2021). http:\/\/arxiv.org\/abs\/2106.10328 arXiv:2106.10328."},{"key":"e_1_3_2_1_75_1","volume-title":"Energy and Policy Considerations for Deep Learning in NLP. arXiv:1906.02243 [cs] (June","author":"Strubell Emma","year":"2019","unstructured":"Emma Strubell, Ananya Ganesh, and Andrew McCallum. 2019. Energy and Policy Considerations for Deep Learning in NLP. arXiv:1906.02243 [cs] (June 2019). http:\/\/arxiv.org\/abs\/1906.02243 arXiv:1906.02243."},{"key":"e_1_3_2_1_76_1","volume-title":"Understanding the Capabilities, Limitations, and Societal Impact of Large Language Models. arXiv:2102.02503 [cs] (Feb","author":"Tamkin Alex","year":"2021","unstructured":"Alex Tamkin, Miles Brundage, Jack Clark, and Deep Ganguli. 2021. Understanding the Capabilities, Limitations, and Societal Impact of Large Language Models. arXiv:2102.02503 [cs] (Feb. 2021). http:\/\/arxiv.org\/abs\/2102.02503 arXiv:2102.02503."},{"key":"e_1_3_2_1_77_1","unstructured":"Latitude Team. 2020. AI Dungeon: Dragon Model Upgrade. https:\/\/aidungeon.medium.com\/ai-dungeon-dragon-model-upgrade-7e8ea579abfe"},{"key":"e_1_3_2_1_78_1","unstructured":"Will Thomas. 2020. Flagship Neutrino Project Working to Keep Costs Within Cap. https:\/\/www.aip.org\/fyi\/2020\/flagship-neutrino-project-working-keep-costs-within-cap Publisher: American Institute of Physics."},{"key":"e_1_3_2_1_79_1","volume-title":"Chi, and Quoc Le","author":"Thoppilan Romal","year":"2022","unstructured":"Romal Thoppilan, Daniel De\u00a0Freitas, Jamie Hall, Noam Shazeer, Apoorv Kulshreshtha, Heng-Tze Cheng, Alicia Jin, Taylor Bos, Leslie Baker, Yu Du, YaGuang Li, Hongrae Lee, Huaixiu\u00a0Steven Zheng, Amin Ghafouri, Marcelo Menegali, Yanping Huang, Maxim Krikun, Dmitry Lepikhin, James Qin, Dehao Chen, Yuanzhong Xu, Zhifeng Chen, Adam Roberts, Maarten Bosma, Yanqi Zhou, Chung-Ching Chang, Igor Krivokon, Will Rusch, Marc Pickett, Kathleen Meier-Hellstern, Meredith\u00a0Ringel Morris, Tulsee Doshi, Renelito\u00a0Delos Santos, Toju Duke, Johnny Soraker, Ben Zevenbergen, Vinodkumar Prabhakaran, Mark Diaz, Ben Hutchinson, Kristen Olson, Alejandra Molina, Erin Hoffman-John, Josh Lee, Lora Aroyo, Ravi Rajakumar, Alena Butryna, Matthew Lamm, Viktoriya Kuzmina, Joe Fenton, Aaron Cohen, Rachel Bernstein, Ray Kurzweil, Blaise Aguera-Arcas, Claire Cui, Marian Croak, Ed Chi, and Quoc Le. 2022. LaMDA: Language Models for Dialog Applications. arXiv:2201.08239 [cs] (Jan. 2022). http:\/\/arxiv.org\/abs\/2201.08239 arXiv:2201.08239."},{"key":"e_1_3_2_1_80_1","unstructured":"Ted Underwood. 2021. Science fiction hasn\u2019t prepared us to imagine machine learning.https:\/\/tedunderwood.com\/2021\/02\/02\/why-sf-hasnt-prepared-us-to-imagine-machine-learning\/"},{"key":"e_1_3_2_1_81_1","unstructured":"Ben Wang and Aran Komatsuzaki. 2021. GPT-J-6B: A 6 Billion Parameter Autoregressive Language Model. https:\/\/github.com\/kingoflolz\/mesh-transformer-jax"},{"key":"e_1_3_2_1_82_1","volume-title":"Exploring Larger-scale Knowledge Enhanced Pre-training for Language Understanding and Generation. arXiv:2112.12731 [cs] (Dec","author":"Wang Shuohuan","year":"2021","unstructured":"Shuohuan Wang, Yu Sun, Yang Xiang, Zhihua Wu, Siyu Ding, Weibao Gong, Shikun Feng, Junyuan Shang, Yanbin Zhao, Chao Pang, Jiaxiang Liu, Xuyi Chen, Yuxiang Lu, Weixin Liu, Xi Wang, Yangfan Bai, Qiuliang Chen, Li Zhao, Shiyong Li, Peng Sun, Dianhai Yu, Yanjun Ma, Hao Tian, Hua Wu, Tian Wu, Wei Zeng, Ge Li, Wen Gao, and Haifeng Wang. 2021. ERNIE 3.0 Titan: Exploring Larger-scale Knowledge Enhanced Pre-training for Language Understanding and Generation. arXiv:2112.12731 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.12731 arXiv:2112.12731."},{"key":"e_1_3_2_1_83_1","volume-title":"Finetuned Language Models Are Zero-Shot Learners. arXiv:2109.01652 [cs] (Dec","author":"Wei Jason","year":"2021","unstructured":"Jason Wei, Maarten Bosma, Vincent\u00a0Y. Zhao, Kelvin Guu, Adams\u00a0Wei Yu, Brian Lester, Nan Du, Andrew\u00a0M. Dai, and Quoc\u00a0V. Le. 2021. Finetuned Language Models Are Zero-Shot Learners. arXiv:2109.01652 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2109.01652 arXiv:2109.01652."},{"key":"e_1_3_2_1_84_1","volume-title":"Ethical and social risks of harm from Language Models. arXiv:2112.04359 [cs] (Dec","author":"Weidinger Laura","year":"2021","unstructured":"Laura Weidinger, John Mellor, Maribeth Rauh, Conor Griffin, Jonathan Uesato, Po-Sen Huang, Myra Cheng, Mia Glaese, Borja Balle, Atoosa Kasirzadeh, Zac Kenton, Sasha Brown, Will Hawkins, Tom Stepleton, Courtney Biles, Abeba Birhane, Julia Haas, Laura Rimell, Lisa\u00a0Anne Hendricks, William Isaac, Sean Legassick, Geoffrey Irving, and Iason Gabriel. 2021. Ethical and social risks of harm from Language Models. arXiv:2112.04359 [cs] (Dec. 2021). http:\/\/arxiv.org\/abs\/2112.04359 arXiv:2112.04359."},{"key":"e_1_3_2_1_85_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_86_1","volume-title":"Why and How Governments Should Monitor AI Development. arXiv:2108.12427 [cs] (Aug","author":"Whittlestone Jess","year":"2021","unstructured":"Jess Whittlestone and Jack Clark. 2021. Why and How Governments Should Monitor AI Development. arXiv:2108.12427 [cs] (Aug. 2021). http:\/\/arxiv.org\/abs\/2108.12427 arXiv:2108.12427."},{"key":"e_1_3_2_1_87_1","doi-asserted-by":"publisher","DOI":"10.29297\/orbit.v1i2.49"},{"key":"e_1_3_2_1_88_1","volume-title":"Large-Scale Pre-trained Language Model in Zero-Shot and Few-Shot Learning. arXiv:2110.04725 [cs] (Oct","author":"Wu Shaohua","year":"2021","unstructured":"Shaohua Wu, Xudong Zhao, Tong Yu, Rongguo Zhang, Chong Shen, Hongli Liu, Feng Li, Hong Zhu, Jiangang Luo, Liang Xu, and Xuanwei Zhang. 2021. Yuan 1.0: Large-Scale Pre-trained Language Model in Zero-Shot and Few-Shot Learning. arXiv:2110.04725 [cs] (Oct. 2021). http:\/\/arxiv.org\/abs\/2110.04725 arXiv:2110.04725."},{"key":"e_1_3_2_1_89_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.235"},{"key":"e_1_3_2_1_90_1","volume-title":"Large-scale Autoregressive Pretrained Chinese Language Models with Auto-parallel Computation. arXiv:2104.12369 [cs] (April","author":"Zeng Wei","year":"2021","unstructured":"Wei Zeng, Xiaozhe Ren, Teng Su, Hui Wang, Yi Liao, Zhiwei Wang, Xin Jiang, ZhenZhang Yang, Kaisheng Wang, Xiaoda Zhang, Chen Li, Ziyan Gong, Yifan Yao, Xinjing Huang, Jun Wang, Jianfeng Yu, Qi Guo, Yue Yu, Yan Zhang, Jin Wang, Hengtao Tao, Dasen Yan, Zexuan Yi, Fang Peng, Fangqing Jiang, Han Zhang, Lingfeng Deng, Yehong Zhang, Zhe Lin, Chao Zhang, Shaojie Zhang, Mingyue Guo, Shanzhi Gu, Gaojun Fan, Yaowei Wang, Xuefeng Jin, Qun Liu, and Yonghong Tian. 2021. PanGu-$\\alpha$: Large-scale Autoregressive Pretrained Chinese Language Models with Auto-parallel Computation. arXiv:2104.12369 [cs] (April 2021). http:\/\/arxiv.org\/abs\/2104.12369 arXiv:2104.12369."},{"key":"e_1_3_2_1_91_1","volume-title":"Randomness In Neural Network Training: Characterizing The Impact of Tooling. arXiv:2106.11872 [cs] (June","author":"Zhuang Donglin","year":"2021","unstructured":"Donglin Zhuang, Xingyao Zhang, Shuaiwen\u00a0Leon Song, and Sara Hooker. 2021. Randomness In Neural Network Training: Characterizing The Impact of Tooling. arXiv:2106.11872 [cs] (June 2021). http:\/\/arxiv.org\/abs\/2106.11872 arXiv:2106.11872."}],"event":{"name":"FAccT '22: 2022 ACM Conference on Fairness, Accountability, and Transparency","location":"Seoul Republic of Korea","acronym":"FAccT '22","sponsor":["ACM Association for Computing Machinery"]},"container-title":["2022 ACM Conference on Fairness Accountability and Transparency"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533229","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3531146.3533229","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T19:31:30Z","timestamp":1750188690000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3531146.3533229"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,20]]},"references-count":90,"alternative-id":["10.1145\/3531146.3533229","10.1145\/3531146"],"URL":"https:\/\/doi.org\/10.1145\/3531146.3533229","relation":{},"subject":[],"published":{"date-parts":[[2022,6,20]]},"assertion":[{"value":"2022-06-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}