{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T21:51:09Z","timestamp":1780609869374,"version":"3.54.1"},"reference-count":66,"publisher":"MIT Press - Journals","license":[{"start":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T00:00:00Z","timestamp":1644537600000},"content-version":"vor","delay-in-days":41,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["direct.mit.edu"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,2,9]]},"abstract":"<jats:title>Abstract<\/jats:title>\n               <jats:p>We present Samanantar, the largest publicly available parallel corpora collection for Indic languages. The collection contains a total of 49.7 million sentence pairs between English and 11 Indic languages (from two language families). Specifically, we compile 12.4 million sentence pairs from existing, publicly available parallel corpora, and additionally mine 37.4 million sentence pairs from the Web, resulting in a 4\u00d7 increase. We mine the parallel sentences from the Web by combining many corpora, tools, and methods: (a) Web-crawled monolingual corpora, (b) document OCR for extracting sentences from scanned documents, (c) multilingual representation models for aligning sentences, and (d) approximate nearest neighbor search for searching in a large collection of sentences. Human evaluation of samples from the newly mined corpora validate the high quality of the parallel sentences across 11 languages. Further, we extract 83.4 million sentence pairs between all 55 Indic language pairs from the English-centric parallel corpus using English as the pivot language. We trained multilingual NMT models spanning all these languages on Samanantar which outperform existing models and baselines on publicly available benchmarks, such as FLORES, establishing the utility of Samanantar. 
Our data and models are available publicly at Samanantar and we hope they will help advance research in NMT and multilingual NLP for Indic languages.<\/jats:p>","DOI":"10.1162\/tacl_a_00452","type":"journal-article","created":{"date-parts":[[2022,2,11]],"date-time":"2022-02-11T16:37:06Z","timestamp":1644597426000},"page":"145-162","update-policy":"https:\/\/doi.org\/10.1162\/mitpressjournals.corrections.policy","source":"Crossref","is-referenced-by-count":95,"title":["<i>Samanantar<\/i>: The Largest Publicly Available Parallel Corpora Collection for 11 Indic Languages"],"prefix":"10.1162","volume":"10","author":[{"given":"Gowtham","family":"Ramesh","sequence":"first","affiliation":[{"name":"RBCDSAI, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sumanth","family":"Doddapaneni","sequence":"additional","affiliation":[{"name":"RBCDSAI, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aravinth","family":"Bheemaraj","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mayank","family":"Jobanputra","sequence":"additional","affiliation":[{"name":"IIT Madras, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Raghavan","family":"AK","sequence":"additional","affiliation":[{"name":"AI4Bharat, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ajitesh","family":"Sharma","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sujit","family":"Sahoo","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Harshita","family":"Diddee","sequence":"additional","affiliation":[{"name":"AI4Bharat, 
India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mahalakshmi","family":"J","sequence":"additional","affiliation":[{"name":"AI4Bharat, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Divyanshu","family":"Kakwani","sequence":"additional","affiliation":[{"name":"IIT Madras, India"},{"name":"AI4Bharat, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Navneet","family":"Kumar","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Aswin","family":"Pradeep","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Srihari","family":"Nagaraj","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kumar","family":"Deepak","sequence":"additional","affiliation":[{"name":"Tarento Technologies, India"},{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vivek","family":"Raghavan","sequence":"additional","affiliation":[{"name":"EkStep Foundation, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Anoop","family":"Kunchukuttan","sequence":"additional","affiliation":[{"name":"AI4Bharat, India"},{"name":"Microsoft, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pratyush","family":"Kumar","sequence":"additional","affiliation":[{"name":"RBCDSAI, India"},{"name":"IIT Madras, India"},{"name":"AI4Bharat, India"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Mitesh Shantadevi","family":"Khapra","sequence":"additional","affiliation":[{"name":"RBCDSAI, India"},{"name":"IIT Madras, India"},{"name":"AI4Bharat, 
India"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"281","published-online":{"date-parts":[[2022,2,9]]},"reference":[{"key":"2022033118511603900_bib1","article-title":"Languages of India and India as a Linguistic Area","author":"Abbi","year":"2012"},{"key":"2022033118511603900_bib2","doi-asserted-by":"crossref","first-page":"3204","DOI":"10.18653\/v1\/P19-1310","article-title":"JW300: A wide-coverage parallel corpus for low-resource languages","volume-title":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","author":"Agi\u0107","year":"2019"},{"key":"2022033118511603900_bib3","first-page":"497","article-title":"SemEval-2016 task 1: Semantic textual similarity, monolingual and cross-lingual evaluation","volume-title":"Proceedings of the 10th International Workshop on Semantic Evaluation (SemEval-2016)","author":"Agirre","year":"2016"},{"key":"2022033118511603900_bib4","first-page":"3874","article-title":"Massively multilingual neural machine translation","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)","author":"Aharoni","year":"2019"},{"key":"2022033118511603900_bib5","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2020.nlpcovid19-2.5","article-title":"Tico-19: The translation initiative for COVID-19","author":"Anastasopoulos","year":"2020"},{"key":"2022033118511603900_bib6","article-title":"Massively multilingual neural machine translation in the wild: Findings and challenges","author":"Arivazhagan","year":"2019"},{"key":"2022033118511603900_bib7","doi-asserted-by":"crossref","first-page":"597","DOI":"10.1162\/tacl_a_00288","article-title":"Massively multilingual sentence embeddings for zero-shot cross-lingual transfer and beyond","volume":"7","author":"Artetxe","year":"2019","journal-title":"Transactions of the Association for Computational 
Linguistics"},{"key":"2022033118511603900_bib8","article-title":"Neural machine translation by jointly learning to align and translate","volume-title":"3rd International Conference on Learning Representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings","author":"Bahdanau","year":"2015"},{"key":"2022033118511603900_bib9","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18653\/v1\/W19-5301","article-title":"Findings of the 2020 conference on machine translation (WMT20)","volume-title":"Proceedings of the Fifth Conference on Machine Translation","author":"Barrault","year":"2020"},{"key":"2022033118511603900_bib10","doi-asserted-by":"publisher","first-page":"1","DOI":"10.18653\/v1\/W19-5301","article-title":"Findings of the 2019 conference on machine translation (WMT19)","volume-title":"Proceedings of the Fourth Conference on Machine Translation (Volume 2: Shared Task Papers, Day 1)","author":"Barrault","year":"2019"},{"key":"2022033118511603900_bib11","doi-asserted-by":"publisher","DOI":"10.1145\/1553374.1553380","article-title":"Curriculum learning","volume-title":"ICML \u201909","author":"Bengio","year":"2009"},{"key":"2022033118511603900_bib12","doi-asserted-by":"publisher","first-page":"12","DOI":"10.3115\/v1\/W14-3302","article-title":"Findings of the 2014 workshop on statistical machine translation","volume-title":"Proceedings of the Ninth Workshop on Statistical Machine Translation","author":"Bojar","year":"2014"},{"key":"2022033118511603900_bib13","article-title":"Quality at a glance: An audit of Web-crawled multilingual datasets","author":"Caswell","year":"2021","journal-title":"CoRR"},{"issue":"2","key":"2022033118511603900_bib14","doi-asserted-by":"publisher","first-page":"375","DOI":"10.1007\/s10579-014-9287-y","article-title":"A massively parallel corpus: The bible in 100 languages","volume":"49","author":"Christodouloupoulos","year":"2015","journal-title":"Language Resources and 
Evaluation"},{"key":"2022033118511603900_bib15","first-page":"282","article-title":"An empirical study of language relatedness for transfer learning in neural machine translation","volume-title":"Proceedings of the 31st Pacific Asia Conference on Language, Information and Computation","author":"Dabre","year":"2017"},{"key":"2022033118511603900_bib16","doi-asserted-by":"publisher","first-page":"5960","DOI":"10.18653\/v1\/2020.emnlp-main.480","article-title":"CCAligned: A massive collection of cross-lingual web-document pairs","volume-title":"Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)","author":"El-Kishky","year":"2020"},{"key":"2022033118511603900_bib17","doi-asserted-by":"publisher","DOI":"10.2307\/410649","article-title":"India as a linguistic area","author":"Emeneau","year":"1956","journal-title":"Language"},{"key":"2022033118511603900_bib18","article-title":"Language-agnostic bert sentence embedding","author":"Feng","year":"2020"},{"key":"2022033118511603900_bib19","first-page":"866","article-title":"Multi-way, multilingual neural machine translation with a shared attention mechanism","volume-title":"Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","author":"Firat","year":"2016"},{"key":"2022033118511603900_bib20","first-page":"550","article-title":"Complete multilingual neural machine translation","volume-title":"Proceedings of the Fifth Conference on Machine Translation","author":"Freitag","year":"2020"},{"key":"2022033118511603900_bib21","article-title":"The FLORES-101 evaluation benchmark for low-resource and multilingual machine translation","author":"Goyal","year":"2021","journal-title":"CoRR"},{"key":"2022033118511603900_bib22","first-page":"202","article-title":"Contact relatedness can help improve multilingual NMT: Microsoft STCI-MT @ WMT20","volume-title":"Proceedings of the Fifth Conference on Machine 
Translation","author":"Goyal","year":"2020"},{"key":"2022033118511603900_bib23","article-title":"Accelerating large-scale inference with anisotropic vector quantization","volume-title":"International Conference on Machine Learning","author":"Guo","year":"2020"},{"key":"2022033118511603900_bib24","doi-asserted-by":"publisher","first-page":"6098","DOI":"10.18653\/v1\/D19-1632","article-title":"The FLORES evaluation datasets for low-resource machine translation: Nepali\u2013English and Sinhala\u2013English","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Guzm\u00e1n","year":"2019"},{"key":"2022033118511603900_bib25","article-title":"Pmindia \u2013 a collection of parallel corpora of languages of India","author":"Haddow","year":"2020"},{"key":"2022033118511603900_bib26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.207","article-title":"Not low-resource anymore: Aligner ensembling, batch filtering, and new datasets for Bengali-English machine translation","author":"Hasan","year":"2020"},{"key":"2022033118511603900_bib27","article-title":"Billion-scale similarity search with GPUS","author":"Johnson","year":"2017","journal-title":"arXiv preprint arXiv:1702.08734"},{"key":"2022033118511603900_bib28","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1162\/tacl_a_00065","article-title":"Google\u2019s multilingual neural machine translation system: Enabling zero-shot translation","volume":"5","author":"Johnson","year":"2017","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"2022033118511603900_bib29","doi-asserted-by":"publisher","first-page":"888","DOI":"10.18653\/v1\/W18-6478","article-title":"Dual conditional cross-entropy filtering of noisy parallel corpora","volume-title":"Proceedings of the Third Conference on Machine Translation: Shared Task 
Papers","author":"Junczys-Dowmunt","year":"2018"},{"issue":"1","key":"2022033118511603900_bib30","doi-asserted-by":"publisher","first-page":"117","DOI":"10.1109\/TPAMI.2010.57","article-title":"Product quantization for nearest neighbor search","volume":"33","author":"J\u00e9gou","year":"2011","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2022033118511603900_bib31","doi-asserted-by":"publisher","first-page":"4948","DOI":"10.18653\/v1\/2020.findings-emnlp.445","article-title":"IndicNLPSuite: Monolingual corpora, evaluation benchmarks and pre-trained multilingual language models for Indian languages","volume-title":"Findings of the Association for Computational Linguistics: EMNLP 2020","author":"Kakwani","year":"2020"},{"key":"2022033118511603900_bib32","doi-asserted-by":"publisher","first-page":"244","DOI":"10.18653\/v1\/W18-6325","article-title":"Trivial transfer learning for low-resource neural machine translation","volume-title":"Proceedings of the Third Conference on Machine Translation: Research Papers","author":"Kocmi","year":"2018"},{"key":"2022033118511603900_bib33","doi-asserted-by":"publisher","first-page":"28","DOI":"10.18653\/v1\/W17-3204","article-title":"Six challenges for neural machine translation","volume-title":"Proceedings of the First Workshop on Neural Machine Translation","author":"Koehn","year":"2017"},{"key":"2022033118511603900_bib34","article-title":"The IndicNLP Library","author":"Kunchukuttan","year":"2020"},{"key":"2022033118511603900_bib35","article-title":"The IIT Bombay English-Hindi parallel corpus","author":"Kunchukuttan","year":"2018"},{"key":"2022033118511603900_bib36","first-page":"pages 923\u2013pages 929","article-title":"OpenSubtitles2016: Extracting large parallel corpora from movie and TV subtitles","volume-title":"Proceedings of the Tenth International Conference on Language Resources and Evaluation 
(LREC\u201916)","author":"Lison","year":"2016"},{"key":"2022033118511603900_bib37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.wat-1.1","article-title":"Overview of the 8th workshop on Asian translation","volume-title":"Proceedings of the 8th Workshop on Asian Translation","author":"Nakazawa","year":"2021"},{"key":"2022033118511603900_bib38","first-page":"1","article-title":"Overview of the 7th workshop on Asian translation","volume-title":"Proceedings of the 7th Workshop on Asian Translation","author":"Nakazawa","year":"2020"},{"key":"2022033118511603900_bib39","article-title":"Transfer learning across low-resource, related languages for neural machine translation","volume-title":"International Joint Conference on Natural Language Processing","author":"Nguyen","year":"2017"},{"key":"2022033118511603900_bib40","first-page":"9","article-title":"Asynchronous pipelines for processing huge corpora on medium to low resource infrastructures","volume-title":"Proceedings of the Workshop on Challenges in the Management of Large Corpora","author":"Suarez","year":"2019"},{"key":"2022033118511603900_bib41","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/N19-4009","article-title":"fairseq: A fast, extensible toolkit for sequence modeling","volume-title":"Proceedings of NAACL-HLT 2019: Demonstrations","author":"Ott","year":"2019"},{"key":"2022033118511603900_bib42","first-page":"14","article-title":"OdiEnCorp 2.0: Odia-English parallel corpus for machine translation","volume-title":"Proceedings of the WILDRE5\u2013 5th Workshop on Indian Language Data: Resources and Evaluation","author":"Parida","year":"2020"},{"key":"2022033118511603900_bib43","doi-asserted-by":"publisher","DOI":"10.1145\/3430984.3431026","article-title":"Revisiting low resource status of Indian languages in machine translation","author":"Philip","year":"2020","journal-title":"8th ACM IKDD CODS and 26th COMAD"},{"key":"2022033118511603900_bib44","first-page":"401","article-title":"Constructing 
parallel corpora for six indian languages via crowdsourcing","volume-title":"Proceedings of the Seventh Workshop on Statistical Machine Translation","author":"Post","year":"2012"},{"key":"2022033118511603900_bib45","first-page":"pages 113\u2013pages 122","article-title":"Morphological processing for English-Tamil statistical machine translation","volume-title":"Proceedings of the Workshop on Machine Translation and Parsing in Indian Languages (MTPIL-2012)","author":"Ramasamy","year":"2012"},{"key":"2022033118511603900_bib46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.365","article-title":"Making monolingual sentence embeddings multilingual using knowledge distillation","author":"Reimers","year":"2020","journal-title":"arXiv preprint arXiv:2004.09813"},{"key":"2022033118511603900_bib47","first-page":"528","article-title":"Subword segmentation and a single bridge language affect zero-shot neural machine translation","volume-title":"Proceedings of the Fifth Conference on Machine Translation","author":"Rios","year":"2020"},{"key":"2022033118511603900_bib48","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ICSDA.2016.7918974","article-title":"Introduction of the Asian language treebank","volume-title":"2016 Conference of The Oriental Chapter of International Committee for Coordination and Standardization of Speech Databases and Assessment Techniques (O-COCOSDA)","author":"Riza","year":"2016"},{"key":"2022033118511603900_bib49","article-title":"Wikimatrix: Mining 135m parallel sentences in 1620 language pairs from Wikipedia","author":"Schwenk","year":"2019"},{"key":"2022033118511603900_bib50","article-title":"Ccmatrix: Mining billions of high-quality parallel sentences on the WEB","author":"Schwenk","year":"2019","journal-title":"CoRR"},{"key":"2022033118511603900_bib51","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.507","article-title":"Ccmatrix: Mining billions of high-quality parallel sentences on the 
Web","author":"Schwenk","year":"2020"},{"key":"2022033118511603900_bib52","doi-asserted-by":"publisher","first-page":"86","DOI":"10.18653\/v1\/P16-1009","article-title":"Improving neural machine translation models with monolingual data","volume-title":"Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"Sennrich","year":"2016"},{"key":"2022033118511603900_bib53","doi-asserted-by":"publisher","first-page":"1715","DOI":"10.18653\/v1\/P16-1162","article-title":"Neural machine translation of rare words with subword units","volume-title":"Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","author":"Sennrich","year":"2016"},{"key":"2022033118511603900_bib54","first-page":"pages 175\u2013pages 182","article-title":"Iterative, MT-based sentence alignment of parallel texts","volume-title":"Proceedings of the 18th Nordic Conference of Computational Linguistics (NODALIDA 2011)","author":"Sennrich","year":"2011"},{"key":"2022033118511603900_bib55","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ICACCP.2019.8882969","article-title":"Neural machine translation system of indic languages\u2014an attention based approach","volume-title":"2019 Second International Conference on Advanced Computational and Communication Paradigms (ICACCP)","author":"Shah","year":"2019"},{"key":"2022033118511603900_bib56","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9781139003575","volume-title":"South Asian Languages: A Syntactic Typology","author":"Subb\u0101r\u0101o","year":"2012"},{"key":"2022033118511603900_bib57","first-page":"13771","article-title":"Diskann: Fast accurate billion-point nearest neighbor search on a single node","volume-title":"Advances in Neural Information Processing 
Systems","author":"Subramanya","year":"2019"},{"key":"2022033118511603900_bib58","doi-asserted-by":"publisher","first-page":"963","DOI":"10.18653\/v1\/D19-1089","article-title":"Multilingual neural machine translation with language clustering","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Tan","year":"2019"},{"key":"2022033118511603900_bib59","article-title":"Multilingual translation with extensible multilingual pretraining and finetuning","author":"Tang","year":"2020"},{"key":"2022033118511603900_bib60","doi-asserted-by":"publisher","first-page":"1342","DOI":"10.18653\/v1\/D19-1136","article-title":"Vecalign: Improved sentence alignment in linear time and space","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Thompson","year":"2019"},{"key":"2022033118511603900_bib61","first-page":"479","article-title":"OPUS-MT \u2013 building open translation services for the world","volume-title":"Proceedings of the 22nd Annual Conference of the European Association for Machine Translation","author":"Tiedemann","year":"2020"},{"key":"2022033118511603900_bib62","article-title":"OPUS-MT\u2014Building open translation services for the World","volume-title":"Proceedings of the 22nd Annual Conference of the European Association for Machine Translation (EAMT)","author":"Tiedemann","year":"2020"},{"key":"2022033118511603900_bib63","article-title":"Parallel data, tools and interfaces in opus","volume-title":"Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC\u201912)","author":"Tiedemann","year":"2012"},{"key":"2022033118511603900_bib64","article-title":"Attention is all you 
need","author":"Vaswani","year":"2017"},{"key":"2022033118511603900_bib65","article-title":"Google\u2019s neural machine translation system: Bridging the gap between human and machine translation","author":"Yonghui","year":"2016","journal-title":"CoRR"},{"key":"2022033118511603900_bib66","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2021.naacl-main.41","article-title":"mt5: A massively multilingual pre-trained text-to-text transformer","author":"Xue","year":"2021"}],"container-title":["Transactions of the Association for Computational Linguistics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/direct.mit.edu\/tacl\/article-pdf\/doi\/10.1162\/tacl_a_00452\/1987010\/tacl_a_00452.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/direct.mit.edu\/tacl\/article-pdf\/doi\/10.1162\/tacl_a_00452\/1987010\/tacl_a_00452.pdf","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,3,31]],"date-time":"2022-03-31T23:52:39Z","timestamp":1648770759000},"score":1,"resource":{"primary":{"URL":"https:\/\/direct.mit.edu\/tacl\/article\/doi\/10.1162\/tacl_a_00452\/109468\/Samanantar-The-Largest-Publicly-Available-Parallel"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022]]},"references-count":66,"URL":"https:\/\/doi.org\/10.1162\/tacl_a_00452","relation":{},"ISSN":["2307-387X"],"issn-type":[{"value":"2307-387X","type":"electronic"}],"subject":[],"published-other":{"date-parts":[[2022]]},"published":{"date-parts":[[2022]]}}}