{"created":"2024-02-27T23:48:07.019420+00:00","id":2000562,"links":{},"metadata":{"_buckets":{"deposit":"3ced2f2f-9339-4ba5-95b1-2ba6c18084a6"},"_deposit":{"created_by":15,"id":"2000562","owner":"15","owners":[15],"pid":{"revision_id":0,"type":"depid","value":"2000562"},"status":"published"},"_oai":{"id":"oai:kitami-it.repo.nii.ac.jp:02000562","sets":["1","1:87"]},"author_link":[],"control_number":"2000562","item_1646810750418":{"attribute_name":"出版タイプ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_ab4af688f83e57aa","subitem_version_type":"AM"}]},"item_3_biblio_info_186":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"2","bibliographicVolumeNumber":"60","bibliographic_titles":[{"bibliographic_title":"Information Processing & Management","bibliographic_titleLang":"en"}]}]},"item_3_description_184":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"In recent years, neural models learned through self-supervised pretraining on large scale multilingual text or speech data have exhibited promising results for underresourced languages, especially when a relatively large amount of data from related language(s) is available. While the technology has a potential for facilitating tasks carried out in language documentation projects, such as speech transcription, pretraining a multilingual model from scratch for every new language would be highly impractical. We investigate the possibility for adapting an existing multilingual wav2vec 2.0 model for a new language, focusing on actual fieldwork data from a critically endangered tongue: Ainu. Specifically, we (i) examine the feasibility of leveraging data from similar languages also in fine-tuning; (ii) verify whether the model’s performance can be improved by further pretraining on target language data. Our results show that continued pretraining is the most effective method to adapt a wav2vec 2.0 model for a new language and leads to considerable reduction in error rates. Furthermore, we find that if a model pretrained on a related speech variety or an unrelated language with similar phonological characteristics is available, multilingual fine-tuning using additional data from that language can have positive impact on speech recognition performance when there is very little labeled data in the target language.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_3_publisher_212":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"Elsevier","subitem_publisher_language":"en"}]},"item_3_relation_191":{"attribute_name":"DOI","attribute_value_mlt":[{"subitem_relation_type_id":{"subitem_relation_type_id_text":"https://doi.org/10.1016/j.ipm.2022.103148","subitem_relation_type_select":"DOI"}}]},"item_3_rights_192":{"attribute_name":"権利","attribute_value_mlt":[{"subitem_rights":"© 2023 Elsevier Ltd. All rights reserved.","subitem_rights_language":"en"}]},"item_3_select_195":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_select_item":"author","subitem_select_language":"en"}]},"item_3_source_id_187":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"0306-4573","subitem_source_identifier_type":"PISSN"}]},"item_access_right":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Karol Nowakowski","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Michal Ptaszynski","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Kyoko Murasaki","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"Jagna Nieuważny","creatorNameLang":"en"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2025-03-02"}],"filename":"2301.07295.pdf","filesize":[{"value":"524 KB"}],"format":"application/pdf","mimetype":"application/pdf","url":{"objectType":"fulltext","url":"https://kitami-it.repo.nii.ac.jp/record/2000562/files/2301.07295.pdf"},"version_id":"5ad33180-18b9-432d-9c4d-82fab8444513"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Adapting multilingual speech representation model for a new, underresourced language through multilingual fine-tuning and continued pretraining","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Adapting multilingual speech representation model for a new, underresourced language through multilingual fine-tuning and continued pretraining","subitem_title_language":"en"}]},"item_type_id":"3","owner":"15","path":["1","87"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2024-02-28"},"publish_date":"2024-02-28","publish_status":"0","recid":"2000562","relation_version_is_last":true,"title":["Adapting multilingual speech representation model for a new, underresourced language through multilingual fine-tuning and continued pretraining"],"weko_creator_id":"15","weko_shared_id":-1},"updated":"2024-06-03T02:16:40.091982+00:00"}