{"created":"2022-10-05T06:13:39.922900+00:00","id":2000332,"links":{},"metadata":{"_buckets":{"deposit":"ba3559fb-a276-465a-91a3-558635a20160"},"_deposit":{"created_by":15,"id":"2000332","owner":"15","owners":[15],"owners_ext":{"displayname":"北見工業大学学術機関リポジトリ(KIT-R)","username":"kitir"},"pid":{"revision_id":0,"type":"depid","value":"2000332"},"status":"published"},"_oai":{"id":"oai:kitami-it.repo.nii.ac.jp:02000332","sets":["2","2:6"]},"author_link":[],"control_number":"2000332","item_7_biblio_info_6":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2022-09","bibliographicIssueDateType":"Issued"}}]},"item_7_date_granted_63":{"attribute_name":"学位授与年月日","attribute_value_mlt":[{"subitem_dategranted":"2022-09-06"}]},"item_7_degree_grantor_61":{"attribute_name":"学位授与機関","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_language":"ja","subitem_degreegrantor_name":"北見工業大学"}],"subitem_degreegrantor_identifier":[{"subitem_degreegrantor_identifier_name":"10106","subitem_degreegrantor_identifier_scheme":"kakenhi"}]}]},"item_7_degree_name_60":{"attribute_name":"学位名","attribute_value_mlt":[{"subitem_degreename":"博士(工学)","subitem_degreename_language":"ja"}]},"item_7_description_4":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"In this thesis, I study two different methods for improving multilingual automatic cyberbullying\ndetection. First, I study the effectiveness of Feature Density (FD) using different linguistically-backed\nfeature preprocessing methods in order to estimate dataset complexity, which in turn is\nused to comparatively estimate the potential performance of machine learning (ML) classifiers\nprior to any training. 
I hypothesize that estimating dataset complexity allows for the reduction\nof the number of required experiments iterations, making it possible to optimize the resource-intensive\ntraining of ML models which is becoming a serious issue due to the increases in available\ndataset sizes and the ever rising popularity of models based on Deep Neural Networks (DNN).\nThe problem of constantly increasing needs for more powerful computational resources is also\naffecting the environment due to alarmingly-growing amount of CO2 emissions caused by training\nof large-scale ML models. I use cyberbullying datasets collected for multiple languages, namely\nEnglish, Japanese and Polish. The difference in linguistic complexity of datasets allows me to\nadditionally discuss the efficacy of linguistically-backed word preprocessing.\nSecond, I study the selection of transfer languages for automatic abusive language detection.\nI demonstrate the effectiveness of cross-lingual transfer learning for zero-shot abusive language\ndetection. This way it is possible to use existing data from higher-resource languages to build\nbetter detection systems for languages lacking data. The datasets are from eight different languages\nfrom three language families. I measure the distance between the languages using several language\nsimilarity measures, especially by quantifying the World Atlas of Language Structures. 
I show\nthat there is a correlation between linguistic similarity and classifier performance, making it\npossible to choose an optimal transfer language for zero shot abusive language detection.\nNext, I demonstrate that this method is also generally applicable to multiple Natural Language\nProcessing tasks, specifically sentiment analysis, named entity recognition and dependency parsing.\nI show that there is also a correlation between linguistic similarity and zero-shot cross-lingual\ntransfer performance for these tasks, allowing me to select an ideal transfer language in order to\naid with the problem of dealing with languages that do not currently have a sufficient amount\nof data. Lastly, I show that the World Atlas of Language Structures can be quantified into an\neffective linguistic similarity method.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_7_dissertation_number_64":{"attribute_name":"学位授与番号","attribute_value_mlt":[{"subitem_dissertationnumber":"甲第203号"}]},"item_7_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.19000/0002000332","subitem_identifier_reg_type":"JaLC"}]},"item_7_select_15":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_select_item":"ETD","subitem_select_language":"en"}]},"item_7_text_66":{"attribute_name":"研究科・専攻名","attribute_value_mlt":[{"subitem_text_language":"ja","subitem_text_value":"生産基盤工学専攻"}]},"item_access_right":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorAlternatives":[{"creatorAlternative":"エロネン ユーソ カレビ クリスティアン","creatorAlternativeLang":"ja"}],"creatorNames":[{"creatorName":"Eronen Juuso Kalevi 
Kristian","creatorNameLang":"en"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_access","date":[{"dateType":"Available","dateValue":"2022-10-05"}],"filename":"PhD_Thesis_Eronen .pdf","filesize":[{"value":"1.8 MB"}],"format":"application/pdf","mimetype":"application/pdf","url":{"objectType":"fulltext","url":"https://kitami-it.repo.nii.ac.jp/record/2000332/files/PhD_Thesis_Eronen .pdf"},"version_id":"14d0c316-3965-4ada-b83c-10f66c5e2248"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"doctoral thesis","resourceuri":"http://purl.org/coar/resource_type/c_db06"}]},"item_title":"素性密度及びクロスリンガルゼロショット転移学習による多言語のネットいじめ自動検出の改良に関する研究","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"素性密度及びクロスリンガルゼロショット転移学習による多言語のネットいじめ自動検出の改良に関する研究","subitem_title_language":"ja"},{"subitem_title":"Improving Multilingual Automatic Cyberbullying Detection With Feature Density And Cross-lingual Zero-shot Transfer","subitem_title_language":"en"}]},"item_type_id":"7","owner":"15","path":["2","6"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2022-09-06"},"publish_date":"2022-09-06","publish_status":"0","recid":"2000332","relation_version_is_last":true,"title":["素性密度及びクロスリンガルゼロショット転移学習による多言語のネットいじめ自動検出の改良に関する研究"],"weko_creator_id":"15","weko_shared_id":3},"updated":"2022-12-13T02:23:05.467811+00:00"}