@phdthesis{oai:kitami-it.repo.nii.ac.jp:00008658,
  author = {Lempa, Pawel Cezary},
  month  = {Sep},
  note   = {Language models are an indispensable element of Natural Language Processing (NLP)
    research. They are used in machine translation, speech recognition, part-of-speech tagging,
    handwriting recognition, syntactic parsing, information retrieval, and other tasks. In short,
    language models are probability distributions over sequences of words. There are countless
    NLP solutions, algorithms, and programs applying language models to specific tasks.
    Unfortunately, these are often not optimized, but rely on default, most commonly used sets of
    parameters. For example, many of them use numerous objective functions with different
    variables, but without proper weights applied to them. Users usually set these variables
    themselves, which keeps the results from exceeding a certain mediocre level. With a small
    number of variables, users can adjust them manually, but optimizing objective functions with
    a massive number of variables, especially multi-objective functions, is difficult and
    time-consuming. This was the motivation to propose the application of Genetic Algorithms
    (GAs) to optimize the weighting process. GAs are a subset of Evolutionary Algorithms (EAs),
    inspired by the process of natural selection. They use bio-inspired operators such as
    selection, crossover, and mutation to generate solutions to optimization and search problems.
    In this way, GAs represent randomized heuristic search strategies simulating the natural
    selection process, in which the population is composed of candidate solutions. They focus on
    evolving a population from which strong and diverse candidates can emerge via mutation and
    crossover (mating). There exist different types of GAs; moreover, the same type of GA can
    produce solutions of different quality depending on multiple variables, including the
    starting population, the number of generations, or the fitness function. Finding the best
    starting parameters and the type of GA most appropriate for a given optimization problem is
    the next challenge. For that reason, I created a library that automatically applies multiple
    types of GAs for optimization purposes. The library was created in the C++ language with the
    use of the .NET environment. Its main goal is to be usable with different secondary programs
    and applications without significantly interfering with the original structure of the
    solution. The basic functions of the library allow the use of several different kinds of GAs,
    such as: Simple GA, Uniform Crossover GA, n-point Crossover GA, GA with sexual selection, GA
    with chromosome aging, and so forth. The user can freely define the starting parameters of a
    GA, including: population size, starting population, number of generations, and type of
    mutation and crossover. Advanced functions of the library allow the use of multithreaded
    processing to run several GAs at the same time. The basic multithreading option runs the same
    type of GA with different starting parameters; the advanced version allows different threads
    to exchange information every set number of generations. In the case of a large number of
    variables to compute, it is also possible to split mutation and crossover across several
    threads running at the same time. The most important functionality of the library is its easy
    adaptability to the optimization of different kinds of applications. The library is used to
    run the original program in every generation of the GA with new weights for the variables
    generated by natural selection.
    The running time is closely related to the processing time of the original program; it
    depends on the type of the original solution, and the time needed to process one generation
    is similar to that of one run of the optimized program. During the creation and testing of
    the library, numerous experiments were carried out. In preliminary experiments, the library
    was used to optimize the construction of mechanical elements. Later, the application was
    tested on natural language processing and related solutions. One part of the research was
    optimizing the Quantitative Learner's Motivation Model. The goal of this experiment was to
    optimize the formula for predicting learning motivation by means of different weights for
    three values: interest, usefulness in the future, and satisfaction. For this optimization, an
    application in C\# using the GA library was created. Data sets for the experiments were
    acquired from questionnaires inquiring about the above three elements in actual university
    classes. The results of the experiment showed an improvement in the estimation of students'
    learning motivation of up to over 17 percentage points of F-score. The final experiment aimed
    to optimize an implementation of Support Vector Machines (SVMs) for the problem of pattern
    recognition in natural language data. SVMs are a machine learning algorithm based on
    statistical learning theory. They are applied to a large number of real-world applications,
    such as text categorization, handwritten character recognition, etc. The original program was
    created in C++. For this application, numerous types of GAs were tested with different
    numbers of generations, weight ranges, and starting parameters. The optimization was
    successful, with different scales of improvement depending on the previously mentioned
    conditions; the highest achieved improvement was over 6 percentage points of recall compared
    to the baseline, reaching 78\%. All experimental data are included in this work.},
  school = {北見工業大学},
  title  = {Development of Optimization Method with the Use of Genetic Algorithms for Natural Language and Related Models},
  year   = {2018}
}
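The weight-optimization scheme described in the note above (a population of candidate weight vectors evolved through selection, crossover, and mutation, with fitness obtained by re-running the optimized program with each candidate) can be illustrated with a minimal sketch. The C++ program below is such a sketch, not the thesis library itself: its .NET-based API is not reproduced here, and the objective function, parameter values, and all identifiers are illustrative assumptions.

// Minimal illustrative sketch of a weight-optimizing GA of the kind described
// in the abstract. The objective function and all names are hypothetical; in
// the thesis, fitness of a weight vector comes from re-running the optimized
// program (e.g. an F-score), not from a closed-form function.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <random>
#include <vector>

using Weights = std::vector<double>;

// Placeholder objective: higher is better. Stands in for one run of the
// optimized program with the given weights.
double evaluate(const Weights& w) {
    double score = 0.0;
    for (std::size_t i = 0; i < w.size(); ++i)
        score -= (w[i] - 0.5 * (i + 1)) * (w[i] - 0.5 * (i + 1));
    return score;
}

int main() {
    const std::size_t kGenes = 3;   // e.g. interest, usefulness, satisfaction
    const std::size_t kPop = 40;    // population size
    const int kGenerations = 200;   // number of generations

    std::mt19937 rng(42);
    std::uniform_real_distribution<double> init(0.0, 2.0);
    std::normal_distribution<double> noise(0.0, 0.1);
    std::uniform_int_distribution<std::size_t> pick(0, kPop - 1);
    std::bernoulli_distribution coin(0.5);

    // Random starting population of weight vectors.
    std::vector<Weights> pop(kPop, Weights(kGenes));
    for (auto& ind : pop)
        for (auto& g : ind) g = init(rng);

    for (int gen = 0; gen < kGenerations; ++gen) {
        std::vector<Weights> next;

        // Elitism: carry the best individual over unchanged.
        auto elite = *std::max_element(pop.begin(), pop.end(),
            [](const Weights& a, const Weights& b) { return evaluate(a) < evaluate(b); });
        next.push_back(elite);

        while (next.size() < kPop) {
            // Tournament selection of two parents.
            auto tournament = [&]() {
                const Weights& a = pop[pick(rng)];
                const Weights& b = pop[pick(rng)];
                return evaluate(a) > evaluate(b) ? a : b;
            };
            Weights p1 = tournament(), p2 = tournament();

            // Uniform crossover followed by Gaussian mutation.
            Weights child(kGenes);
            for (std::size_t i = 0; i < kGenes; ++i)
                child[i] = (coin(rng) ? p1[i] : p2[i]) + noise(rng);
            next.push_back(child);
        }
        pop = std::move(next);
    }

    const Weights& best = *std::max_element(pop.begin(), pop.end(),
        [](const Weights& a, const Weights& b) { return evaluate(a) < evaluate(b); });
    std::cout << "best weights:";
    for (double g : best) std::cout << ' ' << g;
    std::cout << "  fitness: " << evaluate(best) << '\n';
    return 0;
}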