WebSVN – Moodle – Autoría – /lib/mlbackend/php/phpml/src/Phpml/Helper/Optimizer/ConjugateGradient.php

Rev	Autor	Línea Nro.	Línea
1	efrain	1	`<?php`
		2
		3	`declare(strict_types=1);`
		4
		5	`namespace Phpml\Helper\Optimizer;`
		6
		7	`use Closure;`
		8
		9	`/**`
		10	`* Conjugate Gradient method to solve a non-linear f(x) with respect to unknown x`
		11	`* See https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method)`
		12	`*`
		13	`* The method applied below is explained in the below document in a practical manner`
		14	`* - http://web.cs.iastate.edu/~cs577/handouts/conjugate-gradient.pdf`
		15	`*`
		16	`* However it is compliant with the general Conjugate Gradient method with`
		17	`* Fletcher-Reeves update method. Note that, the f(x) is assumed to be one-dimensional`
		18	`* and one gradient is utilized for all dimensions in the given data.`
		19	`*/`
		20	`class ConjugateGradient extends GD`
		21	`{`
		22	`public function runOptimization(array $samples, array $targets, Closure $gradientCb): array`
		23	`{`
		24	`$this->samples = $samples;`
		25	`$this->targets = $targets;`
		26	`$this->gradientCb = $gradientCb;`
		27	`$this->sampleCount = count($samples);`
		28	`$this->costValues = [];`
		29
		30	`$d = MP::muls($this->gradient($this->theta), -1);`
		31
		32	`for ($i = 0; $i < $this->maxIterations; ++$i) {`
		33	`// Obtain α that minimizes f(θ + α.d)`
		34	`$alpha = $this->getAlpha($d);`
		35
		36	`// θ(k+1) = θ(k) + α.d`
		37	`$thetaNew = $this->getNewTheta($alpha, $d);`
		38
		39	`// β = \|\|∇f(x(k+1))\|\|² ∕ \|\|∇f(x(k))\|\|²`
		40	`$beta = $this->getBeta($thetaNew);`
		41
		42	`// d(k+1) =–∇f(x(k+1)) + β(k).d(k)`
		43	`$d = $this->getNewDirection($thetaNew, $beta, $d);`
		44
		45	`// Save values for the next iteration`
		46	`$oldTheta = $this->theta;`
		47	`$this->costValues[] = $this->cost($thetaNew);`
		48
		49	`$this->theta = $thetaNew;`
		50	`if ($this->enableEarlyStop && $this->earlyStop($oldTheta)) {`
		51	`break;`
		52	`}`
		53	`}`
		54
		55	`$this->clear();`
		56
		57	`return $this->theta;`
		58	`}`
		59
		60	`/**`
		61	`* Executes the callback function for the problem and returns`
		62	`* sum of the gradient for all samples & targets.`
		63	`*/`
		64	`protected function gradient(array $theta): array`
		65	`{`
		66	`[, $updates, $penalty] = parent::gradient($theta);`
		67
		68	`// Calculate gradient for each dimension`
		69	`$gradient = [];`
		70	`for ($i = 0; $i <= $this->dimensions; ++$i) {`
		71	`if ($i === 0) {`
		72	`$gradient[$i] = array_sum($updates);`
		73	`} else {`
		74	`$col = array_column($this->samples, $i - 1);`
		75	`$error = 0;`
		76	`foreach ($col as $index => $val) {`
		77	`$error += $val * $updates[$index];`
		78	`}`
		79
		80	`$gradient[$i] = $error + $penalty * $theta[$i];`
		81	`}`
		82	`}`
		83
		84	`return $gradient;`
		85	`}`
		86
		87	`/**`
		88	`* Returns the value of f(x) for given solution`
		89	`*/`
		90	`protected function cost(array $theta): float`
		91	`{`
		92	`[$cost] = parent::gradient($theta);`
		93
		94	`return array_sum($cost) / (int) $this->sampleCount;`
		95	`}`
		96
		97	`/**`
		98	`* Calculates alpha that minimizes the function f(θ + α.d)`
		99	`* by performing a line search that does not rely upon the derivation.`
		100	`*`
		101	`* There are several alternatives for this function. For now, we`
		102	`* prefer a method inspired from the bisection method for its simplicity.`
		103	`* This algorithm attempts to find an optimum alpha value between 0.0001 and 0.01`
		104	`*`
		105	`* Algorithm as follows:`
		106	`* a) Probe a small alpha (0.0001) and calculate cost function`
		107	`* b) Probe a larger alpha (0.01) and calculate cost function`
		108	`* b-1) If cost function decreases, continue enlarging alpha`
		109	`* b-2) If cost function increases, take the midpoint and try again`
		110	`*/`
		111	`protected function getAlpha(array $d): float`
		112	`{`
		113	`$small = MP::muls($d, 0.0001);`
		114	`$large = MP::muls($d, 0.01);`
		115
		116	`// Obtain θ + α.d for two initial values, x0 and x1`
		117	`$x0 = MP::add($this->theta, $small);`
		118	`$x1 = MP::add($this->theta, $large);`
		119
		120	`$epsilon = 0.0001;`
		121	`$iteration = 0;`
		122	`do {`
		123	`$fx1 = $this->cost($x1);`
		124	`$fx0 = $this->cost($x0);`
		125
		126	`// If the difference between two values is small enough`
		127	`// then break the loop`
		128	`if (abs($fx1 - $fx0) <= $epsilon) {`
		129	`break;`
		130	`}`
		131
		132	`if ($fx1 < $fx0) {`
		133	`$x0 = $x1;`
		134	`$x1 = MP::adds($x1, 0.01); // Enlarge second`
		135	`} else {`
		136	`$x1 = MP::divs(MP::add($x1, $x0), 2.0);`
		137	`} // Get to the midpoint`
		138
		139	`$error = $fx1 / $this->dimensions;`
		140	`} while ($error <= $epsilon \|\| $iteration++ < 10);`
		141
		142	`// Return α = θ / d`
		143	`// For accuracy, choose a dimension which maximize \|d[i]\|`
		144	`$imax = 0;`
		145	`for ($i = 1; $i <= $this->dimensions; ++$i) {`
		146	`if (abs($d[$i]) > abs($d[$imax])) {`
		147	`$imax = $i;`
		148	`}`
		149	`}`
		150
		151	`if ($d[$imax] == 0) {`
		152	`return $x1[$imax] - $this->theta[$imax];`
		153	`}`
		154
		155	`return ($x1[$imax] - $this->theta[$imax]) / $d[$imax];`
		156	`}`
		157
		158	`/**`
		159	`* Calculates new set of solutions with given alpha (for each θ(k)) and`
		160	`* gradient direction.`
		161	`*`
		162	`* θ(k+1) = θ(k) + α.d`
		163	`*/`
		164	`protected function getNewTheta(float $alpha, array $d): array`
		165	`{`
		166	`return MP::add($this->theta, MP::muls($d, $alpha));`
		167	`}`
		168
		169	`/**`
		170	`* Calculates new beta (β) for given set of solutions by using`
		171	`* Fletcher–Reeves method.`
		172	`*`
		173	`* β = \|\|f(x(k+1))\|\|² ∕ \|\|f(x(k))\|\|²`
		174	`*`
		175	`* See:`
		176	`* R. Fletcher and C. M. Reeves, "Function minimization by conjugate gradients", Comput. J. 7 (1964), 149–154.`
		177	`*/`
		178	`protected function getBeta(array $newTheta): float`
		179	`{`
		180	`$gNew = $this->gradient($newTheta);`
		181	`$gOld = $this->gradient($this->theta);`
		182	`$dNew = 0;`
		183	`$dOld = 1e-100;`
		184	`for ($i = 0; $i <= $this->dimensions; ++$i) {`
		185	`$dNew += $gNew[$i] ** 2;`
		186	`$dOld += $gOld[$i] ** 2;`
		187	`}`
		188
		189	`return $dNew / $dOld;`
		190	`}`
		191
		192	`/**`
		193	`* Calculates the new conjugate direction`
		194	`*`
		195	`* d(k+1) =–∇f(x(k+1)) + β(k).d(k)`
		196	`*/`
		197	`protected function getNewDirection(array $theta, float $beta, array $d): array`
		198	`{`
		199	`$grad = $this->gradient($theta);`
		200
		201	`return MP::add(MP::muls($grad, -1), MP::muls($d, $beta));`
		202	`}`
		203	`}`
		204
		205	`/**`
		206	`* Handles element-wise vector operations between vector-vector`
		207	`* and vector-scalar variables`
		208	`*/`
		209	`class MP`
		210	`{`
		211	`/**`
		212	`* Element-wise <b>multiplication</b> of two vectors of the same size`
		213	`*/`
		214	`public static function mul(array $m1, array $m2): array`
		215	`{`
		216	`$res = [];`
		217	`foreach ($m1 as $i => $val) {`
		218	`$res[] = $val * $m2[$i];`
		219	`}`
		220
		221	`return $res;`
		222	`}`
		223
		224	`/**`
		225	`* Element-wise <b>division</b> of two vectors of the same size`
		226	`*/`
		227	`public static function div(array $m1, array $m2): array`
		228	`{`
		229	`$res = [];`
		230	`foreach ($m1 as $i => $val) {`
		231	`$res[] = $val / $m2[$i];`
		232	`}`
		233
		234	`return $res;`
		235	`}`
		236
		237	`/**`
		238	`* Element-wise <b>addition</b> of two vectors of the same size`
		239	`*/`
		240	`public static function add(array $m1, array $m2, int $mag = 1): array`
		241	`{`
		242	`$res = [];`
		243	`foreach ($m1 as $i => $val) {`
		244	`$res[] = $val + $mag * $m2[$i];`
		245	`}`
		246
		247	`return $res;`
		248	`}`
		249
		250	`/**`
		251	`* Element-wise <b>subtraction</b> of two vectors of the same size`
		252	`*/`
		253	`public static function sub(array $m1, array $m2): array`
		254	`{`
		255	`return self::add($m1, $m2, -1);`
		256	`}`
		257
		258	`/**`
		259	`* Element-wise <b>multiplication</b> of a vector with a scalar`
		260	`*/`
		261	`public static function muls(array $m1, float $m2): array`
		262	`{`
		263	`$res = [];`
		264	`foreach ($m1 as $val) {`
		265	`$res[] = $val * $m2;`
		266	`}`
		267
		268	`return $res;`
		269	`}`
		270
		271	`/**`
		272	`* Element-wise <b>division</b> of a vector with a scalar`
		273	`*/`
		274	`public static function divs(array $m1, float $m2): array`
		275	`{`
		276	`$res = [];`
		277	`foreach ($m1 as $val) {`
		278	`$res[] = $val / ($m2 + 1e-32);`
		279	`}`
		280
		281	`return $res;`
		282	`}`
		283
		284	`/**`
		285	`* Element-wise <b>addition</b> of a vector with a scalar`
		286	`*/`
		287	`public static function adds(array $m1, float $m2, int $mag = 1): array`
		288	`{`
		289	`$res = [];`
		290	`foreach ($m1 as $val) {`
		291	`$res[] = $val + $mag * $m2;`
		292	`}`
		293
		294	`return $res;`
		295	`}`
		296
		297	`/**`
		298	`* Element-wise <b>subtraction</b> of a vector with a scalar`
		299	`*/`
		300	`public static function subs(array $m1, float $m2): array`
		301	`{`
		302	`return self::adds($m1, $m2, -1);`
		303	`}`
		304	`}`

Proyectos de Subversion Moodle

(root)/lib/mlbackend/php/phpml/src/Phpml/Helper/Optimizer/ConjugateGradient.php @ 482 – Rev