WebSVN – Moodle – Autoría – /lib/yuilib/3.18.1/text-wordbreak/text-wordbreak.js

Rev	Autor	Línea Nro.	Línea
YUI.add('text-wordbreak', function (Y, NAME) {

/**
 * Provides utility methods for splitting strings on word breaks and determining
 * whether a character index represents a word boundary.
 *
 * @module text
 * @submodule text-wordbreak
 */

/**
 * <p>
 * Provides utility methods for splitting strings on word breaks and determining
 * whether a character index represents a word boundary, using the generic word
 * breaking algorithm defined in the Unicode Text Segmentation guidelines
 * (<a href="http://unicode.org/reports/tr29/#Word_Boundaries">Unicode Standard
 * Annex #29</a>).
 * </p>
 *
 * <p>
 * This algorithm provides a reasonable default for many languages. However, it
 * does not cover language or context specific requirements, and it does not
 * provide meaningful results at all for languages that don't use spaces between
 * words, such as Chinese, Japanese, Thai, Lao, Khmer, and others. Server-based
 * word breaking services usually provide significantly better results with
 * better performance.
 * </p>
 *
 * @class Text.WordBreak
 * @static
 */

var Text   = Y.Text,
    WBData = Text.Data.WordBreak,

    // Constants representing code point classifications. Each value doubles as
    // the index of the matching regex in SETS below, except OTHER, which has no
    // regex and is the fallback for characters that match none of the sets.
    ALETTER      = 0,
    MIDNUMLET    = 1,
    MIDLETTER    = 2,
    MIDNUM       = 3,
    NUMERIC      = 4,
    CR           = 5,
    LF           = 6,
    NEWLINE      = 7,
    EXTEND       = 8,
    FORMAT       = 9,
    KATAKANA     = 10,
    EXTENDNUMLET = 11,
    OTHER        = 12,

    // RegExp objects generated from code point data. Each regex matches a single
    // character against a set of Unicode code points. The index of each item in
    // this array must match its corresponding code point constant value defined
    // above.
    SETS = [
        new RegExp(WBData.aletter),
        new RegExp(WBData.midnumlet),
        new RegExp(WBData.midletter),
        new RegExp(WBData.midnum),
        new RegExp(WBData.numeric),
        new RegExp(WBData.cr),
        new RegExp(WBData.lf),
        new RegExp(WBData.newline),
        new RegExp(WBData.extend),
        new RegExp(WBData.format),
        new RegExp(WBData.katakana),
        new RegExp(WBData.extendnumlet)
    ],

    EMPTY_STRING = '',

    // Anchored, so it only matches when the *entire* candidate word matches the
    // punctuation pattern supplied by the word break data module.
    PUNCTUATION  = new RegExp('^' + WBData.punctuation + '$'),

    // Unanchored, so it matches when the candidate word contains any
    // whitespace character.
    WHITESPACE   = /\s/,

    WordBreak = {
        // -- Public Static Methods --------------------------------------------

        /**
         * Splits the specified string into an array of individual words.
         *
         * The string is walked one UTF-16 code unit at a time; a word is
         * emitted each time <code>_isWordBoundary</code> reports a boundary
         * after the current index. Words containing whitespace and words
         * consisting solely of punctuation are dropped unless the matching
         * option is set.
         *
         * @method getWords
         * @param {String} string String to split.
         * @param {Object} options (optional) Options object containing zero or
         *   more of the following properties:
         *
         * <dl>
         *   <dt>ignoreCase (Boolean)</dt>
         *   <dd>
         *     If <code>true</code>, the string will be converted to lowercase
         *     before being split. Default is <code>false</code>.
         *   </dd>
         *
         *   <dt>includePunctuation (Boolean)</dt>
         *   <dd>
         *     If <code>true</code>, the returned array will include punctuation
         *     characters. Default is <code>false</code>.
         *   </dd>
         *
         *   <dt>includeWhitespace (Boolean)</dt>
         *   <dd>
         *     If <code>true</code>, the returned array will include whitespace
         *     characters. Default is <code>false</code>.
         *   </dd>
         * </dl>
         * @return {Array} Array of words.
         * @static
         */
        getWords: function (string, options) {
            var i     = 0,
                word  = [],
                words = [],
                chr,
                includePunctuation,
                includeWhitespace,
                len,
                map;

            if (!options) {
                options = {};
            }

            // Lowercase *before* classifying. toLowerCase() can change the
            // length of a string (for example U+0130 becomes "i" followed by
            // U+0307), so a map built from the original string would no longer
            // line up index-for-index with the characters read below.
            if (options.ignoreCase) {
                string = string.toLowerCase();
            }

            map = WordBreak._classify(string);
            len = map.length;

            includePunctuation = options.includePunctuation;
            includeWhitespace  = options.includeWhitespace;

            // Loop through each character in the classification map and
            // determine whether it precedes a word boundary, building an array
            // of distinct words as we go.
            for (; i < len; ++i) {
                chr = string.charAt(i);

                // Append this character to the current word.
                word.push(chr);

                // If there's a word boundary between the current character and
                // the next character, append the current word to the words
                // array and start building a new word.
                if (WordBreak._isWordBoundary(map, i)) {
                    word = word.join(EMPTY_STRING);

                    if (word &&
                            (includeWhitespace  || !WHITESPACE.test(word)) &&
                            (includePunctuation || !PUNCTUATION.test(word))) {
                        words.push(word);
                    }

                    word = [];
                }
            }

            return words;
        },

        /**
         * Returns an array containing only unique words from the specified
         * string. For example, the string <code>'foo bar baz foo'</code> would
         * result in the array <code>['foo', 'bar', 'baz']</code>.
         *
         * @method getUniqueWords
         * @param {String} string String to split.
         * @param {Object} options (optional) Options (see
         *   <code>getWords()</code> for details).
         * @return {Array} Array of unique words.
         * @static
         */
        getUniqueWords: function (string, options) {
            return Y.Array.unique(WordBreak.getWords(string, options));
        },

        /**
         * <p>
         * Returns <code>true</code> if there is a word boundary between the
         * specified character index and the next character index (or the end of
         * the string).
         * </p>
         *
         * <p>
         * Note that there are always word breaks at the beginning and end of a
         * string, so <code>isWordBoundary('', 0)</code> and
         * <code>isWordBoundary('a', 0)</code> will both return
         * <code>true</code>.
         * </p>
         *
         * <p>
         * The whole string is classified on every call. Negative indexes and
         * indexes past the last character return <code>false</code>, except
         * index <code>0</code> of an empty string.
         * </p>
         *
         * @method isWordBoundary
         * @param {String} string String to test.
         * @param {Number} index Character index to test within the string.
         * @return {Boolean} <code>true</code> for a word boundary,
         *   <code>false</code> otherwise.
         * @static
         */
        isWordBoundary: function (string, index) {
            return WordBreak._isWordBoundary(WordBreak._classify(string), index);
        },

        // -- Protected Static Methods -----------------------------------------

        /**
         * Returns a character classification map for the specified string.
         *
         * The map has one entry per UTF-16 code unit of the string. Each entry
         * is the index of the first regex in <code>SETS</code> that matches the
         * character, or <code>OTHER</code> when none match.
         *
         * @method _classify
         * @param {String} string String to classify.
         * @return {Array} Classification map.
         * @protected
         * @static
         */
        _classify: function (string) {
            var chr,
                map          = [],
                i            = 0,
                j,
                set,
                stringLength = string.length,
                setsLength   = SETS.length,
                type;

            for (; i < stringLength; ++i) {
                chr  = string.charAt(i);
                type = OTHER;

                // First matching set wins, so the order of SETS matters for
                // any character that could match more than one pattern.
                for (j = 0; j < setsLength; ++j) {
                    set = SETS[j];

                    if (set && set.test(chr)) {
                        type = j;
                        break;
                    }
                }

                map.push(type);
            }

            return map;
        },

        /**
         * <p>
         * Returns <code>true</code> if there is a word boundary between the
         * specified character index and the next character index (or the end of
         * the string).
         * </p>
         *
         * <p>
         * Note that there are always word breaks at the beginning and end of a
         * string, so <code>_isWordBoundary('', 0)</code> and
         * <code>_isWordBoundary('a', 0)</code> will both return
         * <code>true</code>.
         * </p>
         *
         * <p>
         * The rules are evaluated in the order written below, which is not the
         * numeric order of the rules in UAX #29; the first rule that matches
         * decides the result.
         * </p>
         *
         * @method _isWordBoundary
         * @param {Array} map Character classification map generated by
         *   <code>_classify</code>.
         * @param {Number} index Character index to test.
         * @return {Boolean} <code>true</code> for a word boundary,
         *   <code>false</code> otherwise (including for a negative index or an
         *   index past the end of a non-empty map).
         * @protected
         * @static
         */
        _isWordBoundary: function (map, index) {
            var prevType,
                type,
                nextType,
                nextNextType;

            // Out-of-range indexes are never boundaries. Index 0 is let through
            // even for an empty map so that the empty string reports a boundary.
            if (index < 0 || (index > map.length - 1 && index !== 0)) {
                return false;
            }

            // Neighbours outside the map read as undefined and therefore match
            // none of the classification constants below.
            type     = map[index];
            nextType = map[index + 1];

            // WB5. Don't break between most letters.
            if (type === ALETTER && nextType === ALETTER) {
                return false;
            }

            nextNextType = map[index + 2];

            // WB6. Don't break letters across certain punctuation.
            if (type === ALETTER &&
                    (nextType === MIDLETTER || nextType === MIDNUMLET) &&
                    nextNextType === ALETTER) {
                return false;
            }

            prevType = map[index - 1];

            // WB7. Don't break letters across certain punctuation.
            if ((type === MIDLETTER || type === MIDNUMLET) &&
                    nextType === ALETTER &&
                    prevType === ALETTER) {
                return false;
            }

            // WB8/WB9/WB10. Don't break inside sequences of digits or digits
            // adjacent to letters.
            if ((type === NUMERIC || type === ALETTER) &&
                    (nextType === NUMERIC || nextType === ALETTER)) {
                return false;
            }

            // WB11. Don't break inside numeric sequences like "3.2" or
            // "3,456.789".
            if ((type === MIDNUM || type === MIDNUMLET) &&
                    nextType === NUMERIC &&
                    prevType === NUMERIC) {
                return false;
            }

            // WB12. Don't break inside numeric sequences like "3.2" or
            // "3,456.789".
            if (type === NUMERIC &&
                    (nextType === MIDNUM || nextType === MIDNUMLET) &&
                    nextNextType === NUMERIC) {
                return false;
            }

            // WB4. Ignore format and extend characters.
            // NOTE: this is a simplification. No boundary is ever reported
            // when the current, previous or next character is Extend/Format,
            // and because this check runs before WB3a/WB3b it also suppresses
            // breaks next to newlines in that situation.
            if (type === EXTEND || type === FORMAT ||
                    prevType === EXTEND || prevType === FORMAT ||
                    nextType === EXTEND || nextType === FORMAT) {
                return false;
            }

            // WB3. Don't break inside CRLF.
            if (type === CR && nextType === LF) {
                return false;
            }

            // WB3a. Break after newlines (including CR and LF): the current
            // character is the newline, so the boundary follows it.
            if (type === NEWLINE || type === CR || type === LF) {
                return true;
            }

            // WB3b. Break before newlines (including CR and LF): the next
            // character is the newline, so the boundary precedes it.
            if (nextType === NEWLINE || nextType === CR || nextType === LF) {
                return true;
            }

            // WB13. Don't break between Katakana characters.
            if (type === KATAKANA && nextType === KATAKANA) {
                return false;
            }

            // WB13a. Don't break from extenders.
            if (nextType === EXTENDNUMLET &&
                    (type === ALETTER || type === NUMERIC || type === KATAKANA ||
                    type === EXTENDNUMLET)) {
                return false;
            }

            // WB13b. Don't break from extenders.
            if (type === EXTENDNUMLET &&
                    (nextType === ALETTER || nextType === NUMERIC ||
                    nextType === KATAKANA)) {
                return false;
            }

            // Break after any character not covered by the rules above.
            return true;
        }
    };

Text.WordBreak = WordBreak;


}, '3.18.1', {"requires": ["array-extras", "text-data-wordbreak"]});

Proyectos de Subversion Moodle

(root)/lib/yuilib/3.18.1/text-wordbreak/text-wordbreak.js – Rev 1