WebSVN – Moodle – Autoría – /repository/url/locallib.php

Rev	Autor	Línea Nro.	Línea
1	efrain	1	`<?php`
		2
		3	`/**`
		4	`* Copyright (c) 2008, David R. Nadeau, NadeauSoftware.com.`
		5	`* All rights reserved.`
		6	`*`
		7	`* Redistribution and use in source and binary forms, with or without`
		8	`* modification, are permitted provided that the following conditions`
		9	`* are met:`
		10	`*`
		11	`* * Redistributions of source code must retain the above copyright`
		12	`* notice, this list of conditions and the following disclaimer.`
		13	`*`
		14	`* * Redistributions in binary form must reproduce the above`
		15	`* copyright notice, this list of conditions and the following`
		16	`* disclaimer in the documentation and/or other materials provided`
		17	`* with the distribution.`
		18	`*`
		19	`* * Neither the names of David R. Nadeau or NadeauSoftware.com, nor`
		20	`* the names of its contributors may be used to endorse or promote`
		21	`* products derived from this software without specific prior`
		22	`* written permission.`
		23	`*`
		24	`* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
		25	`* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
		26	`* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS`
		27	`* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE`
		28	`* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,`
		29	`* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,`
		30	`* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;`
		31	`* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER`
		32	`* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT`
		33	`* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY`
		34	`* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY`
		35	`* OF SUCH DAMAGE.`
		36	`*/`
		37
		38	`/*`
		39	`* This is a BSD License approved by the Open Source Initiative (OSI).`
		40	`* See: http://www.opensource.org/licenses/bsd-license.php`
		41	`*/`
		42
		43	`defined('MOODLE_INTERNAL') \|\| die();`
		44
		45	`/**`
		46	`* Combine a base URL and a relative URL to produce a new`
		47	`* absolute URL. The base URL is often the URL of a page,`
		48	`* and the relative URL is a URL embedded on that page.`
		49	`*`
		50	`* This function implements the "absolutize" algorithm from`
		51	`* the RFC3986 specification for URLs.`
		52	`*`
		53	`* This function supports multi-byte characters with the UTF-8 encoding,`
		54	`* per the URL specification.`
		55	`*`
		56	`* Parameters:`
		57	`* baseUrl the absolute base URL.`
		58	`*`
		59	`* relativeUrl the relative URL to convert.`
		60	`*`
		61	`* Return values:`
		62	`* An absolute URL that combines parts of the base and relative`
		63	`* URLs, or FALSE if the base URL is not absolute or if either`
		64	`* URL cannot be parsed.`
		65	`*/`
		66	`function url_to_absolute( $baseUrl, $relativeUrl )`
		67	`{`
		68	`// If relative URL has a scheme, clean path and return.`
		69	`$r = split_url( $relativeUrl );`
		70	`if ( $r === FALSE )`
		71	`return FALSE;`
		72	`if ( !empty( $r['scheme'] ) )`
		73	`{`
		74	`if ( !empty( $r['path'] ) && $r['path'][0] == '/' )`
		75	`$r['path'] = url_remove_dot_segments( $r['path'] );`
		76	`return join_url( $r );`
		77	`}`
		78
		79	`// Make sure the base URL is absolute.`
		80	`$b = split_url( $baseUrl );`
		81	`if ( $b === FALSE \|\| empty( $b['scheme'] ) \|\| empty( $b['host'] ) )`
		82	`return FALSE;`
		83	`$r['scheme'] = $b['scheme'];`
		84	`if (empty($b['path'])) {`
		85	`$b['path'] = '';`
		86	`}`
		87
		88	`// If relative URL has an authority, clean path and return.`
		89	`if ( isset( $r['host'] ) )`
		90	`{`
		91	`if ( !empty( $r['path'] ) )`
		92	`$r['path'] = url_remove_dot_segments( $r['path'] );`
		93	`return join_url( $r );`
		94	`}`
		95	`unset( $r['port'] );`
		96	`unset( $r['user'] );`
		97	`unset( $r['pass'] );`
		98
		99	`// Copy base authority.`
		100	`$r['host'] = $b['host'];`
		101	`if ( isset( $b['port'] ) ) $r['port'] = $b['port'];`
		102	`if ( isset( $b['user'] ) ) $r['user'] = $b['user'];`
		103	`if ( isset( $b['pass'] ) ) $r['pass'] = $b['pass'];`
		104
		105	`// If relative URL has no path, use base path`
		106	`if ( empty( $r['path'] ) )`
		107	`{`
		108	`if ( !empty( $b['path'] ) )`
		109	`$r['path'] = $b['path'];`
		110	`if ( !isset( $r['query'] ) && isset( $b['query'] ) )`
		111	`$r['query'] = $b['query'];`
		112	`return join_url( $r );`
		113	`}`
		114
		115	`// If relative URL path doesn't start with /, merge with base path.`
		116	`if ($r['path'][0] != '/') {`
		117	`$base = core_text::strrchr($b['path'], '/', TRUE);`
		118	`if ($base === FALSE) {`
		119	`$base = '';`
		120	`}`
		121	`$r['path'] = $base . '/' . $r['path'];`
		122	`}`
		123	`$r['path'] = url_remove_dot_segments($r['path']);`
		124	`return join_url($r);`
		125	`}`
		126
		127	`/**`
		128	`* Filter out "." and ".." segments from a URL's path and return`
		129	`* the result.`
		130	`*`
		131	`* This function implements the "remove_dot_segments" algorithm from`
		132	`* the RFC3986 specification for URLs.`
		133	`*`
		134	`* This function supports multi-byte characters with the UTF-8 encoding,`
		135	`* per the URL specification.`
		136	`*`
		137	`* Parameters:`
		138	`* path the path to filter`
		139	`*`
		140	`* Return values:`
		141	`* The filtered path with "." and ".." removed.`
		142	`*/`
		143	`function url_remove_dot_segments( $path )`
		144	`{`
		145	`// multi-byte character explode`
		146	`$inSegs = preg_split( '!/!u', $path );`
		147	`$outSegs = array( );`
		148	`foreach ( $inSegs as $seg )`
		149	`{`
		150	`if ( $seg == '' \|\| $seg == '.')`
		151	`continue;`
		152	`if ( $seg == '..' )`
		153	`array_pop( $outSegs );`
		154	`else`
		155	`array_push( $outSegs, $seg );`
		156	`}`
		157	`$outPath = implode( '/', $outSegs );`
		158
		159	`if ($path[0] == '/') {`
		160	`$outPath = '/' . $outPath;`
		161	`}`
		162
		163	`// Compare last multi-byte character against '/'.`
		164	`if ($outPath != '/' && (core_text::strlen($path) - 1) == core_text::strrpos($path, '/', 'UTF-8')) {`
		165	`$outPath .= '/';`
		166	`}`
		167	`return $outPath;`
		168	`}`
		169
		170	`/**`
		171	`* This function parses an absolute or relative URL and splits it`
		172	`* into individual components.`
		173	`*`
		174	`* RFC3986 specifies the components of a Uniform Resource Identifier (URI).`
		175	`* A portion of the ABNFs are repeated here:`
		176	`*`
		177	`* URI-reference = URI`
		178	`* / relative-ref`
		179	`*`
		180	`* URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]`
		181	`*`
		182	`* relative-ref = relative-part [ "?" query ] [ "#" fragment ]`
		183	`*`
		184	`* hier-part = "//" authority path-abempty`
		185	`* / path-absolute`
		186	`* / path-rootless`
		187	`* / path-empty`
		188	`*`
		189	`* relative-part = "//" authority path-abempty`
		190	`* / path-absolute`
		191	`* / path-noscheme`
		192	`* / path-empty`
		193	`*`
		194	`* authority = [ userinfo "@" ] host [ ":" port ]`
		195	`*`
		196	`* So, a URL has the following major components:`
		197	`*`
		198	`* scheme`
		199	`* The name of a method used to interpret the rest of`
		200	`* the URL. Examples: "http", "https", "mailto", "file".`
		201	`*`
		202	`* authority`
		203	`* The name of the authority governing the URL's name`
		204	`* space. Examples: "example.com", "user@example.com",`
		205	`* "example.com:80", "user:password@example.com:80".`
		206	`*`
		207	`* The authority may include a host name, port number,`
		208	`* user name, and password.`
		209	`*`
		210	`* The host may be a name, an IPv4 numeric address, or`
		211	`* an IPv6 numeric address.`
		212	`*`
		213	`* path`
		214	`* The hierarchical path to the URL's resource.`
		215	`* Examples: "/index.htm", "/scripts/page.php".`
		216	`*`
		217	`* query`
		218	`* The data for a query. Examples: "?search=google.com".`
		219	`*`
		220	`* fragment`
		221	`* The name of a secondary resource relative to that named`
		222	`* by the path. Examples: "#section1", "#header".`
		223	`*`
		224	`* An "absolute" URL must include a scheme and path. The authority, query,`
		225	`* and fragment components are optional.`
		226	`*`
		227	`* A "relative" URL does not include a scheme and must include a path. The`
		228	`* authority, query, and fragment components are optional.`
		229	`*`
		230	`* This function splits the $url argument into the following components`
		231	`* and returns them in an associative array. Keys to that array include:`
		232	`*`
		233	`* "scheme" The scheme, such as "http".`
		234	`* "host" The host name, IPv4, or IPv6 address.`
		235	`* "port" The port number.`
		236	`* "user" The user name.`
		237	`* "pass" The user password.`
		238	`* "path" The path, such as a file path for "http".`
		239	`* "query" The query.`
		240	`* "fragment" The fragment.`
		241	`*`
		242	`* One or more of these may not be present, depending upon the URL.`
		243	`*`
		244	`* Optionally, the "user", "pass", "host" (if a name, not an IP address),`
		245	`* "path", "query", and "fragment" may have percent-encoded characters`
		246	`* decoded. The "scheme" and "port" cannot include percent-encoded`
		247	`* characters and are never decoded. Decoding occurs after the URL has`
		248	`* been parsed.`
		249	`*`
		250	`* Parameters:`
		251	`* url the URL to parse.`
		252	`*`
		253	`* decode an optional boolean flag selecting whether`
		254	`* to decode percent encoding or not. Default = FALSE.`
		255	`*`
		256	`* Return values:`
		257	`* the associative array of URL parts, or FALSE if the URL is`
		258	`* too malformed to recognize any parts.`
		259	`*/`
		260	`function split_url( $url, $decode=FALSE)`
		261	`{`
		262	`// Character sets from RFC3986.`
		263	`$xunressub = 'a-zA-Z\d\-._~\!$&\'()*+,;=';`
		264	`$xpchar = $xunressub . ':@% ';`
		265
		266	`// Scheme from RFC3986.`
		267	`$xscheme = '([a-zA-Z][a-zA-Z\d+-.]*)';`
		268
		269	`// User info (user + password) from RFC3986.`
		270	`$xuserinfo = '(([' . $xunressub . '%]*)' .`
		271	`'(:([' . $xunressub . ':%]*))?)';`
		272
		273	`// IPv4 from RFC3986 (without digit constraints).`
		274	`$xipv4 = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';`
		275
		276	`// IPv6 from RFC2732 (without digit and grouping constraints).`
		277	`$xipv6 = '(\[([a-fA-F\d.:]+)\])';`
		278
		279	`// Host name from RFC1035. Technically, must start with a letter.`
		280	`// Relax that restriction to better parse URL structure, then`
		281	`// leave host name validation to application.`
		282	`$xhost_name = '([a-zA-Z\d\-.%]+)';`
		283
		284	`// Authority from RFC3986. Skip IP future.`
		285	`$xhost = '(' . $xhost_name . '\|' . $xipv4 . '\|' . $xipv6 . ')';`
		286	`$xport = '(\d*)';`
		287	`$xauthority = '((' . $xuserinfo . '@)?' . $xhost .`
		288	`'?(:' . $xport . ')?)';`
		289
		290	`// Path from RFC3986. Blend absolute & relative for efficiency.`
		291	`$xslash_seg = '(/[' . $xpchar . ']*)';`
		292	`$xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . '])))';`
		293	`$xpath_rel = '([' . $xpchar . ']+' . $xslash_seg . '*)';`
		294	`$xpath_abs = '(/(' . $xpath_rel . ')?)';`
		295	`$xapath = '(' . $xpath_authabs . '\|' . $xpath_abs .`
		296	`'\|' . $xpath_rel . ')';`
		297
		298	`// Query and fragment from RFC3986.`
		299	`$xqueryfrag = '([' . $xpchar . '/?' . ']*)';`
		300
		301	`// URL.`
		302	`$xurl = '^(' . $xscheme . ':)?' . $xapath . '?' .`
		303	`'(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';`
		304
		305
		306	`// Split the URL into components.`
		307	`if ( !preg_match( '!' . $xurl . '!', $url, $m ) )`
		308	`return FALSE;`
		309
		310	`if ( !empty($m[2]) ) $parts['scheme'] = strtolower($m[2]);`
		311
		312	`if ( !empty($m[7]) ) {`
		313	`if ( isset( $m[9] ) ) $parts['user'] = $m[9];`
		314	`else $parts['user'] = '';`
		315	`}`
		316	`if ( !empty($m[10]) ) $parts['pass'] = $m[11];`
		317
		318	`if ( !empty($m[13]) ) $h=$parts['host'] = $m[13];`
		319	`else if ( !empty($m[14]) ) $parts['host'] = $m[14];`
		320	`else if ( !empty($m[16]) ) $parts['host'] = $m[16];`
		321	`else if ( !empty( $m[5] ) ) $parts['host'] = '';`
		322	`if ( !empty($m[17]) ) $parts['port'] = $m[18];`
		323
		324	`if ( !empty($m[19]) ) $parts['path'] = $m[19];`
		325	`else if ( !empty($m[21]) ) $parts['path'] = $m[21];`
		326	`else if ( !empty($m[25]) ) $parts['path'] = $m[25];`
		327
		328	`if ( !empty($m[27]) ) $parts['query'] = $m[28];`
		329	`if ( !empty($m[29]) ) $parts['fragment']= $m[30];`
		330
		331	`if ( !$decode )`
		332	`return $parts;`
		333	`if ( !empty($parts['user']) )`
		334	`$parts['user'] = rawurldecode( $parts['user'] );`
		335	`if ( !empty($parts['pass']) )`
		336	`$parts['pass'] = rawurldecode( $parts['pass'] );`
		337	`if ( !empty($parts['path']) )`
		338	`$parts['path'] = rawurldecode( $parts['path'] );`
		339	`if ( isset($h) )`
		340	`$parts['host'] = rawurldecode( $parts['host'] );`
		341	`if ( !empty($parts['query']) )`
		342	`$parts['query'] = rawurldecode( $parts['query'] );`
		343	`if ( !empty($parts['fragment']) )`
		344	`$parts['fragment'] = rawurldecode( $parts['fragment'] );`
		345	`return $parts;`
		346	`}`
		347
		348	`/**`
		349	`* This function joins together URL components to form a complete URL.`
		350	`*`
		351	`* RFC3986 specifies the components of a Uniform Resource Identifier (URI).`
		352	`* This function implements the specification's "component recomposition"`
		353	`* algorithm for combining URI components into a full URI string.`
		354	`*`
		355	`* The $parts argument is an associative array containing zero or`
		356	`* more of the following:`
		357	`*`
		358	`* "scheme" The scheme, such as "http".`
		359	`* "host" The host name, IPv4, or IPv6 address.`
		360	`* "port" The port number.`
		361	`* "user" The user name.`
		362	`* "pass" The user password.`
		363	`* "path" The path, such as a file path for "http".`
		364	`* "query" The query.`
		365	`* "fragment" The fragment.`
		366	`*`
		367	`* The "port", "user", and "pass" values are only used when a "host"`
		368	`* is present.`
		369	`*`
		370	`* The optional $encode argument indicates if appropriate URL components`
		371	`* should be percent-encoded as they are assembled into the URL. Encoding`
		372	`* is only applied to the "user", "pass", "host" (if a host name, not an`
		373	`* IP address), "path", "query", and "fragment" components. The "scheme"`
		374	`* and "port" are never encoded. When a "scheme" and "host" are both`
		375	`* present, the "path" is presumed to be hierarchical and encoding`
		376	`* processes each segment of the hierarchy separately (i.e., the slashes`
		377	`* are left alone).`
		378	`*`
		379	`* The assembled URL string is returned.`
		380	`*`
		381	`* Parameters:`
		382	`* parts an associative array of strings containing the`
		383	`* individual parts of a URL.`
		384	`*`
		385	`* encode an optional boolean flag selecting whether`
		386	`* to do percent encoding or not. Default = FALSE.`
		387	`*`
		388	`* Return values:`
		389	`* Returns the assembled URL string. The string is an absolute`
		390	`* URL if a scheme is supplied, and a relative URL if not. An`
		391	`* empty string is returned if the $parts array does not contain`
		392	`* any of the needed values.`
		393	`*/`
		394	`function join_url( $parts, $encode=FALSE)`
		395	`{`
		396	`if ( $encode )`
		397	`{`
		398	`if ( isset( $parts['user'] ) )`
		399	`$parts['user'] = rawurlencode( $parts['user'] );`
		400	`if ( isset( $parts['pass'] ) )`
		401	`$parts['pass'] = rawurlencode( $parts['pass'] );`
		402	`if ( isset( $parts['host'] ) &&`
		403	`!preg_match( '!^(\[[\da-f.:]+\]])\|([\da-f.:]+)$!ui', $parts['host'] ) )`
		404	`$parts['host'] = rawurlencode( $parts['host'] );`
		405	`if ( !empty( $parts['path'] ) )`
		406	`$parts['path'] = preg_replace( '!%2F!ui', '/',`
		407	`rawurlencode( $parts['path'] ) );`
		408	`if ( isset( $parts['query'] ) )`
		409	`$parts['query'] = rawurlencode( $parts['query'] );`
		410	`if ( isset( $parts['fragment'] ) )`
		411	`$parts['fragment'] = rawurlencode( $parts['fragment'] );`
		412	`}`
		413
		414	`$url = '';`
		415	`if ( !empty( $parts['scheme'] ) )`
		416	`$url .= $parts['scheme'] . ':';`
		417	`if ( isset( $parts['host'] ) )`
		418	`{`
		419	`$url .= '//';`
		420	`if ( isset( $parts['user'] ) )`
		421	`{`
		422	`$url .= $parts['user'];`
		423	`if ( isset( $parts['pass'] ) )`
		424	`$url .= ':' . $parts['pass'];`
		425	`$url .= '@';`
		426	`}`
		427	`if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] ) )`
		428	`$url .= '[' . $parts['host'] . ']'; // IPv6`
		429	`else`
		430	`$url .= $parts['host']; // IPv4 or name`
		431	`if ( isset( $parts['port'] ) )`
		432	`$url .= ':' . $parts['port'];`
		433	`if ( !empty( $parts['path'] ) && $parts['path'][0] != '/' )`
		434	`$url .= '/';`
		435	`}`
		436	`if ( !empty( $parts['path'] ) )`
		437	`$url .= $parts['path'];`
		438	`if ( isset( $parts['query'] ) )`
		439	`$url .= '?' . $parts['query'];`
		440	`if ( isset( $parts['fragment'] ) )`
		441	`$url .= '#' . $parts['fragment'];`
		442	`return $url;`
		443	`}`
		444
		445	`/**`
		446	`* This function encodes URL to form a URL which is properly`
		447	`* percent encoded to replace disallowed characters.`
		448	`*`
		449	`* RFC3986 specifies the allowed characters in the URL as well as`
		450	`* reserved characters in the URL. This function replaces all the`
		451	`* disallowed characters in the URL with their respective percent`
		452	`* encodings. Already encoded characters are not encoded again,`
		453	`* such as '%20' is not encoded to '%2520'.`
		454	`*`
		455	`* Parameters:`
		456	`* url the url to encode.`
		457	`*`
		458	`* Return values:`
		459	`* Returns the encoded URL string.`
		460	`*/`
		461	`function encode_url($url) {`
		462	`$reserved = array(`
		463	`":" => '!%3A!ui',`
		464	`"/" => '!%2F!ui',`
		465	`"?" => '!%3F!ui',`
		466	`"#" => '!%23!ui',`
		467	`"[" => '!%5B!ui',`
		468	`"]" => '!%5D!ui',`
		469	`"@" => '!%40!ui',`
		470	`"!" => '!%21!ui',`
		471	`"$" => '!%24!ui',`
		472	`"&" => '!%26!ui',`
		473	`"'" => '!%27!ui',`
		474	`"(" => '!%28!ui',`
		475	`")" => '!%29!ui',`
		476	`"*" => '!%2A!ui',`
		477	`"+" => '!%2B!ui',`
		478	`"," => '!%2C!ui',`
		479	`";" => '!%3B!ui',`
		480	`"=" => '!%3D!ui',`
		481	`"%" => '!%25!ui',`
		482	`);`
		483
		484	`$url = rawurlencode($url);`
		485	`$url = preg_replace(array_values($reserved), array_keys($reserved), $url);`
		486	`return $url;`
		487	`}`
		488
		489	`/**`
		490	`* Extract URLs from a web page.`
		491	`*`
		492	`* URLs are extracted from a long list of tags and attributes as defined`
		493	`* by the HTML 2.0, HTML 3.2, HTML 4.01, and draft HTML 5.0 specifications.`
		494	`* URLs are also extracted from tags and attributes that are common`
		495	`* extensions of HTML, from the draft Forms 2.0 specification, from XHTML,`
		496	`* and from WML 1.3 and 2.0.`
		497	`*`
		498	`* The function returns an associative array of associative arrays of`
		499	`* arrays of URLs. The outermost array's keys are the tag (element) name,`
		500	`* such as "a" for <a> or "img" for <img>. The values for these entries`
		501	`* are associative arrays where the keys are attribute names for those`
		502	`* tags, such as "href" for <a href="...">. Finally, the values for`
		503	`* those arrays are URLs found in those tags and attributes throughout`
		504	`* the text.`
		505	`*`
		506	`* Parameters:`
		507	`* text the UTF-8 text to scan`
		508	`*`
		509	`* Return values:`
		510	`* an associative array where keys are tags and values are an`
		511	`* associative array where keys are attributes and values are`
		512	`* an array of URLs.`
		513	`*`
		514	`* See:`
		515	`* http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_web_page`
		516	`*/`
		517	`function extract_html_urls( $text )`
		518	`{`
		519	`$match_elements = array(`
		520	`// HTML`
		521	`array('element'=>'a', 'attribute'=>'href'), // 2.0`
		522	`array('element'=>'a', 'attribute'=>'urn'), // 2.0`
		523	`array('element'=>'base', 'attribute'=>'href'), // 2.0`
		524	`array('element'=>'form', 'attribute'=>'action'), // 2.0`
		525	`array('element'=>'img', 'attribute'=>'src'), // 2.0`
		526	`array('element'=>'link', 'attribute'=>'href'), // 2.0`
		527
		528	`array('element'=>'applet', 'attribute'=>'code'), // 3.2`
		529	`array('element'=>'applet', 'attribute'=>'codebase'), // 3.2`
		530	`array('element'=>'area', 'attribute'=>'href'), // 3.2`
		531	`array('element'=>'body', 'attribute'=>'background'), // 3.2`
		532	`array('element'=>'img', 'attribute'=>'usemap'), // 3.2`
		533	`array('element'=>'input', 'attribute'=>'src'), // 3.2`
		534
		535	`array('element'=>'applet', 'attribute'=>'archive'), // 4.01`
		536	`array('element'=>'applet', 'attribute'=>'object'), // 4.01`
		537	`array('element'=>'blockquote', 'attribute'=>'cite'), // 4.01`
		538	`array('element'=>'del', 'attribute'=>'cite'), // 4.01`
		539	`array('element'=>'frame', 'attribute'=>'longdesc'), // 4.01`
		540	`array('element'=>'frame', 'attribute'=>'src'), // 4.01`
		541	`array('element'=>'head', 'attribute'=>'profile'), // 4.01`
		542	`array('element'=>'iframe', 'attribute'=>'longdesc'), // 4.01`
		543	`array('element'=>'iframe', 'attribute'=>'src'), // 4.01`
		544	`array('element'=>'img', 'attribute'=>'longdesc'), // 4.01`
		545	`array('element'=>'input', 'attribute'=>'usemap'), // 4.01`
		546	`array('element'=>'ins', 'attribute'=>'cite'), // 4.01`
		547	`array('element'=>'object', 'attribute'=>'archive'), // 4.01`
		548	`array('element'=>'object', 'attribute'=>'classid'), // 4.01`
		549	`array('element'=>'object', 'attribute'=>'codebase'), // 4.01`
		550	`array('element'=>'object', 'attribute'=>'data'), // 4.01`
		551	`array('element'=>'object', 'attribute'=>'usemap'), // 4.01`
		552	`array('element'=>'q', 'attribute'=>'cite'), // 4.01`
		553	`array('element'=>'script', 'attribute'=>'src'), // 4.01`
		554
		555	`array('element'=>'audio', 'attribute'=>'src'), // 5.0`
		556	`array('element'=>'command', 'attribute'=>'icon'), // 5.0`
		557	`array('element'=>'embed', 'attribute'=>'src'), // 5.0`
		558	`array('element'=>'event-source','attribute'=>'src'), // 5.0`
		559	`array('element'=>'html', 'attribute'=>'manifest'), // 5.0`
		560	`array('element'=>'source', 'attribute'=>'src'), // 5.0`
		561	`array('element'=>'video', 'attribute'=>'src'), // 5.0`
		562	`array('element'=>'video', 'attribute'=>'poster'), // 5.0`
		563
		564	`array('element'=>'bgsound', 'attribute'=>'src'), // Extension`
		565	`array('element'=>'body', 'attribute'=>'credits'), // Extension`
		566	`array('element'=>'body', 'attribute'=>'instructions'), // Extension`
		567	`array('element'=>'body', 'attribute'=>'logo'), // Extension`
		568	`array('element'=>'div', 'attribute'=>'href'), // Extension`
		569	`array('element'=>'div', 'attribute'=>'src'), // Extension`
		570	`array('element'=>'embed', 'attribute'=>'code'), // Extension`
		571	`array('element'=>'embed', 'attribute'=>'pluginspage'), // Extension`
		572	`array('element'=>'html', 'attribute'=>'background'), // Extension`
		573	`array('element'=>'ilayer', 'attribute'=>'src'), // Extension`
		574	`array('element'=>'img', 'attribute'=>'dynsrc'), // Extension`
		575	`array('element'=>'img', 'attribute'=>'lowsrc'), // Extension`
		576	`array('element'=>'input', 'attribute'=>'dynsrc'), // Extension`
		577	`array('element'=>'input', 'attribute'=>'lowsrc'), // Extension`
		578	`array('element'=>'table', 'attribute'=>'background'), // Extension`
		579	`array('element'=>'td', 'attribute'=>'background'), // Extension`
		580	`array('element'=>'th', 'attribute'=>'background'), // Extension`
		581	`array('element'=>'layer', 'attribute'=>'src'), // Extension`
		582	`array('element'=>'xml', 'attribute'=>'src'), // Extension`
		583
		584	`array('element'=>'button', 'attribute'=>'action'), // Forms 2.0`
		585	`array('element'=>'datalist', 'attribute'=>'data'), // Forms 2.0`
		586	`array('element'=>'form', 'attribute'=>'data'), // Forms 2.0`
		587	`array('element'=>'input', 'attribute'=>'action'), // Forms 2.0`
		588	`array('element'=>'select', 'attribute'=>'data'), // Forms 2.0`
		589
		590	`// XHTML`
		591	`array('element'=>'html', 'attribute'=>'xmlns'),`
		592
		593	`// WML`
		594	`array('element'=>'access', 'attribute'=>'path'), // 1.3`
		595	`array('element'=>'card', 'attribute'=>'onenterforward'), // 1.3`
		596	`array('element'=>'card', 'attribute'=>'onenterbackward'),// 1.3`
		597	`array('element'=>'card', 'attribute'=>'ontimer'), // 1.3`
		598	`array('element'=>'go', 'attribute'=>'href'), // 1.3`
		599	`array('element'=>'option', 'attribute'=>'onpick'), // 1.3`
		600	`array('element'=>'template', 'attribute'=>'onenterforward'), // 1.3`
		601	`array('element'=>'template', 'attribute'=>'onenterbackward'),// 1.3`
		602	`array('element'=>'template', 'attribute'=>'ontimer'), // 1.3`
		603	`array('element'=>'wml', 'attribute'=>'xmlns'), // 2.0`
		604	`);`
		605
		606	`$match_metas = array(`
		607	`'content-base',`
		608	`'content-location',`
		609	`'referer',`
		610	`'location',`
		611	`'refresh',`
		612	`);`
		613
		614	`// Extract all elements`
		615	`if ( !preg_match_all( '/<([a-z][^>]*)>/iu', $text, $matches ) )`
		616	`return array( );`
		617	`$elements = $matches[1];`
		618	`$value_pattern = '=(("([^"])")\|([^\s]))';`
		619
		620	`// Match elements and attributes`
		621	`foreach ( $match_elements as $match_element )`
		622	`{`
		623	`$name = $match_element['element'];`
		624	`$attr = $match_element['attribute'];`
		625	`$pattern = '/^' . $name . '\s.*' . $attr . $value_pattern . '/iu';`
		626	`if ( $name == 'object' )`
		627	`$split_pattern = '/\s*/u'; // Space-separated URL list`
		628	`else if ( $name == 'archive' )`
		629	`$split_pattern = '/,\s*/u'; // Comma-separated URL list`
		630	`else`
		631	`unset( $split_pattern ); // Single URL`
		632	`foreach ( $elements as $element )`
		633	`{`
		634	`if ( !preg_match( $pattern, $element, $match ) )`
		635	`continue;`
		636	`$m = empty($match[3]) ? (!empty($match[4])?$match[4]:'') : $match[3];`
		637	`if ( !isset( $split_pattern ) )`
		638	`$urls[$name][$attr][] = $m;`
		639	`else`
		640	`{`
		641	`$msplit = preg_split( $split_pattern, $m );`
		642	`foreach ( $msplit as $ms )`
		643	`$urls[$name][$attr][] = $ms;`
		644	`}`
		645	`}`
		646	`}`
		647
		648	`// Match meta http-equiv elements`
		649	`foreach ( $match_metas as $match_meta )`
		650	`{`
		651	`$attr_pattern = '/http-equiv="?' . $match_meta . '"?/iu';`
		652	`$content_pattern = '/content' . $value_pattern . '/iu';`
		653	`$refresh_pattern = '/\d;\s(url=)?(.*)$/iu';`
		654	`foreach ( $elements as $element )`
		655	`{`
		656	`if ( !preg_match( '/^meta/iu', $element ) \|\|`
		657	`!preg_match( $attr_pattern, $element ) \|\|`
		658	`!preg_match( $content_pattern, $element, $match ) )`
		659	`continue;`
		660	`$m = empty($match[3]) ? $match[4] : $match[3];`
		661	`if ( $match_meta != 'refresh' )`
		662	`$urls['meta']['http-equiv'][] = $m;`
		663	`else if ( preg_match( $refresh_pattern, $m, $match ) )`
		664	`$urls['meta']['http-equiv'][] = $match[2];`
		665	`}`
		666	`}`
		667
		668	`// Match style attributes`
		669	`$urls['style'] = array( );`
		670	`$style_pattern = '/style' . $value_pattern . '/iu';`
		671	`foreach ( $elements as $element )`
		672	`{`
		673	`if ( !preg_match( $style_pattern, $element, $match ) )`
		674	`continue;`
		675	`$m = empty($match[3]) ? $match[4] : $match[3];`
		676	`$style_urls = extract_css_urls( $m );`
		677	`if ( !empty( $style_urls ) )`
		678	`$urls['style'] = array_merge_recursive(`
		679	`$urls['style'], $style_urls );`
		680	`}`
		681
		682	`// Match style bodies`
		683	`if ( preg_match_all( '/<style[^>]>(.?)<\/style>/siu', $text, $style_bodies ) )`
		684	`{`
		685	`foreach ( $style_bodies[1] as $style_body )`
		686	`{`
		687	`$style_urls = extract_css_urls( $style_body );`
		688	`if ( !empty( $style_urls ) )`
		689	`$urls['style'] = array_merge_recursive(`
		690	`$urls['style'], $style_urls );`
		691	`}`
		692	`}`
		693	`if ( empty($urls['style']) )`
		694	`unset( $urls['style'] );`
		695
		696	`return $urls;`
		697	`}`
		698	`/**`
		699	`* Extract URLs from UTF-8 CSS text.`
		700	`*`
		701	`* URLs within @import statements and url() property functions are extracted`
		702	`* and returned in an associative array of arrays. Array keys indicate`
		703	`* the use context for the URL, including:`
		704	`*`
		705	`* "import"`
		706	`* "property"`
		707	`*`
		708	`* Each value in the associative array is an array of URLs.`
		709	`*`
		710	`* Parameters:`
		711	`* text the UTF-8 text to scan`
		712	`*`
		713	`* Return values:`
		714	`* an associative array of arrays of URLs.`
		715	`*`
		716	`* See:`
		717	`* http://nadeausoftware.com/articles/2008/01/php_tip_how_extract_urls_css_file`
		718	`*/`
		719	`function extract_css_urls( $text )`
		720	`{`
		721	`$urls = array( );`
		722
		723	`$url_pattern = '(([^\\\\\'", ]*(\\\\.)?)+)';`
		724	`$urlfunc_pattern = 'url$\s[\'"]?' . $url_pattern . '[\'"]?\s$';`
		725	`$pattern = '/(' .`
		726	`'(@import\s*[\'"]' . $url_pattern . '[\'"])' .`
		727	`'\|(@import\s*' . $urlfunc_pattern . ')' .`
		728	`'\|(' . $urlfunc_pattern . ')' . ')/iu';`
		729	`if ( !preg_match_all( $pattern, $text, $matches ) )`
		730	`return $urls;`
		731
		732	`// @import '...'`
		733	`// @import "..."`
		734	`foreach ( $matches[3] as $match )`
		735	`if ( !empty($match) )`
		736	`$urls['import'][] =`
		737	`preg_replace( '/\\\\(.)/u', '\\1', $match );`
		738
		739	`// @import url(...)`
		740	`// @import url('...')`
		741	`// @import url("...")`
		742	`foreach ( $matches[7] as $match )`
		743	`if ( !empty($match) )`
		744	`$urls['import'][] =`
		745	`preg_replace( '/\\\\(.)/u', '\\1', $match );`
		746
		747	`// url(...)`
		748	`// url('...')`
		749	`// url("...")`
		750	`foreach ( $matches[11] as $match )`
		751	`if ( !empty($match) )`
		752	`$urls['property'][] =`
		753	`preg_replace( '/\\\\(.)/u', '\\1', $match );`
		754
		755	`return $urls;`
		756	`}`

Proyectos de Subversion Moodle

(root)/repository/url/locallib.php – Rev 1