AutorÃa | Ultima modificación | Ver Log |
<?php/*** HTML Purifier's internal representation of a URI.* @note* Internal data-structures are completely escaped. If the data needs* to be used in a non-URI context (which is very unlikely), be sure* to decode it first. The URI may not necessarily be well-formed until* validate() is called.*/class HTMLPurifier_URI{/*** @type string*/public $scheme;/*** @type string*/public $userinfo;/*** @type string*/public $host;/*** @type int*/public $port;/*** @type string*/public $path;/*** @type string*/public $query;/*** @type string*/public $fragment;/*** @param string $scheme* @param string $userinfo* @param string $host* @param int $port* @param string $path* @param string $query* @param string $fragment* @note Automatically normalizes scheme and port*/public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment){$this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);$this->userinfo = $userinfo;$this->host = $host;$this->port = is_null($port) ? $port : (int)$port;$this->path = $path;$this->query = $query;$this->fragment = $fragment;}/*** Retrieves a scheme object corresponding to the URI's scheme/default* @param HTMLPurifier_Config $config* @param HTMLPurifier_Context $context* @return HTMLPurifier_URIScheme Scheme object appropriate for validating this URI*/public function getSchemeObj($config, $context){$registry = HTMLPurifier_URISchemeRegistry::instance();if ($this->scheme !== null) {$scheme_obj = $registry->getScheme($this->scheme, $config, $context);if (!$scheme_obj) {return false;} // invalid scheme, clean it out} else {// no scheme: retrieve the default one$def = $config->getDefinition('URI');$scheme_obj = $def->getDefaultScheme($config, $context);if (!$scheme_obj) {if ($def->defaultScheme !== null) {// something funky happened to the default scheme objecttrigger_error('Default scheme object "' . $def->defaultScheme . '" was not readable',E_USER_WARNING);} // suppress error if it's nullreturn false;}}return $scheme_obj;}/*** Generic validation method applicable for all schemes. May modify* this URI in order to get it into a compliant form.* @param HTMLPurifier_Config $config* @param HTMLPurifier_Context $context* @return bool True if validation/filtering succeeds, false if failure*/public function validate($config, $context){// ABNF definitions from RFC 3986$chars_sub_delims = '!$&\'()*+,;=';$chars_gen_delims = ':/?#[]@';$chars_pchar = $chars_sub_delims . ':@';// validate hostif (!is_null($this->host)) {$host_def = new HTMLPurifier_AttrDef_URI_Host();$this->host = $host_def->validate($this->host, $config, $context);if ($this->host === false) {$this->host = null;}}// validate scheme// NOTE: It's not appropriate to check whether or not this// scheme is in our registry, since a URIFilter may convert a// URI that we don't allow into one we do. So instead, we just// check if the scheme can be dropped because there is no host// and it is our default scheme.if (!is_null($this->scheme) && is_null($this->host) || $this->host === '') {// support for relative paths is pretty abysmal when the// scheme is present, so axe it when possible$def = $config->getDefinition('URI');if ($def->defaultScheme === $this->scheme) {$this->scheme = null;}}// validate usernameif (!is_null($this->userinfo)) {$encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');$this->userinfo = $encoder->encode($this->userinfo);}// validate portif (!is_null($this->port)) {if ($this->port < 1 || $this->port > 65535) {$this->port = null;}}// validate path$segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');if (!is_null($this->host)) { // this catches $this->host === ''// path-abempty (hier and relative)// http://www.example.com/my/path// //www.example.com/my/path (looks odd, but works, and// recognized by most browsers)// (this set is valid or invalid on a scheme by scheme// basis, so we'll deal with it later)// file:///my/path// ///my/path$this->path = $segments_encoder->encode($this->path);} elseif ($this->path !== '') {if ($this->path[0] === '/') {// path-absolute (hier and relative)// http:/my/path// /my/pathif (strlen($this->path) >= 2 && $this->path[1] === '/') {// This could happen if both the host gets stripped// out// http://my/path// //my/path$this->path = '';} else {$this->path = $segments_encoder->encode($this->path);}} elseif (!is_null($this->scheme)) {// path-rootless (hier)// http:my/path// Short circuit evaluation means we don't need to check nz$this->path = $segments_encoder->encode($this->path);} else {// path-noscheme (relative)// my/path// (once again, not checking nz)$segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');$c = strpos($this->path, '/');if ($c !== false) {$this->path =$segment_nc_encoder->encode(substr($this->path, 0, $c)) .$segments_encoder->encode(substr($this->path, $c));} else {$this->path = $segment_nc_encoder->encode($this->path);}}} else {// path-empty (hier and relative)$this->path = ''; // just to be safe}// qf = query and fragment$qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');if (!is_null($this->query)) {$this->query = $qf_encoder->encode($this->query);}if (!is_null($this->fragment)) {$this->fragment = $qf_encoder->encode($this->fragment);}return true;}/*** Convert URI back to string* @return string URI appropriate for output*/public function toString(){// reconstruct authority$authority = null;// there is a rendering difference between a null authority// (http:foo-bar) and an empty string authority// (http:///foo-bar).if (!is_null($this->host)) {$authority = '';if (!is_null($this->userinfo)) {$authority .= $this->userinfo . '@';}$authority .= $this->host;if (!is_null($this->port)) {$authority .= ':' . $this->port;}}// Reconstruct the result// One might wonder about parsing quirks from browsers after// this reconstruction. Unfortunately, parsing behavior depends// on what *scheme* was employed (file:///foo is handled *very*// differently than http:///foo), so unfortunately we have to// defer to the schemes to do the right thing.$result = '';if (!is_null($this->scheme)) {$result .= $this->scheme . ':';}if (!is_null($authority)) {$result .= '//' . $authority;}$result .= $this->path;if (!is_null($this->query)) {$result .= '?' . $this->query;}if (!is_null($this->fragment)) {$result .= '#' . $this->fragment;}return $result;}/*** Returns true if this URL might be considered a 'local' URL given* the current context. This is true when the host is null, or* when it matches the host supplied to the configuration.** Note that this does not do any scheme checking, so it is mostly* only appropriate for metadata that doesn't care about protocol* security. isBenign is probably what you actually want.* @param HTMLPurifier_Config $config* @param HTMLPurifier_Context $context* @return bool*/public function isLocal($config, $context){if ($this->host === null) {return true;}$uri_def = $config->getDefinition('URI');if ($uri_def->host === $this->host) {return true;}return false;}/*** Returns true if this URL should be considered a 'benign' URL,* that is:** - It is a local URL (isLocal), and* - It has a equal or better level of security* @param HTMLPurifier_Config $config* @param HTMLPurifier_Context $context* @return bool*/public function isBenign($config, $context){if (!$this->isLocal($config, $context)) {return false;}$scheme_obj = $this->getSchemeObj($config, $context);if (!$scheme_obj) {return false;} // conservative approach$current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);if ($current_scheme_obj->secure) {if (!$scheme_obj->secure) {return false;}}return true;}}// vim: et sw=4 sts=4