| 1 | 
           efrain | 
           1 | 
           <?php
  | 
        
        
            | 
            | 
           2 | 
              | 
        
        
            | 
            | 
           3 | 
           /**
            * Validates a URI host: an IPv4 address, a bracketed IPv6 literal, or a
            * domain name (optionally punycoding internationalized names first).
            */
           class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
           {

               /**
                * IPv4 sub-validator.
                * @type HTMLPurifier_AttrDef_URI_IPv4
                */
               protected $ipv4;

               /**
                * IPv6 sub-validator.
                * @type HTMLPurifier_AttrDef_URI_IPv6
                */
               protected $ipv6;

               /**
                * Creates the IPv4 and IPv6 sub-validators that validate() delegates to.
                */
               public function __construct()
               {
                   $this->ipv4 = new HTMLPurifier_AttrDef_URI_IPv4();
                   $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6();
               }

               /**
                * Validates (and possibly normalizes) a host.
                *
                * Returns '' for an empty host, '[' . address . ']' for a valid
                * bracketed IPv6 literal, the IPv4 sub-validator's result for an IPv4
                * address, the (possibly punycoded) name for a valid domain name, and
                * false otherwise.
                *
                * @param string $string
                * @param HTMLPurifier_Config $config
                * @param HTMLPurifier_Context $context
                * @return bool|string
                */
               public function validate($string, $config, $context)
               {
                   // empty hostname is OK; it's usually semantically equivalent:
                   // the default host as defined by a URI scheme is used:
                   //
                   //      If the URI scheme defines a default for host, then that
                   //      default applies when the host subcomponent is undefined
                   //      or when the registered name is empty (zero length).
                   if ($string === '') {
                       return '';
                   }

                   $length = strlen($string);
                   if ($length > 1 && $string[0] === '[' && $string[$length - 1] === ']') {
                       // IPv6 literal: validate what is between the brackets and
                       // put the brackets back around the sub-validator's result.
                       $ip = substr($string, 1, $length - 2);
                       $valid = $this->ipv6->validate($ip, $config, $context);
                       if ($valid === false) {
                           return false;
                       }
                       return '[' . $valid . ']';
                   }

                   // need to do checks on unusual encodings too
                   $ipv4 = $this->ipv4->validate($string, $config, $context);
                   if ($ipv4 !== false) {
                       return $ipv4;
                   }

                   // A regular domain name.

                   // This doesn't match I18N domain names, but we don't have proper IRI support,
                   // so force users to insert Punycode.

                   // Underscores defined as Unreserved Characters in RFC 3986 are
                   // allowed in a URI. There are cases where we want to consider a
                   // URI containing "_" such as "_dmarc.example.com".
                   // Underscores are not allowed in the default. If you want to
                   // allow it, set Core.AllowHostnameUnderscore to true.
                   $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';

                   // Based off of RFC 1738, but amended so that
                   // as per RFC 3696, the top label need only not be all numeric.
                   // The productions describing this are:
                   $an  = "[a-z0-9{$underscore}]";  // alphanum
                   $and = "[a-z0-9-{$underscore}]"; // alphanum | "-"
                   // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
                   // toplabel    = alphanum | alphanum *( alphanum | "-" ) alphanum
                   //      side condition: not all numeric (checked in isHostname())
                   $label = "{$an}(?:{$and}*{$an})?";
                   // hostname    = *( domainlabel "." ) toplabel [ "." ]
                   // The D modifier makes "$" match only at the very end of the
                   // subject, so a host with a trailing newline is not accepted.
                   $pattern = "/^(?:{$label}\.)*({$label})\.?$/iD";

                   if ($this->isHostname($string, $pattern)) {
                       return $string;
                   }

                   // PHP 5.3 and later support this functionality natively
                   if (function_exists('idn_to_ascii')) {
                       if (defined('IDNA_NONTRANSITIONAL_TO_ASCII') && defined('INTL_IDNA_VARIANT_UTS46')) {
                           $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46);
                       } else {
                           $string = idn_to_ascii($string);
                       }

                   // If we have Net_IDNA2 support, we can support IRIs by
                   // punycoding them. (Punycode is the most portable form to
                   // emit, since it does not depend on IDN support downstream.)
                   } elseif ($config->get('Core.EnableIDNA') && class_exists('Net_IDNA2')) {
                       $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
                       // we need to encode each period separately
                       $parts = explode('.', $string);
                       try {
                           $new_parts = array();
                           foreach ($parts as $part) {
                               // Only labels containing a byte above 'z' (0x7a)
                               // are handed to the encoder; others pass through.
                               $encodable = false;
                               for ($i = 0, $c = strlen($part); $i < $c; $i++) {
                                   if (ord($part[$i]) > 0x7a) {
                                       $encodable = true;
                                       break;
                                   }
                               }
                               if (!$encodable) {
                                   $new_parts[] = $part;
                               } else {
                                   $new_parts[] = $idna->encode($part);
                               }
                           }
                           $string = implode('.', $new_parts);
                       } catch (Exception $e) {
                           // XXX error reporting
                           // Best effort: on failure $string is left unconverted
                           // and is judged by the grammar check below.
                       }
                   }

                   // Try again on the converted name. idn_to_ascii() yields false
                   // on failure, and the same grammar *and* side condition as the
                   // first attempt must hold, otherwise an all-numeric top label
                   // rejected above would be accepted here.
                   if (is_string($string) && $this->isHostname($string, $pattern)) {
                       return $string;
                   }
                   return false;
               }

               /**
                * Tells whether a host matches the hostname grammar and has a top
                * label that is not all numeric.
                *
                * @param string $host Candidate hostname.
                * @param string $pattern Hostname regex whose first capture group is
                *                        the top label.
                * @return bool
                */
               private function isHostname($host, $pattern)
               {
                   return preg_match($pattern, $host, $matches) === 1
                       && !ctype_digit($matches[1]);
               }
           }
  | 
        
        
            | 
            | 
           135 | 
              | 
        
        
            | 
            | 
           136 | 
           // vim: et sw=4 sts=4
  |