1 |
efrain |
1 |
<?php
|
|
|
2 |
|
|
|
3 |
/**
 * Injector that converts plain-text URLs into actual links.
 *
 * A text token is only examined when it contains "://". Once it does, every
 * span matched by the URL pattern is wrapped in an <a> element; the pattern
 * accepts scheme-prefixed URLs (such as http, https and ftp) and also
 * "www."-prefixed and "domain.tld/" forms.
 */
class HTMLPurifier_Injector_Linkify extends HTMLPurifier_Injector
{
    /**
     * @type string
     */
    public $name = 'Linkify';

    /**
     * Elements/attributes this injector requires: <a> with an href attribute.
     *
     * @type array
     */
    public $needed = array('a' => array('href'));

    /**
     * Splits a text token around the URLs it contains and replaces it with
     * a sequence of text tokens and <a href="URL">URL</a> token triples.
     *
     * The token is left untouched when <a> is not allowed at this point,
     * when the text contains no "://", when the regex engine fails, or when
     * the pattern matches nothing.
     *
     * @param HTMLPurifier_Token $token Text token, passed by reference; it is
     *        overwritten with an array of tokens when at least one URL is found.
     */
    public function handleText(&$token)
    {
        if (!$this->allowsElement('a')) {
            return;
        }

        if (strpos($token->data, '://') === false) {
            // our really quick heuristic failed, abort
            // this may not work so well if we want to match things like
            // "google.com", but then again, most people don't
            return;
        }

        // there is/are URL(s). Let's split the string.
        // We use this regex:
        // https://gist.github.com/gruber/249502
        // but with @cscott's backtracking fix and also
        // the Unicode characters un-Unicodified.
        $bits = preg_split(
            '/\\b((?:[a-z][\\w\\-]+:(?:\\/{1,3}|[a-z0-9%])|www\\d{0,3}[.]|[a-z0-9.\\-]+[.][a-z]{2,4}\\/)(?:[^\\s()<>]|\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\))+(?:\\((?:[^\\s()<>]|(?:\\([^\\s()<>]+\\)))*\\)|[^\\s`!()\\[\\]{};:\'".,<>?\x{00ab}\x{00bb}\x{201c}\x{201d}\x{2018}\x{2019}]))/iu',
            $token->data,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );

        if ($bits === false) {
            // preg_split() reported an error; keep the original token.
            return;
        }

        if (count($bits) < 2) {
            // The "://" heuristic passed but the pattern matched no URL, so
            // the only piece is the original text. Keep the existing token
            // instead of swapping it for a freshly built copy of itself.
            return;
        }

        $token = array();

        // The pattern has exactly one capturing group, so with
        // PREG_SPLIT_DELIM_CAPTURE the pieces strictly alternate:
        // text, link, text, link, ..., text.
        // $i = index
        // $c = count
        // $l = is link
        for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) {
            if (!$l) {
                // Empty text pieces occur when a URL starts/ends the string
                // or two URLs are adjacent; they produce no token.
                if ($bits[$i] === '') {
                    continue;
                }
                $token[] = new HTMLPurifier_Token_Text($bits[$i]);
            } else {
                // NOTE(review): the matched text is used verbatim as the href,
                // including scheme-less matches such as "www.example.com";
                // presumably those end up as relative links - confirm whether
                // that is intended.
                $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i]));
                $token[] = new HTMLPurifier_Token_Text($bits[$i]);
                $token[] = new HTMLPurifier_Token_End('a');
            }
        }
    }
}
|
|
|
66 |
|
|
|
67 |
// vim: et sw=4 sts=4
|