1 |
efrain |
1 |
<?php
|
|
|
2 |
// This file is part of Moodle - http://moodle.org/
|
|
|
3 |
//
|
|
|
4 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
5 |
// it under the terms of the GNU General Public License as published by
|
|
|
6 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
7 |
// (at your option) any later version.
|
|
|
8 |
//
|
|
|
9 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
12 |
// GNU General Public License for more details.
|
|
|
13 |
//
|
|
|
14 |
// You should have received a copy of the GNU General Public License
|
|
|
15 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
16 |
|
|
|
17 |
namespace core;
|
|
|
18 |
|
|
|
19 |
/**
|
|
|
20 |
* Tests our html2text hacks
|
|
|
21 |
*
|
|
|
22 |
* Note: includes original tests from testweblib.php
|
|
|
23 |
*
|
|
|
24 |
* @package core
|
|
|
25 |
* @category test
|
|
|
26 |
* @copyright 2012 Petr Skoda {@link http://skodak.org}
|
|
|
27 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
|
28 |
*/
|
|
|
29 |
class html2text_test extends \basic_testcase {
|
|
|
30 |
|
|
|
31 |
/**
|
|
|
32 |
* ALT as image replacements.
|
|
|
33 |
*/
|
11 |
efrain |
34 |
public function test_images(): void {
    // An image on its own is replaced by its ALT text wrapped in square brackets.
    $this->assertSame('[edit]', html_to_text('<img src="edit.png" alt="edit" />'));

    // The same replacement is expected when the image sits inline between text.
    $html = 'xx<img src="gif.gif" alt="some gif" />xx';
    $result = html_to_text($html, null, false, false);

    // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
    $this->assertSame('xx[some gif]xx', $result);
}
|
|
|
41 |
|
|
|
42 |
/**
|
|
|
43 |
* No magic quotes messing.
|
|
|
44 |
*/
|
11 |
efrain |
45 |
public function test_no_strip_slashes(): void {
    // A backslash inside the ALT text must be kept as it is.
    $this->assertSame('[\edit]', html_to_text('<img src="edit.png" alt="\edit" />'));

    // Plain text containing backslashes must come back unchanged.
    $text = '\\magic\\quotes\\are\\\\horrible';
    $result = html_to_text($text, null, false, false);

    // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
    $this->assertSame($text, $result);
}
|
|
|
52 |
|
|
|
53 |
/**
|
|
|
54 |
* core_text integration.
|
|
|
55 |
*/
|
11 |
efrain |
56 |
public function test_core_text(): void {
    // Text inside <strong> is expected back in upper case, accented characters included.
    $html = '<strong>Žluťoučký koníček</strong>';
    $result = html_to_text($html, null, false, false);

    // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
    $this->assertSame('ŽLUŤOUČKÝ KONÍČEK', $result);
}
|
|
|
61 |
|
|
|
62 |
/**
|
|
|
63 |
* Protect 0.
|
|
|
64 |
*/
|
11 |
efrain |
65 |
public function test_zero(): void {
    // The string '0' must be returned as is when all arguments are passed explicitly.
    $text = '0';
    $result = html_to_text($text, null, false, false);

    // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
    $this->assertSame($text, $result);

    // Same expectation when only the text is passed and the remaining arguments use their defaults.
    $this->assertSame('0', html_to_text('0'));
}
|
|
|
72 |
|
|
|
73 |
/**
|
|
|
74 |
* Test the links list enumeration.
|
|
|
75 |
*/
|
11 |
efrain |
76 |
public function test_build_link_list(): void {

    // Note the trailing whitespace left intentionally in the text after first link.
    $text = 'Total of <a title="List of integrated issues"
href="http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D"> ' . '
<strong>27 issues</strong></a> and <a href="http://another.url/?f=a&b=2">some</a> other
have been fixed <strong><a href="http://third.url/view.php">last week</a></strong>';

    // Do not collect links.
    $result = html_to_text($text, 5000, false);
    $this->assertSame('Total of 27 ISSUES and some other have been fixed LAST WEEK', $result);

    // Collect and enumerate links.
    $result = html_to_text($text, 5000, true);

    // The converted text must come first, with a numbered marker after each link.
    $this->assertStringStartsWith(
        'Total of 27 ISSUES [1] and some [2] other have been fixed LAST WEEK [3]',
        $result
    );

    // Every collected URL must be listed on a line of its own, prefixed by its marker.
    $expectedlines = [
        '[1] http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D',
        '[2] http://another.url/?f=a&b=2',
        '[3] http://third.url/view.php',
    ];
    foreach ($expectedlines as $line) {
        $this->assertMatchesRegularExpression('|^' . preg_quote($line) . '$|m', $result);
    }

    // Numbering starts at 1 and stops at the last collected link.
    $this->assertStringNotContainsString('[0]', $result);
    $this->assertStringNotContainsString('[4]', $result);

    // Test multiple occurrences of the same URL.
    $text = '<p>See <a href="http://moodle.org">moodle.org</a>,
<a href="http://www.google.fr">google</a>, <a href="http://www.univ-lemans.fr">univ-lemans</a>
and <a href="http://www.google.fr">google</a>.
Also try <a href="https://www.google.fr">google via HTTPS</a>.';
    $result = html_to_text($text, 5000, true);

    // A repeated URL reuses its marker ([2]); the HTTPS variant is a different URL and gets its own.
    $this->assertStringStartsWith(
        'See moodle.org [1], google [2], univ-lemans [3] and google [2]. Also try google via HTTPS [4].',
        $result
    );

    $expectedlines = [
        '[1] http://moodle.org',
        '[2] http://www.google.fr',
        '[3] http://www.univ-lemans.fr',
        '[4] https://www.google.fr',
    ];
    foreach ($expectedlines as $line) {
        $this->assertMatchesRegularExpression('|^' . preg_quote($line) . '$|m', $result);
    }

    $this->assertStringNotContainsString('[0]', $result);
    $this->assertStringNotContainsString('[5]', $result);
}
|
|
|
111 |
|
|
|
112 |
/**
|
|
|
113 |
* Various invalid HTML typed by users that ignore html strict.
|
|
|
114 |
**/
|
11 |
efrain |
115 |
public function test_invalid_html(): void {
    // Bare '&', '>' and '<' typed as plain text must come back unchanged.
    $samples = ['Gin & Tonic', 'Gin > Tonic', 'Gin < Tonic'];

    foreach ($samples as $text) {
        $result = html_to_text($text, null, false, false);

        // Expected value goes first so PHPUnit labels expected/actual correctly on failure.
        $this->assertSame($text, $result);
    }
}
|
|
|
128 |
|
|
|
129 |
/**
|
|
|
130 |
* Basic text formatting.
|
|
|
131 |
*/
|
11 |
efrain |
132 |
public function test_simple(): void {
    // Pairs of [expected plain text, HTML input], checked in the order listed.
    $cases = [
        ["_Hello_ WORLD!\n", '<p><i>Hello</i> <b>world</b>!</p>'],
        [
            "All the WORLD’S a stage.\n\n-- William Shakespeare\n",
            '<p>All the <strong>world’s</strong> a stage.</p><p>-- William Shakespeare</p>',
        ],
        ["HELLO WORLD!\n\n", '<h1>Hello world!</h1>'],
        ["Hello\nworld!", 'Hello<br />world!'],
    ];

    foreach ($cases as [$expected, $html]) {
        $this->assertSame($expected, html_to_text($html));
    }
}
|
|
|
138 |
|
|
|
139 |
/**
|
|
|
140 |
* Test line wrapping.
|
|
|
141 |
*/
|
11 |
efrain |
142 |
public function test_text_nowrap(): void {
    $long = 'Here is a long string, more than 75 characters long, since by default html_to_text wraps text at 75 chars.';
    $wrapped = "Here is a long string, more than 75 characters long, since by default\nhtml_to_text wraps text at 75 chars.";

    // With 0 as the second argument the text is expected back on a single line.
    $unwrappedresult = html_to_text($long, 0);
    $this->assertSame($long, $unwrappedresult);

    // Without the second argument a line break is expected in the output.
    $defaultresult = html_to_text($long);
    $this->assertSame($wrapped, $defaultresult);
}
|
|
|
148 |
|
|
|
149 |
/**
|
|
|
150 |
* Whitespace removal.
|
|
|
151 |
*/
|
11 |
efrain |
152 |
public function test_trailing_whitespace(): void {
    // Whitespace before the line break must not leave more than one space between the two parts.
    $input = "With trailing whitespace \nand some more text";
    $converted = html_to_text($input, 0);

    $this->assertSame('With trailing whitespace and some more text', $converted);
}
|
|
|
155 |
|
|
|
156 |
/**
|
|
|
157 |
* PRE parsing.
|
|
|
158 |
*/
|
11 |
efrain |
159 |
public function test_html_to_text_pre_parsing_problem(): void {
// Converts an HTML snippet containing a <pre> block with <br /> line breaks and
// checks that the output is identical to the plain text held in $strconv.
|
1 |
efrain |
160 |
$strorig = 'Consider the following function:<br /><pre><span style="color: rgb(153, 51, 102);">void FillMeUp(char* in_string) {'.
|
|
|
161 |
'<br /> int i = 0;<br /> while (in_string[i] != \'\0\') {<br /> in_string[i] = \'X\';<br /> i++;<br /> }<br />'.
|
|
|
162 |
'}</span></pre>What would happen if a non-terminated string were input to this function?<br /><br />';
|
|
|
163 |

|
|
164 |
// Note, the spaces in the <pre> section are Unicode NBSPs - they may not be displayed in your editor.
|
|
|
165 |
$strconv = 'Consider the following function:
|
|
|
166 |

|
|
167 |
void FillMeUp(char* in_string) {
|
|
|
168 |
int i = 0;
|
|
|
169 |
while (in_string[i] != \'\0\') {
|
|
|
170 |
in_string[i] = \'X\';
|
|
|
171 |
i++;
|
|
|
172 |
}
|
|
|
173 |
}
|
|
|
174 |
What would happen if a non-terminated string were input to this function?
|
|
|
175 |

|
|
176 |
';
|
|
|
177 |

|
|
178 |
// The comparison is strict: every line break and whitespace character in $strconv must match.
$this->assertSame($strconv, html_to_text($strorig));
|
|
|
179 |
}
|
|
|
180 |
|
|
|
181 |
/**
|
|
|
182 |
* Scripts should be stripped.
|
|
|
183 |
*/
|
11 |
efrain |
184 |
public function test_strip_scripts(): void {
    // The <script> element and its content must not appear in the converted text.
    $html = 'Interesting <script type="text/javascript">var what_a_mess = "Yuck!";</script> text';
    $converted = html_to_text($html, 0);

    $this->assertSame('Interesting text', $converted);
}
|
|
|
188 |
}
|