1 |
efrain |
1 |
<?php
|
|
|
2 |
// This file is part of Moodle - http://moodle.org/
|
|
|
3 |
//
|
|
|
4 |
// Moodle is free software: you can redistribute it and/or modify
|
|
|
5 |
// it under the terms of the GNU General Public License as published by
|
|
|
6 |
// the Free Software Foundation, either version 3 of the License, or
|
|
|
7 |
// (at your option) any later version.
|
|
|
8 |
//
|
|
|
9 |
// Moodle is distributed in the hope that it will be useful,
|
|
|
10 |
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
11 |
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
12 |
// GNU General Public License for more details.
|
|
|
13 |
//
|
|
|
14 |
// You should have received a copy of the GNU General Public License
|
|
|
15 |
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
16 |
|
|
|
17 |
namespace core;
|
|
|
18 |
|
|
|
19 |
/**
|
|
|
20 |
* Tests our html2text hacks
|
|
|
21 |
*
|
|
|
22 |
* Note: includes original tests from testweblib.php
|
|
|
23 |
*
|
|
|
24 |
* @package core
|
|
|
25 |
* @category test
|
|
|
26 |
* @copyright 2012 Petr Skoda {@link http://skodak.org}
|
|
|
27 |
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
1441 |
ariadna |
28 |
* @covers ::html_to_text
|
1 |
efrain |
29 |
*/
|
1441 |
ariadna |
30 |
final class html2text_test extends \basic_testcase {
|
1 |
efrain |
31 |
/**
|
1441 |
ariadna |
32 |
* Data provider for general tests.
|
|
|
33 |
*
|
|
|
34 |
* @return array
|
1 |
efrain |
35 |
*/
|
1441 |
ariadna |
36 |
public static function examples_provider(): array {
// Builds the named datasets consumed by test_runner().
// Each dataset is a positional triple: expected plain text, options array, HTML input.
// test_runner() unpacks the options array into the html_to_text() call, so string keys such as
// 'width' and 'dolinks' are passed as named arguments and an empty array passes no extra arguments.
// NOTE(review): some fixtures are whitespace-sensitive (see the NBSP note on the heredoc and the
// trailing-whitespace cases) - confirm the exact bytes against the real file before reformatting.
|
|
|
37 |
// Used in the line wrapping tests.
|
|
|
38 |
// phpcs:ignore Generic.Files.LineLength.TooLong
|
|
|
39 |
$long = "Here is a long string, more than 75 characters long, since by default html_to_text wraps text at 75 chars.";
|
|
|
40 |
// phpcs:ignore Generic.Files.LineLength.TooLong
|
|
|
41 |
$wrapped = "Here is a long string, more than 75 characters long, since by default\nhtml_to_text wraps text at 75 chars.";
|
1 |
efrain |
42 |

1441 |
ariadna |
43 |
// These two are used in the PRE parsing tests.
|
|
|
44 |
// phpcs:ignore Generic.Files.LineLength.TooLong
|
|
|
45 |
$strorig = 'Consider the following function:<br /><pre><span style="color: rgb(153, 51, 102);">void FillMeUp(char* in_string) {'.
|
|
|
46 |
'<br /> int i = 0;<br /> while (in_string[i] != \'\0\') {<br /> in_string[i] = \'X\';<br /> i++;<br /> }<br />'.
|
|
|
47 |
'}</span></pre>What would happen if a non-terminated string were input to this function?<br /><br />';
|
1 |
efrain |
48 |

1441 |
ariadna |
49 |
// Note, the spaces in the <pre> section are Unicode NBSPs - they may not be displayed in your editor.
|
|
|
50 |
$strconv = <<<EOF
|
|
|
51 |
Consider the following function:
|
1 |
efrain |
52 |

1441 |
ariadna |
53 |
void FillMeUp(char* in_string) {
|
|
|
54 |
int i = 0;
|
|
|
55 |
while (in_string[i] != '\\0') {
|
|
|
56 |
in_string[i] = 'X';
|
|
|
57 |
i++;
|
|
|
58 |
}
|
|
|
59 |
}
|
|
|
60 |
What would happen if a non-terminated string were input to this function?
|
1 |
efrain |
61 |

1441 |
ariadna |
62 |

|
|
63 |
EOF;
|
|
|
64 |

|
|
65 |
// Dataset name => [expected text, html_to_text() options, HTML input].
return [
|
|
|
66 |
// Image alt tag replacements.
|
|
|
67 |
'Image alt tag' => [
|
|
|
68 |
'[edit]',
|
|
|
69 |
[],
|
|
|
70 |
'<img src="edit.png" alt="edit" />',
|
|
|
71 |
],
|
|
|
72 |
'Image alt tag between strings' => [
|
|
|
73 |
'xx[some gif]xx',
|
|
|
74 |
[
|
|
|
75 |
'dolinks' => false,
|
|
|
76 |
],
|
|
|
77 |
'xx<img src="gif.gif" alt="some gif" />xx',
|
|
|
78 |
],
|
|
|
79 |
'core_text integration' => [
|
|
|
80 |
'ŽLUŤOUČKÝ KONÍČEK',
|
|
|
81 |
['dolinks' => false],
|
|
|
82 |
'<strong>Žluťoučký koníček</strong>',
|
|
|
83 |
],
|
|
|
84 |
'No strip slashes in a tag' => [
|
|
|
85 |
'[\edit]',
|
|
|
86 |
[],
|
|
|
87 |
'<img src="edit.png" alt="\edit" />',
|
|
|
88 |
],
|
|
|
89 |
'No strip slashes in a string' => [
|
|
|
90 |
'\\magic\\quotes\\are\\\\horrible',
|
|
|
91 |
[],
|
|
|
92 |
'\\magic\\quotes\\are\\\\horrible',
|
|
|
93 |
],
|
|
|
94 |
'Protect "0"' => [
|
|
|
95 |
'0',
|
|
|
96 |
['dolinks' => false],
|
|
|
97 |
'0',
|
|
|
98 |
],
|
|
|
99 |
'Invalid HTML 1' => [
|
|
|
100 |
'Gin & Tonic',
|
|
|
101 |
[],
|
|
|
102 |
'Gin & Tonic',
|
|
|
103 |
],
|
|
|
104 |
'Invalid HTML 2' => [
|
|
|
105 |
'Gin > Tonic',
|
|
|
106 |
[],
|
|
|
107 |
'Gin > Tonic',
|
|
|
108 |
],
|
|
|
109 |
'Invalid HTML 3' => [
|
|
|
110 |
'Gin < Tonic',
|
|
|
111 |
[],
|
|
|
112 |
'Gin < Tonic',
|
|
|
113 |
],
|
|
|
114 |
'Simple test 1' => [
|
|
|
115 |
"_Hello_ WORLD!\n",
|
|
|
116 |
[],
|
|
|
117 |
'<p><i>Hello</i> <b>world</b>!</p>',
|
|
|
118 |
],
|
|
|
119 |
'Simple test 2' => [
|
|
|
120 |
"All the WORLD’S a stage.\n\n-- William Shakespeare\n",
|
|
|
121 |
[],
|
|
|
122 |
'<p>All the <strong>world’s</strong> a stage.</p><p>-- William Shakespeare</p>',
|
|
|
123 |
],
|
|
|
124 |
'Simple test 3' => [
|
|
|
125 |
"HELLO WORLD!\n\n",
|
|
|
126 |
[],
|
|
|
127 |
'<h1>Hello world!</h1>',
|
|
|
128 |
],
|
|
|
129 |
'Simple test 4' => [
|
|
|
130 |
"Hello\nworld!",
|
|
|
131 |
[],
|
|
|
132 |
'Hello<br />world!',
|
|
|
133 |
],
|
|
|
134 |
'No wrapping when width set to 0' => [
|
|
|
135 |
$long,
|
|
|
136 |
['width' => 0],
|
|
|
137 |
$long,
|
|
|
138 |
],
|
|
|
139 |
'Wrapping when width set to default' => [
|
|
|
140 |
$wrapped,
|
|
|
141 |
[],
|
|
|
142 |
$long,
|
|
|
143 |
],
|
|
|
144 |
'Trailing whitespace removal' => [
|
|
|
145 |
'With trailing whitespace and some more text',
|
|
|
146 |
[],
|
|
|
147 |
"With trailing whitespace \nand some more text",
|
|
|
148 |
],
|
|
|
149 |
'PRE parsing' => [
|
|
|
150 |
$strconv,
|
|
|
151 |
[],
|
|
|
152 |
$strorig,
|
|
|
153 |
],
|
|
|
154 |
'Strip script tags' => [
|
|
|
155 |
'Interesting text',
|
|
|
156 |
[],
|
|
|
157 |
'Interesting <script type="text/javascript">var what_a_mess = "Yuck!";</script> text',
|
|
|
158 |
],
|
|
|
159 |
'Trailing spaces before newline or tab' => [
|
|
|
160 |
"Some text with trailing space\n\nAnd some more text\n",
|
|
|
161 |
[],
|
|
|
162 |
'<p>Some text with trailing space </p> <p>And some more text</p>',
|
|
|
163 |
],
|
|
|
164 |
'Trailing spaces before newline or tab (list)' => [
|
|
|
165 |
"\t* Some text with trailing space\n\t* And some more text\n\n",
|
|
|
166 |
[],
|
|
|
167 |
'<ul><li>Some text with trailing space </li> <li> And some more text </li> </ul>',
|
|
|
168 |
],
|
|
|
169 |
];
|
1 |
efrain |
170 |
}
|
|
|
171 |
|
|
|
172 |
/**
|
1441 |
ariadna |
173 |
* Test html2text with various examples.
|
|
|
174 |
*
|
|
|
175 |
* @dataProvider examples_provider
|
|
|
176 |
* @param string $expected
|
|
|
177 |
* @param array $options
|
|
|
178 |
* @param string $html
|
1 |
efrain |
179 |
*/
|
1441 |
ariadna |
180 |
public function test_runner(
    string $expected,
    array $options,
    string $html,
): void {
    // The options array is unpacked into the call: string keys ('width', 'dolinks') become
    // named arguments, and an empty array passes no extra arguments at all.
    $actual = html_to_text($html, ...$options);

    // Strict comparison: the converted text must match the expected string exactly.
    $this->assertSame($expected, $actual);
}
|
|
|
187 |
|
|
|
188 |
/**
|
|
|
189 |
* Test the links list enumeration.
|
|
|
190 |
*/
|
11 |
efrain |
191 |
public function test_build_link_list(): void {
    // Checks that html_to_text() numbers collected links from [1], reuses the number of a
    // repeated URL, and lists each numbered URL on a line of its own.

    // Note the trailing whitespace left intentionally in the text after first link.
    $text = 'Total of <a title="List of integrated issues"
href="http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D"> ' . '
<strong>27 issues</strong></a> and <a href="http://another.url/?f=a&b=2">some</a> other
have been fixed <strong><a href="http://third.url/view.php">last week</a></strong>';

    // Do not collect links.
    $result = html_to_text($text, 5000, false);
    $this->assertSame('Total of 27 ISSUES and some other have been fixed LAST WEEK', $result);

    // Collect and enumerate links.
    $result = html_to_text($text, 5000, true);
    $this->assertStringStartsWith(
        'Total of 27 ISSUES [1] and some [2] other have been fixed LAST WEEK [3]',
        $result
    );
    // Numbering starts at 1, so a [0] marker must never appear.
    $this->assertStringNotContainsString('[0]', $result);
    $expectedlinks = [
        1 => 'http://tr.mdl.org/sh.jspa?r=1&j=p+%3D+%22I+d%22+%3D',
        2 => 'http://another.url/?f=a&b=2',
        3 => 'http://third.url/view.php',
    ];
    foreach ($expectedlinks as $index => $url) {
        // Each collected link must occupy a whole line ("m" makes ^ and $ match per line).
        $this->assertMatchesRegularExpression(
            '|^' . preg_quote("[{$index}] {$url}", '|') . '$|m',
            $result
        );
    }
    // Only three links were present, so no fourth marker may be generated.
    $this->assertStringNotContainsString('[4]', $result);

    // Test multiple occurrences of the same URL.
    $text = '<p>See <a href="http://moodle.org">moodle.org</a>,
<a href="http://www.google.fr">google</a>, <a href="http://www.univ-lemans.fr">univ-lemans</a>
and <a href="http://www.google.fr">google</a>.
Also try <a href="https://www.google.fr">google via HTTPS</a>.';
    $result = html_to_text($text, 5000, true);
    // The repeated http://www.google.fr link reuses [2]; the HTTPS variant is a distinct URL and gets [4].
    $this->assertStringStartsWith(
        'See moodle.org [1], google [2], univ-lemans [3] and google [2]. Also try google via HTTPS [4].',
        $result
    );
    $this->assertStringNotContainsString('[0]', $result);
    $expectedlinks = [
        1 => 'http://moodle.org',
        2 => 'http://www.google.fr',
        3 => 'http://www.univ-lemans.fr',
        4 => 'https://www.google.fr',
    ];
    foreach ($expectedlinks as $index => $url) {
        $this->assertMatchesRegularExpression(
            '|^' . preg_quote("[{$index}] {$url}", '|') . '$|m',
            $result
        );
    }
    // Four distinct URLs in total, so no fifth marker may be generated.
    $this->assertStringNotContainsString('[5]', $result);
}
|
|
|
226 |
}
|