AutorÃa | Ultima modificación | Ver Log |
// This file is part of Moodle - http://moodle.org///// Moodle is free software: you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation, either version 3 of the License, or// (at your option) any later version.//// Moodle is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the// GNU General Public License for more details.//// You should have received a copy of the GNU General Public License// along with Moodle. If not, see <http://www.gnu.org/licenses/>./*** @module moodle-editor_atto-editor* @submodule clean*//*** Functions for the Atto editor to clean the generated content.** See {{#crossLink "M.editor_atto.Editor"}}{{/crossLink}} for details.** @namespace M.editor_atto* @class EditorClean*/function EditorClean() {}EditorClean.ATTRS = {};EditorClean.prototype = {/*** Clean the generated HTML content without modifying the editor content.** This includes removes all YUI ids from the generated content.** @return {string} The cleaned HTML content.*/getCleanHTML: function() {// Clone the editor so that we don't actually modify the real content.var editorClone = this.editor.cloneNode(true),html;// Remove all YUI IDs.Y.each(editorClone.all('[id^="yui"]'), function(node) {node.removeAttribute('id');});editorClone.all('.atto_control').remove(true);html = editorClone.get('innerHTML');// Revert untouched editor contents to an empty string.var emptyContents = [// For FF and Chrome.'<p></p>','<p><br></p>','<br>','<p dir="rtl" style="text-align: right;"></p>','<p dir="rtl" style="text-align: right;"><br></p>','<p dir="ltr" style="text-align: left;"></p>','<p dir="ltr" style="text-align: left;"><br></p>',// For IE 9 and 10.'<p> </p>','<p><br> </p>','<p dir="rtl" style="text-align: right;"> </p>','<p dir="rtl" style="text-align: right;"><br> </p>','<p dir="ltr" style="text-align: left;"> </p>','<p dir="ltr" style="text-align: left;"><br> </p>'];if (emptyContents.includes(html)) {return '';}// Remove any and all nasties from source.return this._cleanHTML(html);},/*** Clean the HTML content of the editor.** @method cleanEditorHTML* @chainable*/cleanEditorHTML: function() {var startValue = this.editor.get('innerHTML');this.editor.set('innerHTML', this._cleanHTML(startValue));return this;},/*** Clean the specified HTML content and remove any content which could cause issues.** @method _cleanHTML* @private* @param {String} content The content to clean* @param {Boolean} deepClean If true, do a more in depth (and resource intensive) cleaning of the HTML.* @return {String} The cleaned HTML*/_cleanHTML: function(content, deepClean) {// Removing limited things that can break the page or a disallowed, like unclosed comments, style blocks, etc.var rules = [// Remove any style blocks. Some browsers do not work well with them in a contenteditable.// Plus style blocks are not allowed in body html, except with "scoped", which most browsers don't support as of 2015.// Reference: "http://stackoverflow.com/questions/1068280/javascript-regex-multiline-flag-doesnt-work"{regex: /<style[^>]*>[\s\S]*?<\/style>/gi, replace: ""},// Remove any open HTML comment opens that are not followed by a close. This can completely break page layout.{regex: /<!--(?![\s\S]*?-->)/gi, replace: ""},// Source: "http://www.codinghorror.com/blog/2006/01/cleaning-words-nasty-html.html"// Remove forbidden tags for content, title, meta, style, st0-9, head, font, html, body, link.{regex: /<\/?(?:title|meta|style|st\d|head\b|font|html|body|link)[^>]*?>/gi, replace: ""}];content = this._filterContentWithRules(content, rules);if (deepClean) {content = this._cleanHTMLLists(content);}return content;},/*** Take the supplied content and run on the supplied regex rules.** @method _filterContentWithRules* @private* @param {String} content The content to clean* @param {Array} rules An array of structures: [ {regex: /something/, replace: "something"}, {...}, ...]* @return {String} The cleaned content*/_filterContentWithRules: function(content, rules) {var i = 0;for (i = 0; i < rules.length; i++) {content = content.replace(rules[i].regex, rules[i].replace);}return content;},/*** Intercept and clean html paste events.** @method pasteCleanup* @param {Object} sourceEvent The YUI EventFacade object* @return {Boolean} True if the passed event should continue, false if not.*/pasteCleanup: function(sourceEvent) {// We only expect paste events, but we will check anyways.if (sourceEvent.type === 'paste') {// Register the delayed paste cleanup. We will cancel it if we register the fallback cleanup.var delayedCleanup = this.postPasteCleanupDelayed();// The YUI event wrapper doesn't provide paste event info, so we need the underlying event.var event = sourceEvent._event;// Check if we have a valid clipboardData object in the event.// IE has a clipboard object at window.clipboardData, but as of IE 11, it does not provide HTML content access.if (event && event.clipboardData && event.clipboardData.getData && event.clipboardData.types) {// Check if there is HTML type to be pasted, if we can get it, we want to scrub before insert.var types = event.clipboardData.types;var isHTML = false;// Different browsers use different containers to hold the types, so test various functions.if (typeof types.contains === 'function') {isHTML = types.contains('text/html');} else if (typeof types.indexOf === 'function') {isHTML = (types.indexOf('text/html') > -1);}var content;if (isHTML) {// Get the clipboard content.try {content = event.clipboardData.getData('text/html');} catch (error) {// Something went wrong. Fallback.delayedCleanup.cancel();this.fallbackPasteCleanupDelayed();return true;}// Stop the original paste.sourceEvent.preventDefault();// Scrub the paste content.content = this._cleanPasteHTML(content);// Insert the content.this.insertContentAtFocusPoint(content);// Update the text area.this.updateOriginal();return false;} else {try {// Plaintext clipboard content can only be retrieved this way.content = event.clipboardData.getData('text');} catch (error) {// Something went wrong. Fallback.// Due to poor cross browser clipboard compatibility, the failure to find html doesn't mean it isn't there.// Wait for the clipboard event to finish then fallback clean the entire editor.delayedCleanup.cancel();this.fallbackPasteCleanupDelayed();return true;}}} else {// If we reached a here, this probably means the browser has limited (or no) clipboard support.// Wait for the clipboard event to finish then fallback clean the entire editor.this.fallbackPasteCleanupDelayed();return true;}}// We should never get here - we must have received a non-paste event for some reason.// Um, just call updateOriginalDelayed() - it's safe.this.updateOriginalDelayed();return true;},/*** Calls postPasteCleanup on a short timer to allow the paste event handlers to complete, then deep clean the content.** @method postPasteCleanupDelayed* @return {object}* @chainable*/postPasteCleanupDelayed: function() {Y.soon(Y.bind(this.postPasteCleanup, this));return this;},/*** Do additional cleanup after the paste is complete.** @method postPasteCleanup* @return {object}* @chainable*/postPasteCleanup: function() {Y.log('Executing delayed post paste cleanup', 'debug', LOGNAME);// Save the current selection (cursor position).var selection = window.rangy.saveSelection();// Get, clean, and replace the content in the editable.var content = this.editor.get('innerHTML');this.editor.set('innerHTML', this._cleanHTML(content, true));// Update the textarea.this.updateOriginal();// Restore the selection (cursor position).window.rangy.restoreSelection(selection);return this;},/*** Cleanup code after a paste event if we couldn't intercept the paste content.** @method fallbackPasteCleanup* @return {object}* @chainable*/fallbackPasteCleanup: function() {Y.log('Using fallbackPasteCleanup for atto cleanup', 'debug', LOGNAME);// Save the current selection (cursor position).var selection = window.rangy.saveSelection();// Get, clean, and replace the content in the editable.var content = this.editor.get('innerHTML');this.editor.set('innerHTML', this._cleanHTML(this._cleanPasteHTML(content), true));// Update the textarea.this.updateOriginal();// Restore the selection (cursor position).window.rangy.restoreSelection(selection);return this;},/*** Calls fallbackPasteCleanup on a short timer to allow the paste event handlers to complete.** @method fallbackPasteCleanupDelayed* @chainable*/fallbackPasteCleanupDelayed: function() {Y.soon(Y.bind(this.fallbackPasteCleanup, this));return this;},/*** Cleanup html that comes from WYSIWYG paste events. These are more likely to contain messy code that we should strip.** @method _cleanPasteHTML* @private* @param {String} content The html content to clean* @return {String} The cleaned HTML*/_cleanPasteHTML: function(content) {// Return an empty string if passed an invalid or empty object.if (!content || content.length === 0) {return "";}// Rules that get rid of the real-nasties and don't care about normalize code (correct quotes, white spaces, etc).var rules = [// Stuff that is specifically from MS Word and similar office packages.// Remove all garbage after closing html tag.{regex: /<\s*\/html\s*>([\s\S]+)$/gi, replace: ""},// Remove if comment blocks.{regex: /<!--\[if[\s\S]*?endif\]-->/gi, replace: ""},// Remove start and end fragment comment blocks.{regex: /<!--(Start|End)Fragment-->/gi, replace: ""},// Remove any xml blocks.{regex: /<xml[^>]*>[\s\S]*?<\/xml>/gi, replace: ""},// Remove any <?xml><\?xml> blocks.{regex: /<\?xml[^>]*>[\s\S]*?<\\\?xml>/gi, replace: ""},// Remove <o:blah>, <\o:blah>.{regex: /<\/?\w+:[^>]*>/gi, replace: ""}];// Apply the first set of harsher rules.content = this._filterContentWithRules(content, rules);// Apply the standard rules, which mainly cleans things like headers, links, and style blocks.content = this._cleanHTML(content);// Check if the string is empty or only contains whitespace.if (content.length === 0 || !content.match(/\S/)) {return content;}// Now we let the browser normalize the code by loading it into the DOM and then get the html back.// This gives us well quoted, well formatted code to continue our work on. Word may provide very poorly formatted code.var holder = document.createElement('div');holder.innerHTML = content;content = holder.innerHTML;// Free up the DOM memory.holder.innerHTML = "";// Run some more rules that care about quotes and whitespace.rules = [// Get all class attributes so we can work on them.{regex: /(<[^>]*?class\s*?=\s*?")([^>"]*)(")/gi, replace: function(match, group1, group2, group3) {// Remove MSO classes.group2 = group2.replace(/(?:^|[\s])[\s]*MSO[_a-zA-Z0-9\-]*/gi, "");// Remove Apple- classes.group2 = group2.replace(/(?:^|[\s])[\s]*Apple-[_a-zA-Z0-9\-]*/gi, "");return group1 + group2 + group3;}},// Remove OLE_LINK# anchors that may litter the code.{regex: /<a [^>]*?name\s*?=\s*?"OLE_LINK\d*?"[^>]*?>\s*?<\/a>/gi, replace: ""}];// Clean all style attributes from the text.content = this._cleanStyles(content);// Apply the rules.content = this._filterContentWithRules(content, rules);// Reapply the standard cleaner to the content.content = this._cleanHTML(content);// Clean unused spans out of the content.content = this._cleanSpans(content);return content;},/*** Clean all inline styles from pasted text.** This code intentionally doesn't use YUI Nodes. YUI was quite a bit slower at this, so using raw DOM objects instead.** @method _cleanStyles* @private* @param {String} content The content to clean* @return {String} The cleaned HTML*/_cleanStyles: function(content) {var holder = document.createElement('div');holder.innerHTML = content;var elementsWithStyle = holder.querySelectorAll('[style]');var i = 0;for (i = 0; i < elementsWithStyle.length; i++) {elementsWithStyle[i].removeAttribute('style');}var elementsWithClass = holder.querySelectorAll('[class]');for (i = 0; i < elementsWithClass.length; i++) {elementsWithClass[i].removeAttribute('class');}return holder.innerHTML;},/*** Clean empty or un-unused spans from passed HTML.** This code intentionally doesn't use YUI Nodes. YUI was quite a bit slower at this, so using raw DOM objects instead.** @method _cleanSpans* @private* @param {String} content The content to clean* @return {String} The cleaned HTML*/_cleanSpans: function(content) {// Return an empty string if passed an invalid or empty object.if (!content || content.length === 0) {return "";}// Check if the string is empty or only contains whitespace.if (content.length === 0 || !content.match(/\S/)) {return content;}var rules = [// Remove unused class, style, or id attributes. This will make empty tag detection easier later.{regex: /(<[^>]*?)(?:[\s]*(?:class|style|id)\s*?=\s*?"\s*?")+/gi, replace: "$1"}];// Apply the rules.content = this._filterContentWithRules(content, rules);// Reference: "http://stackoverflow.com/questions/8131396/remove-nested-span-without-id"// This is better to run detached from the DOM, so the browser doesn't try to update on each change.var holder = document.createElement('div');holder.innerHTML = content;var spans = holder.getElementsByTagName('span');// Since we will be removing elements from the list, we should copy it to an array, making it static.var spansarr = Array.prototype.slice.call(spans, 0);spansarr.forEach(function(span) {if (!span.hasAttributes()) {// If no attributes (id, class, style, etc), this span is has no effect.// Move each child (if they exist) to the parent in place of this span.while (span.firstChild) {span.parentNode.insertBefore(span.firstChild, span);}// Remove the now empty span.span.parentNode.removeChild(span);}});return holder.innerHTML;},/*** This is a function that searches for, and attempts to correct certain issues with ul/ol html lists.* This is needed because these lists are used heavily in page layout, and content with bad tags can* lead to broke course pages.** The theory of operation here is to linearly process the incoming content, counting the opening and closing* of list tags, and determining when there is a mismatch.** The specific issues this should be able to correct are:* - Orphaned li elements will be wrapped in a set of ul tags.* - li elements inside li elements.* - An extra closing ul, or ol tag will be discarded.* - An extra closing li tag will have an opening tag added if appropriate, or will be discarded.* - If there is an unmatched list open tag, a matching close tag will be inserted.** It does it's best to match the case of corrected tags. Even though not required by html spec,* it seems like the safer route.** A note on parent elements of li. This code assumes that li must have a ol or ul parent.* There are two other potential other parents of li. They are menu and dir. The dir tag was deprecated in* HTML4, and removed in HTML5. The menu tag is experimental as of this writing, and basically doesn't work* in any browsers, even Firefox, which theoretically has limited support for it. If other parents of li* become viable, they will need to be added to this code.** @method _cleanHTMLLists* @private* @param {String} content The content to clean* @return {String} The cleaned content*/_cleanHTMLLists: function(content) {var output = '',toProcess = content,match = null,openTags = [],currentTag = null,previousTag = null;// Use a regular expression to find the next open or close li, ul, or ol tag.// Keep going until there are no more matching tags left.// This expression looks for whole words by employing the word boundary (\b) metacharacter.while ((match = toProcess.match(/<(\/?)(li|ul|ol)\b[^>]*>/i))) {currentTag = {tag: match[2],tagLowerCase: match[2].toLowerCase(),fullTag: match[0],isOpen: (match[1].length == 1) ? false : true};// Get the most recent open tag.previousTag = (openTags.length) ? openTags[openTags.length - 1] : null;// Slice up the content based on the match and add content before the match to output.output += toProcess.slice(0, match.index);toProcess = toProcess.slice(match.index + match[0].length);// Now the full content is in output + currentTag.fullTag + toProcess. When making fixes, it is best to push the fix and// fullTag back onto the front or toProcess, then restart the loop. This allows processing to follow the normal path// most often. But sometimes we will need to modify output to insert or remove tags in the already complete code.if (currentTag.isOpen) {// We are at the opening phase of a tag.// We have to do special processing for list items, as they can only be children of ul and ol tags.if (currentTag.tagLowerCase === 'li') {if (!previousTag) {// This means we have are opening a li, but aren't in a list. This is not allowed!// We are going to check for the count of open and close ol tags ahead to decide what to do.var closeCount = (toProcess.match(/<\/(ol)[ >]/ig) || []).length;var openCount = (toProcess.match(/<(ol)[ >]/ig) || []).length;if (closeCount > openCount) {// There are more close ol's ahead than opens ahead. So open the ol and try again.Y.log('Adding an opening ol for orphan li', 'debug', LOGNAME);toProcess = '<ol>' + currentTag.fullTag + toProcess;continue;}// For the other cases, just open a ul and try again. Later the closing ul will get matched if it exists,// or if it doesn't one will automatically get inserted.Y.log('Adding an opening ul for orphan li', 'debug', LOGNAME);toProcess = '<ul>' + currentTag.fullTag + toProcess;continue;}if (previousTag.tagLowerCase === 'li') {// You aren't allowed to nest li tags. Close the current one before starting the new one.Y.log('Adding a closing ' + previousTag.tag + ' before opening a new one.', 'debug', LOGNAME);toProcess = '</' + previousTag.tag + '>' + currentTag.fullTag + toProcess;continue;}// Previous tag must be a list at this point, so we can continue.}// If we made it this far, record the tag to the open tags list.openTags.push({tag: currentTag.tag,tagLowerCase: currentTag.tagLowerCase,position: output.length,length: currentTag.fullTag.length});} else {// We are processing a closing tag.if (openTags.length == 0) {// We are closing a tag that isn't open. That's a problem. Just discarding should be safe.Y.log('Discarding extra ' + currentTag.fullTag + ' tag.', 'debug', LOGNAME);continue;}if (previousTag.tagLowerCase === currentTag.tagLowerCase) {// Closing a tag that matches the open tag. This is the nominal case. Pop it off, and update previousTag.if (currentTag.tag != previousTag.tag) {// This would mean cases don't match between the opening and closing tag.// We are going to swap them to match, even though not required.currentTag.fullTag = currentTag.fullTag.replace(currentTag.tag, previousTag.tag);}openTags.pop();previousTag = (openTags.length) ? openTags[openTags.length - 1] : null;} else {// We are closing a tag that isn't the most recent open one open, so we have a mismatch.if (currentTag.tagLowerCase === 'li' && previousTag.liEnd && (previousTag.liEnd < output.length)) {// We are closing an unopened li, but the parent list has complete li tags more than 0 chars ago.// Assume we are missing an open li at the end of the previous li, and insert there.Y.log('Inserting opening ' + currentTag.tag + ' after previous li.', 'debug', LOGNAME);output = this._insertString(output, '<' + currentTag.tag + '>', previousTag.liEnd);} else if (currentTag.tagLowerCase === 'li' && !previousTag.liEnd &&((previousTag.position + previousTag.length) < output.length)) {// We are closing an unopened li, and the parent has no previous lis in it, but opened more than 0// chars ago. Assume we are missing a starting li, and insert it right after the list opened.Y.log('Inserting opening ' + currentTag.tag + ' at start of parent.', 'debug', LOGNAME);output = this._insertString(output, '<' + currentTag.tag + '>', previousTag.position + previousTag.length);} else if (previousTag.tagLowerCase === 'li') {// We must be trying to close a ul/ol while in a li. Just assume we are missing a closing li.Y.log('Adding a closing ' + previousTag.tag + ' before closing ' + currentTag.tag + '.', 'debug', LOGNAME);toProcess = '</' + previousTag.tag + '>' + currentTag.fullTag + toProcess;continue;} else {// Here we must be trying to close a tag that isn't open, or is open higher up. Just discard.// If there ends up being a missing close tag later on, that will get fixed separately.Y.log('Discarding incorrect ' + currentTag.fullTag + '.', 'debug', LOGNAME);continue;}}// If we have a valid closing li tag, and a list, record where the li ended.if (currentTag.tagLowerCase === 'li' && previousTag) {previousTag.liEnd = output.length + currentTag.fullTag.length;}}// Now we can add the tag to the output.output += currentTag.fullTag;}// Add anything left in toProcess to the output.output += toProcess;// Anything still in the openTags list are extra and need to be dealt with.if (openTags.length) {// Work on the list in reverse order so positions stay correct.while ((currentTag = openTags.pop())) {if (currentTag.liEnd) {// We have a position for the last list item in this element. Insert the closing it after that.output = this._insertString(output, '</' + currentTag.tag + '>', currentTag.liEnd);Y.log('Adding closing ' + currentTag.tag + ' based on last li location.', 'debug', LOGNAME);} else {// If there weren't any children list items, then we should just remove the tag where it started.// This will also remote an open li tag that runs to the end of the content, since it has no children lis.output = output.slice(0, currentTag.position) + output.slice(currentTag.position + currentTag.length);Y.log('Removing opening ' + currentTag.fullTag + ' because it was missing closing.', 'debug', LOGNAME);}}}return output;},/*** Insert a string in the middle of an existing string at the specified location.** @method _insertString* @param {String} content The subject of the insertion.* @param {String} insert The string that will be inserted.* @param {Number} position The location to make the insertion.* @return {String} The string with the new content inserted.*/_insertString: function(content, insert, position) {return content.slice(0, position) + insert + content.slice(position);}};Y.Base.mix(Y.M.editor_atto.Editor, [EditorClean]);