@ckeditor/ckeditor5-paste-from-office - npm Package Compare versions

Comparing version 35.4.0 to 36.0.0

src/normalizer.js

build/paste-from-office.js

		/*!
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md.
		/(()=>{var e={945:(e,t,n)=>{e.exports=n(79)("./src/clipboard.js")},704:(e,t,n)=>{e.exports=n(79)("./src/core.js")},492:(e,t,n)=>{e.exports=n(79)("./src/engine.js")},79:e=>{"use strict";e.exports=CKEditor5.dll}},t={};function n(r){var s=t[r];if(void 0!==s)return s.exports;var i=t[r]={exports:{}};return e[r](i,i.exports,n),i.exports}n.d=(e,t)=>{for(var r in t)n.o(t,r)&&!n.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},n.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),n.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})};var r={};(()=>{"use strict";n.r(r),n.d(r,{PasteFromOffice:()=>v});var e=n(704),t=n(945),s=n(492);function i(e,t,n,{blockElements:r,inlineObjectElements:s}){let i=n.createPositionAt(e,"forward"==t?"after":"before");return i=i.getLastMatchingPosition((({item:e})=>e.is("element")&&!r.includes(e.name)&&!s.includes(e.name)),{direction:t}),"forward"==t?i.nodeAfter:i.nodeBefore}function o(e,t){return!!e&&e.is("element")&&t.includes(e.name)}function c(e,t){if(!e.childCount)return;const n=new s.UpcastWriter(e.document),r=function(e,t){const n=t.createRangeIn(e),r=new s.Matcher({name:/^p\|h\d+$/,styles:{"mso-list":/./}}),i=[];for(const e of n)if("elementStart"===e.type&&r.match(e.item)){const t=u(e.item);i.push({element:e.item,id:t.id,order:t.order,indent:t.indent})}return i}(e,n);if(!r.length)return;let i=null,o=1;r.forEach(((e,c)=>{const u=function(e,t){if(!e)return!0;if(e.id!==t.id)return t.indent-e.indent!=1;const n=t.element.previousSibling;if(!n)return!0;return r=n,!(r.is("element","ol")\|\|r.is("element","ul"));var r}(r[c-1],e),f=u?null:r[c-1],d=(p=e,(m=f)?p.indent-m.indent:p.indent-1);var m,p;if(u&&(i=null,o=1),!i\|\|0!==d){const r=function(e,t){const n=new RegExp(`@list 
l${e.id}:level${e.indent}\\s({[^}])`,"gi"),r=/mso-level-number-format:([^;]{0,100});/gi,s=/mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/gi,i=n.exec(t);let o="decimal",c="ol",a=null;if(i&&i[1]){const t=r.exec(i[1]);if(t&&t[1]&&(o=t[1].trim(),c="bullet"!==o&&"image"!==o?"ol":"ul"),"bullet"===o){const t=function(e){const t=function(e){if(e.getChild(0).is("$text"))return null;for(const t of e.getChildren()){if(!t.is("element","span"))continue;const e=t.getChild(0);return e.is("$text")?e:e.getChild(0)}}(e);if(!t)return null;const n=t._data;if("o"===n)return"circle";if("·"===n)return"disc";if("§"===n)return"square";return null}(e.element);t&&(o=t)}else{const e=s.exec(i[1]);e&&e[1]&&(a=parseInt(e[1]))}}return{type:c,startIndex:a,style:l(o)}}(e,t);if(i){if(e.indent>o){const e=i.getChild(i.childCount-1),t=e.getChild(e.childCount-1);i=a(r,t,n),o+=1}else if(e.indent<o){const t=o-e.indent;i=function(e,t){const n=e.getAncestors({parentFirst:!0});let r=null,s=0;for(const e of n)if("ul"!==e.name&&"ol"!==e.name\|\|s++,s===t){r=e;break}return r}(i,t),o=parseInt(e.indent)}}else i=a(r,e.element,n);e.indent<=o&&(i.is("element",r.type)\|\|(i=n.rename(r.type,i)))}const g=function(e,t){return function(e,t){const n=new s.Matcher({name:"span",styles:{"mso-list":"Ignore"}}),r=t.createRangeIn(e);for(const e of r)"elementStart"===e.type&&n.match(e.item)&&t.remove(e.item)}(e,t),t.rename("li",e)}(e.element,n);n.appendChild(g,i)}))}function l(e){if(e.startsWith("arabic-leading-zero"))return"decimal-leading-zero";switch(e){case"alpha-upper":return"upper-alpha";case"alpha-lower":return"lower-alpha";case"roman-upper":return"upper-roman";case"roman-lower":return"lower-roman";case"circle":case"disc":case"square":return e;default:return null}}function a(e,t,n){const r=t.parent,s=n.createElement(e.type),i=r.getChildIndex(t)+1;return n.insertChild(i,s,r),e.style&&n.setStyle("list-style-type",e.style,s),e.startIndex&&e.startIndex>1&&n.setAttribute("start",e.startIndex,s),s}function u(e){const 
t={},n=e.getStyle("mso-list");if(n){const e=n.match(/(^\|\s{1,100})l(\d+)/i),r=n.match(/\s{0,100}lfo(\d+)/i),s=n.match(/\s{0,100}level(\d+)/i);e&&r&&s&&(t.id=e[2],t.order=r[1],t.indent=s[1])}return t}const f=/id=("\|')docs-internal-guid-[-0-9a-f]+("\|')/i;class d{constructor(e){this.document=e}isActive(e){return f.test(e)}execute(e){const t=new s.UpcastWriter(this.document),{body:n}=e._parsedData;!function(e,t){for(const n of e.getChildren())if(n.is("element","b")&&"normal"===n.getStyle("font-weight")){const r=e.getChildIndex(n);t.remove(n),t.insertChild(r,n.getChildren(),e)}}(n,t),function(e,t){for(const n of t.createRangeIn(e)){const e=n.item;if(e.is("element","li")){const n=e.getChild(0);n&&n.is("element","p")&&t.unwrapElement(n)}}}(n,t),function(e,t){const n=new s.ViewDocument(t.document.stylesProcessor),r=new s.DomConverter(n,{renderingMode:"data"}),c=r.blockElements,l=r.inlineObjectElements,a=[];for(const n of t.createRangeIn(e)){const e=n.item;if(e.is("element","br")){const n=i(e,"forward",t,{blockElements:c,inlineObjectElements:l}),r=i(e,"backward",t,{blockElements:c,inlineObjectElements:l}),s=o(n,c);(o(r,c)\|\|s)&&a.push(e)}}for(const e of a)e.hasClass("Apple-interchange-newline")?t.remove(e):t.replace(e,t.createElement("p"))}(n,t),e.content=n}}function m(e,t){if(!e.childCount)return;const n=new s.UpcastWriter,r=function(e,t){const n=t.createRangeIn(e),r=new s.Matcher({name:/v:(.+)/}),i=[];for(const e of n){if("elementStart"!=e.type)continue;const t=e.item,n=t.previousSibling&&t.previousSibling.name\|\|null;r.match(t)&&t.getAttribute("o:gfxdata")&&"v:shapetype"!==n&&i.push(e.item.getAttribute("id"))}return i}(e,n);!function(e,t,n){const r=n.createRangeIn(t),i=new s.Matcher({name:"img"}),o=[];for(const t of r)if(i.match(t.item)){const n=t.item,r=n.getAttribute("v:shapes")?n.getAttribute("v:shapes").split(" "):[];r.length&&r.every((t=>e.indexOf(t)>-1))?o.push(n):n.getAttribute("src")\|\|o.push(n)}for(const e of o)n.remove(e)}(r,e,n),function(e,t){const 
n=t.createRangeIn(e),r=new s.Matcher({name:/v:(.+)/}),i=[];for(const e of n)"elementStart"==e.type&&r.match(e.item)&&i.push(e.item);for(const e of i)t.remove(e)}(e,n);const i=function(e,t){const n=t.createRangeIn(e),r=new s.Matcher({name:"img"}),i=[];for(const e of n)r.match(e.item)&&e.item.getAttribute("src").startsWith("file://")&&i.push(e.item);return i}(e,n);i.length&&function(e,t,n){if(e.length===t.length)for(let r=0;r<e.length;r++){const s=`data:${t[r].type};base64,${p(t[r].hex)}`;n.setAttribute("src",s,e[r])}}(i,function(e){if(!e)return[];const t=/{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\\\blipuid\s?[\da-fA-F]+)?[\s}]?/,n=new RegExp("(?:("+t.source+"))([\\da-fA-F\\s]+)\\}","g"),r=e.match(n),s=[];if(r)for(const e of r){let n=!1;e.includes("\\pngblip")?n="image/png":e.includes("\\jpegblip")&&(n="image/jpeg"),n&&s.push({hex:e.replace(t,"").replace(/[^\da-fA-F]/g,""),type:n})}return s}(t),n)}function p(e){return btoa(e.match(/\w{2}/g).map((e=>String.fromCharCode(parseInt(e,16)))).join(""))}const g=/<meta\sname="?generator"?\scontent="?microsoft\sword\s\d+"?\/?>/i,h=/xmlns:o="urn:schemas-microsoft-com/i;class y{constructor(e){this.document=e}isActive(e){return g.test(e)\|\|h.test(e)}execute(e){const{body:t,stylesString:n}=e._parsedData;c(t,n),m(t,e.dataTransfer.getData("text/rtf")),e.content=t}}function b(e){return e.replace(/<span(?: class="Apple-converted-space"\|)>(\s+)<\/span>/g,((e,t)=>1===t.length?" 
":Array(t.length+1).join(" ").substr(0,t.length)))}function w(e,t){const n=new DOMParser,r=function(e){return b(b(e)).replace(/(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]?)[\r\n]+([^\S\r\n]<\/span>)/g,"$1$2").replace(/<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g,"").replace(/ <\//g," </").replace(/ <o:p><\/o:p>/g," <o:p></o:p>").replace(/<o:p>( \|\u00A0)<\/o:p>/g,"").replace(/>([^\S\r\n][\r\n]\s)</g,"><")}(function(e){const t="</body>",n="</html>",r=e.indexOf(t);if(r<0)return e;const s=e.indexOf(n,r+t.length);return e.substring(0,r+t.length)+(s>=0?e.substring(s):"")}(e=e.replace(/<!--\[if gte vml 1]>/g,""))),i=n.parseFromString(r,"text/html");!function(e){e.querySelectorAll("span[style*=spacerun]").forEach((e=>{const t=e.innerText.length\|\|0;e.innerText=Array(t+1).join(" ").substr(0,t)}))}(i);const o=i.body.innerHTML,c=function(e,t){const n=new s.ViewDocument(t),r=new s.DomConverter(n,{renderingMode:"data"}),i=e.createDocumentFragment(),o=e.body.childNodes;for(;o.length>0;)i.appendChild(o[0]);return r.domToView(i,{skipComments:!0})}(i,t),l=function(e){const t=[],n=[],r=Array.from(e.getElementsByTagName("style"));for(const e of r)e.sheet&&e.sheet.cssRules&&e.sheet.cssRules.length&&(t.push(e.sheet),n.push(e.innerHTML));return{styles:t,stylesString:n.join(" ")}}(i);return{body:c,bodyString:o,styles:l.styles,stylesString:l.stylesString}}class v extends e.Plugin{static get pluginName(){return"PasteFromOffice"}static get requires(){return[t.ClipboardPipeline]}init(){const e=this.editor,t=e.editing.view.document,n=[];n.push(new y(t)),n.push(new d(t)),e.plugins.get("ClipboardPipeline").on("inputTransformation",((r,s)=>{if(s._isTransformedWithPasteFromOffice)return;if(e.model.document.selection.getFirstPosition().parent.is("element","codeBlock"))return;const 
i=s.dataTransfer.getData("text/html"),o=n.find((e=>e.isActive(i)));o&&(s._parsedData=w(i,t.stylesProcessor),o.execute(s),s._isTransformedWithPasteFromOffice=!0)}),{priority:"high"})}}})(),(window.CKEditor5=window.CKEditor5\|\|{}).pasteFromOffice=r})();
		/(()=>{var e={945:(e,t,n)=>{e.exports=n(79)("./src/clipboard.js")},704:(e,t,n)=>{e.exports=n(79)("./src/core.js")},492:(e,t,n)=>{e.exports=n(79)("./src/engine.js")},79:e=>{"use strict";e.exports=CKEditor5.dll}},t={};function n(r){var i=t[r];if(void 0!==i)return i.exports;var s=t[r]={exports:{}};return e[r](s,s.exports,n),s.exports}n.d=(e,t)=>{for(var r in t)n.o(t,r)&&!n.o(e,r)&&Object.defineProperty(e,r,{enumerable:!0,get:t[r]})},n.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),n.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})};var r={};(()=>{"use strict";n.r(r),n.d(r,{PasteFromOffice:()=>v});var e=n(704),t=n(945),i=n(492);function s(e,t,n,{blockElements:r,inlineObjectElements:i}){let s=n.createPositionAt(e,"forward"==t?"after":"before");return s=s.getLastMatchingPosition((({item:e})=>e.is("element")&&!r.includes(e.name)&&!i.includes(e.name)),{direction:t}),"forward"==t?s.nodeAfter:s.nodeBefore}function o(e,t){return!!e&&e.is("element")&&t.includes(e.name)}function c(e,t){if(!e.childCount)return;const n=new i.UpcastWriter(e.document),r=function(e,t){const n=t.createRangeIn(e),r=new i.Matcher({name:/^p\|h\d+$/,styles:{"mso-list":/./}}),s=[];for(const e of n)if("elementStart"===e.type&&r.match(e.item)){const t=u(e.item);s.push({element:e.item,id:t.id,order:t.order,indent:t.indent})}return s}(e,n);if(!r.length)return;let s=null,o=1;r.forEach(((e,c)=>{const u=function(e,t){if(!e)return!0;if(e.id!==t.id)return t.indent-e.indent!=1;const n=t.element.previousSibling;if(!n)return!0;return r=n,!(r.is("element","ol")\|\|r.is("element","ul"));var r}(r[c-1],e),f=u?null:r[c-1],m=(p=e,(d=f)?p.indent-d.indent:p.indent-1);var d,p;if(u&&(s=null,o=1),!s\|\|0!==m){const r=function(e,t){const n=new RegExp(`@list 
l${e.id}:level${e.indent}\\s({[^}])`,"gi"),r=/mso-level-number-format:([^;]{0,100});/gi,i=/mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/gi,s=n.exec(t);let o="decimal",c="ol",a=null;if(s&&s[1]){const t=r.exec(s[1]);if(t&&t[1]&&(o=t[1].trim(),c="bullet"!==o&&"image"!==o?"ol":"ul"),"bullet"===o){const t=function(e){const t=function(e){if(e.getChild(0).is("$text"))return null;for(const t of e.getChildren()){if(!t.is("element","span"))continue;const e=t.getChild(0);return e.is("$text")?e:e.getChild(0)}return null}(e);if(!t)return null;const n=t._data;if("o"===n)return"circle";if("·"===n)return"disc";if("§"===n)return"square";return null}(e.element);t&&(o=t)}else{const e=i.exec(s[1]);e&&e[1]&&(a=parseInt(e[1]))}}return{type:c,startIndex:a,style:l(o)}}(e,t);if(s){if(e.indent>o){const e=s.getChild(s.childCount-1),t=e.getChild(e.childCount-1);s=a(r,t,n),o+=1}else if(e.indent<o){const t=o-e.indent;s=function(e,t){const n=e.getAncestors({parentFirst:!0});let r=null,i=0;for(const e of n)if((e.is("element","ul")\|\|e.is("element","ol"))&&i++,i===t){r=e;break}return r}(s,t),o=e.indent}}else s=a(r,e.element,n);e.indent<=o&&(s.is("element",r.type)\|\|(s=n.rename(r.type,s)))}const g=function(e,t){return function(e,t){const n=new i.Matcher({name:"span",styles:{"mso-list":"Ignore"}}),r=t.createRangeIn(e);for(const e of r)"elementStart"===e.type&&n.match(e.item)&&t.remove(e.item)}(e,t),t.rename("li",e)}(e.element,n);n.appendChild(g,s)}))}function l(e){if(e.startsWith("arabic-leading-zero"))return"decimal-leading-zero";switch(e){case"alpha-upper":return"upper-alpha";case"alpha-lower":return"lower-alpha";case"roman-upper":return"upper-roman";case"roman-lower":return"lower-roman";case"circle":case"disc":case"square":return e;default:return null}}function a(e,t,n){const r=t.parent,i=n.createElement(e.type),s=r.getChildIndex(t)+1;return 
n.insertChild(s,i,r),e.style&&n.setStyle("list-style-type",e.style,i),e.startIndex&&e.startIndex>1&&n.setAttribute("start",e.startIndex,i),i}function u(e){const t={},n=e.getStyle("mso-list");if(n){const e=n.match(/(^\|\s{1,100})l(\d+)/i),r=n.match(/\s{0,100}lfo(\d+)/i),i=n.match(/\s{0,100}level(\d+)/i);e&&r&&i&&(t.id=e[2],t.order=r[1],t.indent=parseInt(i[1]))}return t}const f=/id=("\|')docs-internal-guid-[-0-9a-f]+("\|')/i;class m{constructor(e){this.document=e}isActive(e){return f.test(e)}execute(e){const t=new i.UpcastWriter(this.document),{body:n}=e._parsedData;!function(e,t){for(const n of e.getChildren())if(n.is("element","b")&&"normal"===n.getStyle("font-weight")){const r=e.getChildIndex(n);t.remove(n),t.insertChild(r,n.getChildren(),e)}}(n,t),function(e,t){for(const n of t.createRangeIn(e)){const e=n.item;if(e.is("element","li")){const n=e.getChild(0);n&&n.is("element","p")&&t.unwrapElement(n)}}}(n,t),function(e,t){const n=new i.ViewDocument(t.document.stylesProcessor),r=new i.DomConverter(n,{renderingMode:"data"}),c=r.blockElements,l=r.inlineObjectElements,a=[];for(const n of t.createRangeIn(e)){const e=n.item;if(e.is("element","br")){const n=s(e,"forward",t,{blockElements:c,inlineObjectElements:l}),r=s(e,"backward",t,{blockElements:c,inlineObjectElements:l}),i=o(n,c);(o(r,c)\|\|i)&&a.push(e)}}for(const e of a)e.hasClass("Apple-interchange-newline")?t.remove(e):t.replace(e,t.createElement("p"))}(n,t),e.content=n}}function d(e,t){if(!e.childCount)return;const n=new i.UpcastWriter(e.document),r=function(e,t){const n=t.createRangeIn(e),r=new i.Matcher({name:/v:(.+)/}),s=[];for(const e of n){if("elementStart"!=e.type)continue;const t=e.item,n=t.previousSibling,i=n&&n.is("element")?n.name:null;r.match(t)&&t.getAttribute("o:gfxdata")&&"v:shapetype"!==i&&s.push(e.item.getAttribute("id"))}return s}(e,n);!function(e,t,n){const r=n.createRangeIn(t),s=new i.Matcher({name:"img"}),o=[];for(const t of r)if(t.item.is("element")&&s.match(t.item)){const 
n=t.item,r=n.getAttribute("v:shapes")?n.getAttribute("v:shapes").split(" "):[];r.length&&r.every((t=>e.indexOf(t)>-1))?o.push(n):n.getAttribute("src")\|\|o.push(n)}for(const e of o)n.remove(e)}(r,e,n),function(e,t,n){const r=n.createRangeIn(t),i=[];for(const t of r)if("elementStart"==t.type&&t.item.is("element","v:shape")){const n=t.item.getAttribute("id");if(e.includes(n))continue;s(t.item.parent.getChildren(),n)\|\|i.push(t.item)}for(const e of i){const t={src:o(e)};e.hasAttribute("alt")&&(t.alt=e.getAttribute("alt"));const r=n.createElement("img",t);n.insertChild(e.index+1,r,e.parent)}function s(e,t){for(const n of e)if(n.is("element")){if("img"==n.name&&n.getAttribute("v:shapes")==t)return!0;if(s(n.getChildren(),t))return!0}return!1}function o(e){for(const t of e.getChildren())if(t.is("element")&&t.getAttribute("src"))return t.getAttribute("src")}}(r,e,n),function(e,t){const n=t.createRangeIn(e),r=new i.Matcher({name:/v:(.+)/}),s=[];for(const e of n)"elementStart"==e.type&&r.match(e.item)&&s.push(e.item);for(const e of s)t.remove(e)}(e,n);const s=function(e,t){const n=t.createRangeIn(e),r=new i.Matcher({name:"img"}),s=[];for(const e of n)e.item.is("element")&&r.match(e.item)&&e.item.getAttribute("src").startsWith("file://")&&s.push(e.item);return s}(e,n);s.length&&function(e,t,n){if(e.length===t.length)for(let r=0;r<e.length;r++){const i=`data:${t[r].type};base64,${p(t[r].hex)}`;n.setAttribute("src",i,e[r])}}(s,function(e){if(!e)return[];const t=/{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\\\blipuid\s?[\da-fA-F]+)?[\s}]?/,n=new RegExp("(?:("+t.source+"))([\\da-fA-F\\s]+)\\}","g"),r=e.match(n),i=[];if(r)for(const e of r){let n=!1;e.includes("\\pngblip")?n="image/png":e.includes("\\jpegblip")&&(n="image/jpeg"),n&&i.push({hex:e.replace(t,"").replace(/[^\da-fA-F]/g,""),type:n})}return i}(t),n)}function p(e){return btoa(e.match(/\w{2}/g).map((e=>String.fromCharCode(parseInt(e,16)))).join(""))}const 
g=/<meta\sname="?generator"?\scontent="?microsoft\sword\s\d+"?\/?>/i,h=/xmlns:o="urn:schemas-microsoft-com/i;class b{constructor(e){this.document=e}isActive(e){return g.test(e)\|\|h.test(e)}execute(e){const{body:t,stylesString:n}=e._parsedData;c(t,n),d(t,e.dataTransfer.getData("text/rtf")),e.content=t}}function y(e){return e.replace(/<span(?: class="Apple-converted-space"\|)>(\s+)<\/span>/g,((e,t)=>1===t.length?" ":Array(t.length+1).join(" ").substr(0,t.length)))}function w(e,t){const n=new DOMParser,r=function(e){return y(y(e)).replace(/(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]?)[\r\n]+([^\S\r\n]<\/span>)/g,"$1$2").replace(/<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g,"").replace(/ <\//g," </").replace(/ <o:p><\/o:p>/g," <o:p></o:p>").replace(/<o:p>( \|\u00A0)<\/o:p>/g,"").replace(/>([^\S\r\n][\r\n]\s)</g,"><")}(function(e){const t="</body>",n="</html>",r=e.indexOf(t);if(r<0)return e;const i=e.indexOf(n,r+t.length);return e.substring(0,r+t.length)+(i>=0?e.substring(i):"")}(e=e.replace(/<!--\[if gte vml 1]>/g,""))),s=n.parseFromString(r,"text/html");!function(e){e.querySelectorAll("span[style*=spacerun]").forEach((e=>{const t=e,n=t.innerText.length\|\|0;t.innerText=Array(n+1).join(" ").substr(0,n)}))}(s);const o=s.body.innerHTML,c=function(e,t){const n=new i.ViewDocument(t),r=new i.DomConverter(n,{renderingMode:"data"}),s=e.createDocumentFragment(),o=e.body.childNodes;for(;o.length>0;)s.appendChild(o[0]);return r.domToView(s,{skipComments:!0})}(s,t),l=function(e){const t=[],n=[],r=Array.from(e.getElementsByTagName("style"));for(const e of r)e.sheet&&e.sheet.cssRules&&e.sheet.cssRules.length&&(t.push(e.sheet),n.push(e.innerHTML));return{styles:t,stylesString:n.join(" ")}}(s);return{body:c,bodyString:o,styles:l.styles,stylesString:l.stylesString}}class v extends e.Plugin{static get pluginName(){return"PasteFromOffice"}static get requires(){return[t.ClipboardPipeline]}init(){const e=this.editor,t=e.editing.view.document,n=[];n.push(new b(t)),n.push(new 
m(t)),e.plugins.get("ClipboardPipeline").on("inputTransformation",((r,i)=>{if(i._isTransformedWithPasteFromOffice)return;if(e.model.document.selection.getFirstPosition().parent.is("element","codeBlock"))return;const s=i.dataTransfer.getData("text/html"),o=n.find((e=>e.isActive(s)));o&&(i._parsedData=w(s,t.stylesProcessor),o.execute(i),i._isTransformedWithPasteFromOffice=!0)}),{priority:"high"})}}})(),(window.CKEditor5=window.CKEditor5\|\|{}).pasteFromOffice=r})();

LICENSE.md

		@@ -5,3 +5,3 @@ Software License Agreement
		CKEditor 5 paste from Office feature – https://github.com/ckeditor/ckeditor5-paste-from-office <br>
		Copyright (c) 2003-2022, [CKSource Holding sp. z o.o.](https://cksource.com) All rights reserved.
		Copyright (c) 2003-2023, [CKSource Holding sp. z o.o.](https://cksource.com) All rights reserved.

		@@ -8,0 +8,0 @@ Licensed under the terms of [GNU General Public License Version 2 or later](http://www.gnu.org/licenses/gpl.html).

package.json

		{
		"name": "@ckeditor/ckeditor5-paste-from-office",
		"version": "35.4.0",
		"version": "36.0.0",
		"description": "Paste from Office feature for CKEditor 5.",
		@@ -15,25 +15,26 @@ "keywords": [
		"dependencies": {
		"ckeditor5": "^35.4.0"
		"ckeditor5": "^36.0.0"
		},
		"devDependencies": {
		"@ckeditor/ckeditor5-basic-styles": "^35.4.0",
		"@ckeditor/ckeditor5-clipboard": "^35.4.0",
		"@ckeditor/ckeditor5-cloud-services": "^35.4.0",
		"@ckeditor/ckeditor5-code-block": "^35.4.0",
		"@ckeditor/ckeditor5-core": "^35.4.0",
		"@ckeditor/ckeditor5-dev-utils": "^31.0.0",
		"@ckeditor/ckeditor5-easy-image": "^35.4.0",
		"@ckeditor/ckeditor5-engine": "^35.4.0",
		"@ckeditor/ckeditor5-editor-classic": "^35.4.0",
		"@ckeditor/ckeditor5-enter": "^35.4.0",
		"@ckeditor/ckeditor5-font": "^35.4.0",
		"@ckeditor/ckeditor5-heading": "^35.4.0",
		"@ckeditor/ckeditor5-image": "^35.4.0",
		"@ckeditor/ckeditor5-link": "^35.4.0",
		"@ckeditor/ckeditor5-list": "^35.4.0",
		"@ckeditor/ckeditor5-page-break": "^35.4.0",
		"@ckeditor/ckeditor5-paragraph": "^35.4.0",
		"@ckeditor/ckeditor5-table": "^35.4.0",
		"@ckeditor/ckeditor5-theme-lark": "^35.4.0",
		"@ckeditor/ckeditor5-utils": "^35.4.0",
		"@ckeditor/ckeditor5-basic-styles": "^36.0.0",
		"@ckeditor/ckeditor5-clipboard": "^36.0.0",
		"@ckeditor/ckeditor5-cloud-services": "^36.0.0",
		"@ckeditor/ckeditor5-code-block": "^36.0.0",
		"@ckeditor/ckeditor5-core": "^36.0.0",
		"@ckeditor/ckeditor5-dev-utils": "^32.0.0",
		"@ckeditor/ckeditor5-easy-image": "^36.0.0",
		"@ckeditor/ckeditor5-engine": "^36.0.0",
		"@ckeditor/ckeditor5-editor-classic": "^36.0.0",
		"@ckeditor/ckeditor5-enter": "^36.0.0",
		"@ckeditor/ckeditor5-font": "^36.0.0",
		"@ckeditor/ckeditor5-heading": "^36.0.0",
		"@ckeditor/ckeditor5-image": "^36.0.0",
		"@ckeditor/ckeditor5-link": "^36.0.0",
		"@ckeditor/ckeditor5-list": "^36.0.0",
		"@ckeditor/ckeditor5-page-break": "^36.0.0",
		"@ckeditor/ckeditor5-paragraph": "^36.0.0",
		"@ckeditor/ckeditor5-table": "^36.0.0",
		"@ckeditor/ckeditor5-theme-lark": "^36.0.0",
		"@ckeditor/ckeditor5-utils": "^36.0.0",
		"typescript": "^4.8.4",
		"webpack": "^5.58.1",
		@@ -57,3 +58,4 @@ "webpack-cli": "^4.9.0"
		"lang",
		"src",
		"src/**/*.js",
		"src/**/*.d.ts",
		"theme",
		@@ -65,4 +67,6 @@ "build",
		"scripts": {
		"dll:build": "webpack"
		"dll:build": "webpack",
		"build": "tsc -p ./tsconfig.release.json",
		"postversion": "npm run build"
		}
		}

114

src/filters/br.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/filters/br
		*/

		import { DomConverter, ViewDocument } from 'ckeditor5/src/engine';

		/**
		* Transforms `<br>` elements that are siblings to some block element into paragraphs.
		*
		* @param {module:engine/view/documentfragment~DocumentFragment} documentFragment The view structure to be transformed.
		* @param {module:engine/view/upcastwriter~UpcastWriter} writer
		* @param documentFragment The view structure to be transformed.
		*/
		export default function transformBlockBrsToParagraphs( documentFragment, writer ) {
		const viewDocument = new ViewDocument( writer.document.stylesProcessor );
		const domConverter = new DomConverter( viewDocument, { renderingMode: 'data' } );

		const blockElements = domConverter.blockElements;
		const inlineObjectElements = domConverter.inlineObjectElements;

		const elementsToReplace = [];

		for ( const value of writer.createRangeIn( documentFragment ) ) {
		const element = value.item;

		if ( element.is( 'element', 'br' ) ) {
		const nextSibling = findSibling( element, 'forward', writer, { blockElements, inlineObjectElements } );
		const previousSibling = findSibling( element, 'backward', writer, { blockElements, inlineObjectElements } );

		const nextSiblingIsBlock = isBlockViewElement( nextSibling, blockElements );
		const previousSiblingIsBlock = isBlockViewElement( previousSibling, blockElements );

		// If the <br> is surrounded by blocks then convert it to a paragraph:
		// * <p>foo</p>[<br>]<p>bar</p> -> <p>foo</p>[<p></p>]<p>bar</p>
		// * <p>foo</p>[<br>] -> <p>foo</p>[<p></p>]
		// * [<br>]<p>foo</p> -> [<p></p>]<p>foo</p>
		if ( previousSiblingIsBlock \|\| nextSiblingIsBlock ) {
		elementsToReplace.push( element );
		}
		}
		}

		for ( const element of elementsToReplace ) {
		if ( element.hasClass( 'Apple-interchange-newline' ) ) {
		writer.remove( element );
		} else {
		writer.replace( element, writer.createElement( 'p' ) );
		}
		}
		/**
		 * Transforms `<br>` elements that are siblings to some block element into paragraphs.
		 *
		 * A `<br>` carrying the `Apple-interchange-newline` class is removed instead of being replaced.
		 *
		 * @param documentFragment The view structure to be transformed.
		 * @param writer The upcast writer used to walk and modify the view structure.
		 */
		export default function transformBlockBrsToParagraphs(documentFragment, writer) {
			// The converter is created only to read its lists of block and inline object element names.
			const viewDocument = new ViewDocument(writer.document.stylesProcessor);
			const domConverter = new DomConverter(viewDocument, { renderingMode: 'data' });
			const blockElements = domConverter.blockElements;
			const inlineObjectElements = domConverter.inlineObjectElements;
			// Matching `<br>` elements are collected first and changed only after the walk is finished
			// (presumably so that the tree is not modified while the range is being iterated).
			const elementsToReplace = [];
			for (const value of writer.createRangeIn(documentFragment)) {
				const element = value.item;
				if (element.is('element', 'br')) {
					const nextSibling = findSibling(element, 'forward', writer, { blockElements, inlineObjectElements });
					const previousSibling = findSibling(element, 'backward', writer, { blockElements, inlineObjectElements });
					const nextSiblingIsBlock = isBlockViewElement(nextSibling, blockElements);
					const previousSiblingIsBlock = isBlockViewElement(previousSibling, blockElements);
					// If the <br> is surrounded by blocks then convert it to a paragraph:
					// * <p>foo</p>[<br>]<p>bar</p> -> <p>foo</p>[<p></p>]<p>bar</p>
					// * <p>foo</p>[<br>] -> <p>foo</p>[<p></p>]
					// * [<br>]<p>foo</p> -> [<p></p>]<p>foo</p>
					if (previousSiblingIsBlock || nextSiblingIsBlock) {
						elementsToReplace.push(element);
					}
				}
			}
			for (const element of elementsToReplace) {
				if (element.hasClass('Apple-interchange-newline')) {
					writer.remove(element);
				}
				else {
					writer.replace(element, writer.createElement('p'));
				}
			}
		}

		// Returns sibling node, treats inline elements as transparent (but should stop on inline objects).
		function findSibling( viewElement, direction, writer, { blockElements, inlineObjectElements } ) {
		let position = writer.createPositionAt( viewElement, direction == 'forward' ? 'after' : 'before' );

		// Find first position that is just before a first:
		// * text node,
		// * block element,
		// * inline object element.
		// It's ignoring any inline (non-object) elements like span, strong, etc.
		position = position.getLastMatchingPosition( ( { item } ) => (
		item.is( 'element' ) &&
		!blockElements.includes( item.name ) &&
		!inlineObjectElements.includes( item.name )
		), { direction } );

		return direction == 'forward' ? position.nodeAfter : position.nodeBefore;
		/**
		 * Returns the node next to `viewElement` in the given direction, treating plain inline elements
		 * (like span, strong, etc.) as transparent. The search stops at a text node, a block element
		 * or an inline object element.
		 */
		function findSibling(viewElement, direction, writer, { blockElements, inlineObjectElements }) {
			const isForward = direction == 'forward';
			// An item may be skipped only when it is an element that is listed neither as a block element
			// nor as an inline object element.
			const isTransparentInline = ({ item }) => item.is('element') &&
				!(blockElements.includes(item.name) || inlineObjectElements.includes(item.name));
			const startPosition = writer.createPositionAt(viewElement, isForward ? 'after' : 'before');
			const endPosition = startPosition.getLastMatchingPosition(isTransparentInline, { direction });
			if (isForward) {
				return endPosition.nodeAfter;
			}
			return endPosition.nodeBefore;
		}

		// Returns true for view elements that are listed as block view elements.
		function isBlockViewElement( node, blockElements ) {
		return !!node && node.is( 'element' ) && blockElements.includes( node.name );
		/**
		 * Tells whether the given node is a view element whose name is on the `blockElements` list.
		 * A missing node yields `false`.
		 */
		function isBlockViewElement(node, blockElements) {
			if (!node) {
				return false;
			}
			return node.is('element') && blockElements.includes(node.name);
		}

413

src/filters/image.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/filters/image
		*/

		/* globals btoa */

		import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';

		/**
		@@ -18,206 +14,229 @@ * Replaces source attribute of all `<img>` elements representing regular
		*
		* @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment on which transform images.
		* @param {String} rtfData The RTF data from which images representation will be used.
		* @param documentFragment Document fragment on which transform images.
		* @param rtfData The RTF data from which images representation will be used.
		*/
		export function replaceImagesSourceWithBase64( documentFragment, rtfData ) {
		if ( !documentFragment.childCount ) {
		return;
		}

		const upcastWriter = new UpcastWriter();
		const shapesIds = findAllShapesIds( documentFragment, upcastWriter );

		removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, upcastWriter );
		removeAllShapeElements( documentFragment, upcastWriter );

		const images = findAllImageElementsWithLocalSource( documentFragment, upcastWriter );

		if ( images.length ) {
		replaceImagesFileSourceWithInlineRepresentation( images, extractImageDataFromRtf( rtfData ), upcastWriter );
		}
		export function replaceImagesSourceWithBase64(documentFragment, rtfData) {
			// An empty fragment has nothing to process.
			if (!documentFragment.childCount) {
				return;
			}
			const writer = new UpcastWriter(documentFragment.document);
			// Shape ids are collected up front; the three clean-up steps below run in this fixed order.
			const wordShapeIds = findAllShapesIds(documentFragment, writer);
			removeAllImgElementsRepresentingShapes(wordShapeIds, documentFragment, writer);
			insertMissingImgs(wordShapeIds, documentFragment, writer);
			removeAllShapeElements(documentFragment, writer);
			const localImages = findAllImageElementsWithLocalSource(documentFragment, writer);
			// The RTF data is parsed only when there is at least one image with a local source.
			if (!localImages.length) {
				return;
			}
			const rtfImages = extractImageDataFromRtf(rtfData);
			replaceImagesFileSourceWithInlineRepresentation(localImages, rtfImages, writer);
		}

		/**
		* Converts given HEX string to base64 representation.
		*
		* @protected
		* @param {String} hexString The HEX string to be converted.
		* @returns {String} Base64 representation of a given HEX string.
		* @internal
		* @param hexString The HEX string to be converted.
		* @returns Base64 representation of a given HEX string.
		*/
		export function _convertHexToBase64( hexString ) {
		return btoa( hexString.match( /\w{2}/g ).map( char => {
		return String.fromCharCode( parseInt( char, 16 ) );
		} ).join( '' ) );
		/**
		 * Converts given HEX string to base64 representation.
		 *
		 * The input is consumed in two-character pairs; each pair is parsed as a base-16 number and turned
		 * into a single character before the whole string is base64-encoded. An input without a single
		 * complete pair (for example an empty string) yields an empty string.
		 *
		 * @internal
		 * @param hexString The HEX string to be converted.
		 * @returns Base64 representation of a given HEX string.
		 */
		export function _convertHexToBase64(hexString) {
			// `String#match()` with a global regexp returns `null` (not an empty array) when nothing matches,
			// so fall back to an empty list instead of throwing on `.map()`.
			const pairs = hexString.match(/\w{2}/g) || [];
			const binaryString = pairs.map(pair => String.fromCharCode(parseInt(pair, 16))).join('');
			return btoa(binaryString);
		}

		// Finds all shapes (`<v:>...</v:>`) ids. Shapes can represent images (canvas)
		// or Word shapes (which does not have RTF or Blob representation).
		//
		// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
		// from which to extract shape ids.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		// @returns {Array.<String>} Array of shape ids.
		function findAllShapesIds( documentFragment, writer ) {
		const range = writer.createRangeIn( documentFragment );

		const shapeElementsMatcher = new Matcher( {
		name: /v:(.+)/
		} );

		const shapesIds = [];

		for ( const value of range ) {
		if ( value.type != 'elementStart' ) {
		continue;
		}

		const el = value.item;
		const prevSiblingName = el.previousSibling && el.previousSibling.name \|\| null;

		// If shape element have 'o:gfxdata' attribute and is not directly before `<v:shapetype>` element it means it represent Word shape.
		if ( shapeElementsMatcher.match( el ) && el.getAttribute( 'o:gfxdata' ) && prevSiblingName !== 'v:shapetype' ) {
		shapesIds.push( value.item.getAttribute( 'id' ) );
		}
		}

		return shapesIds;
		/**
		 * Collects the `id` attribute of every shape element (`<v:...>`) that represents a Word shape.
		 * An element counts as a Word shape when it has the `o:gfxdata` attribute and its previous
		 * sibling is not a `<v:shapetype>` element.
		 *
		 * @param documentFragment Document fragment from which to extract shape ids.
		 * @param writer Writer used to create the range spanning the fragment.
		 * @returns Array of shape ids.
		 */
		function findAllShapesIds(documentFragment, writer) {
			const shapeMatcher = new Matcher({
				name: /v:(.+)/
			});
			// True when the node directly follows a `<v:shapetype>` element.
			const followsShapeType = node => {
				const sibling = node.previousSibling;
				return Boolean(sibling) && sibling.is('element') && sibling.name === 'v:shapetype';
			};
			const ids = [];
			for (const { type, item } of writer.createRangeIn(documentFragment)) {
				const isWordShape = type == 'elementStart' &&
					shapeMatcher.match(item) &&
					item.getAttribute('o:gfxdata') &&
					!followsShapeType(item);
				if (isWordShape) {
					ids.push(item.getAttribute('id'));
				}
			}
			return ids;
		}

		// Removes all `<img>` elements which represents Word shapes and not regular images.
		//
		// @param {Array.<String>} shapesIds Shape ids which will be checked against `<img>` elements.
		// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove `<img>` elements.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		function removeAllImgElementsRepresentingShapes( shapesIds, documentFragment, writer ) {
		const range = writer.createRangeIn( documentFragment );

		const imageElementsMatcher = new Matcher( {
		name: 'img'
		} );

		const imgs = [];

		for ( const value of range ) {
		if ( imageElementsMatcher.match( value.item ) ) {
		const el = value.item;
		const shapes = el.getAttribute( 'v:shapes' ) ? el.getAttribute( 'v:shapes' ).split( ' ' ) : [];

		if ( shapes.length && shapes.every( shape => shapesIds.indexOf( shape ) > -1 ) ) {
		imgs.push( el );
		// Shapes may also have empty source while content is paste in some browsers (Safari).
		} else if ( !el.getAttribute( 'src' ) ) {
		imgs.push( el );
		}
		}
		}

		for ( const img of imgs ) {
		writer.remove( img );
		}
		/**
		 * Removes the `<img>` elements that stand for Word shapes rather than regular images.
		 * An `<img>` is removed when every id listed in its `v:shapes` attribute is a known shape id,
		 * or when it has no `src` attribute value.
		 *
		 * @param shapesIds Shape ids which will be checked against `<img>` elements.
		 * @param documentFragment Document fragment from which to remove `<img>` elements.
		 * @param writer Writer used to traverse the fragment and remove the elements.
		 */
		function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) {
			const imgMatcher = new Matcher({
				name: 'img'
			});
			const representsShape = img => {
				const shapesAttribute = img.getAttribute('v:shapes');
				const referencedIds = shapesAttribute ? shapesAttribute.split(' ') : [];
				if (referencedIds.length > 0 && referencedIds.every(id => shapesIds.includes(id))) {
					return true;
				}
				// Shapes may also have empty source while content is paste in some browsers (Safari).
				return !img.getAttribute('src');
			};
			// Collect first, remove afterwards, so the traversal never runs over a changing tree.
			const obsolete = [];
			for (const { item } of writer.createRangeIn(documentFragment)) {
				if (item.is('element') && imgMatcher.match(item) && representsShape(item)) {
					obsolete.push(item);
				}
			}
			obsolete.forEach(img => writer.remove(img));
		}

		// Removes all shape elements (`<v:>...</v:>`) so they do not pollute the output structure.
		//
		// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment from which to remove shape elements.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		function removeAllShapeElements( documentFragment, writer ) {
		const range = writer.createRangeIn( documentFragment );

		const shapeElementsMatcher = new Matcher( {
		name: /v:(.+)/
		} );

		const shapes = [];

		for ( const value of range ) {
		if ( value.type == 'elementStart' && shapeElementsMatcher.match( value.item ) ) {
		shapes.push( value.item );
		}
		}

		for ( const shape of shapes ) {
		writer.remove( shape );
		}
		/**
		 * Removes every shape element (`<v:...>`) from the fragment so shapes do not pollute the output structure.
		 *
		 * @param documentFragment Document fragment from which to remove shape elements.
		 * @param writer Writer used to traverse the fragment and remove the elements.
		 */
		function removeAllShapeElements(documentFragment, writer) {
			const shapeMatcher = new Matcher({
				name: /v:(.+)/
			});
			// Gather the shapes before removing any of them; removal happens once the traversal is done.
			const found = [];
			for (const { type, item } of writer.createRangeIn(documentFragment)) {
				if (type != 'elementStart') {
					continue;
				}
				if (shapeMatcher.match(item)) {
					found.push(item);
				}
			}
			found.forEach(shape => writer.remove(shape));
		}

		// Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
		//
		// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment in which to look for `<img>` elements.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		// @returns {Object} result All found images grouped by source type.
		// @returns {Array.<module:engine/view/element~Element>} result.file Array of found `<img>` elements with `file://` source.
		// @returns {Array.<module:engine/view/element~Element>} result.blob Array of found `<img>` elements with `blob:` source.
		function findAllImageElementsWithLocalSource( documentFragment, writer ) {
		const range = writer.createRangeIn( documentFragment );

		const imageElementsMatcher = new Matcher( {
		name: 'img'
		} );

		const imgs = [];

		for ( const value of range ) {
		if ( imageElementsMatcher.match( value.item ) ) {
		if ( value.item.getAttribute( 'src' ).startsWith( 'file://' ) ) {
		imgs.push( value.item );
		}
		}
		}

		return imgs;
		/**
		 * Inserts an `<img>` element right after every `<v:shape>` that has no matching image yet.
		 *
		 * Shapes whose id is listed in `shapeIds` are skipped. For the remaining ones an image is considered
		 * present when an `<img>` with a `v:shapes` attribute equal to the shape id exists anywhere below
		 * the shape's parent. The new image takes its `src` from the first child of the shape that has one
		 * and copies the shape's `alt` attribute when the shape has it.
		 *
		 * @param shapeIds Ids of shapes for which no image should be inserted.
		 * @param documentFragment Document fragment to process.
		 * @param writer Writer used to traverse the fragment and to create and insert the images.
		 */
		function insertMissingImgs(shapeIds, documentFragment, writer) {
			// Depth-first search for an `<img v:shapes="id">` among the given nodes and their descendants.
			const hasImgFor = (nodes, id) => [...nodes].some(node => {
				/* istanbul ignore else */
				if (!node.is('element')) {
					return false;
				}
				const isMatchingImg = node.name == 'img' && node.getAttribute('v:shapes') == id;
				return isMatchingImg || hasImgFor(node.getChildren(), id);
			});
			// Source of the first child element of the shape that carries a `src`; `undefined` when there is none.
			const sourceOf = shape => {
				for (const child of shape.getChildren()) {
					/* istanbul ignore else */
					if (child.is('element') && child.getAttribute('src')) {
						return child.getAttribute('src');
					}
				}
				return undefined;
			};
			const orphanedShapes = [];
			for (const { type, item } of writer.createRangeIn(documentFragment)) {
				if (type != 'elementStart' || !item.is('element', 'v:shape')) {
					continue;
				}
				const shapeId = item.getAttribute('id');
				if (!shapeIds.includes(shapeId) && !hasImgFor(item.parent.getChildren(), shapeId)) {
					orphanedShapes.push(item);
				}
			}
			// Insert only after the traversal finished, so the walked tree stays stable.
			orphanedShapes.forEach(shape => {
				const src = sourceOf(shape);
				const attributes = shape.hasAttribute('alt') ? { src, alt: shape.getAttribute('alt') } : { src };
				const image = writer.createElement('img', attributes);
				writer.insertChild(shape.index + 1, image, shape.parent);
			});
		}

		// Extracts all images HEX representations from a given RTF data.
		//
		// @param {String} rtfData The RTF data from which to extract images HEX representation.
		// @returns {Array.<Object>} Array of found HEX representations. Each array item is an object containing:
		//
		// * {String} hex Image representation in HEX format.
		// * {string} type Type of image, `image/png` or `image/jpeg`.
		function extractImageDataFromRtf( rtfData ) {
		if ( !rtfData ) {
		return [];
		}

		const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\\\blipuid\s?[\da-fA-F]+)?[\s}]?/;
		const regexPicture = new RegExp( '(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g' );
		const images = rtfData.match( regexPicture );
		const result = [];

		if ( images ) {
		for ( const image of images ) {
		let imageType = false;

		if ( image.includes( '\\pngblip' ) ) {
		imageType = 'image/png';
		} else if ( image.includes( '\\jpegblip' ) ) {
		imageType = 'image/jpeg';
		}

		if ( imageType ) {
		result.push( {
		hex: image.replace( regexPictureHeader, '' ).replace( /[^\da-fA-F]/g, '' ),
		type: imageType
		} );
		}
		}
		}

		return result;
		/**
		 * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource.
		 *
		 * @param documentFragment Document fragment in which to look for `<img>` elements.
		 * @param writer Writer used to create the range spanning the fragment.
		 * @returns Array of the found `<img>` elements, in traversal order.
		 */
		function findAllImageElementsWithLocalSource(documentFragment, writer) {
			const range = writer.createRangeIn(documentFragment);
			const imageElementsMatcher = new Matcher({
				name: 'img'
			});
			const imgs = [];
			for (const value of range) {
				const item = value.item;
				if (!item.is('element') || !imageElementsMatcher.match(item)) {
					continue;
				}
				const src = item.getAttribute('src');
				// An `<img>` lacking a `src` attribute cannot point to a local file; skip it instead of
				// throwing on `undefined.startsWith()`.
				if (typeof src == 'string' && src.startsWith('file://')) {
					imgs.push(item);
				}
			}
			return imgs;
		}

		// Replaces `src` attribute value of all given images with the corresponding base64 image representation.
		//
		// @param {Array.<module:engine/view/element~Element>} imageElements Array of image elements which will have its source replaced.
		// @param {Array.<Object>} imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
		// The array should be the same length as `imageElements` parameter.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		function replaceImagesFileSourceWithInlineRepresentation( imageElements, imagesHexSources, writer ) {
		// Assume there is an equal amount of image elements and images HEX sources so they can be matched accordingly based on existing order.
		if ( imageElements.length === imagesHexSources.length ) {
		for ( let i = 0; i < imageElements.length; i++ ) {
		const newSrc = `data:${ imagesHexSources[ i ].type };base64,${ _convertHexToBase64( imagesHexSources[ i ].hex ) }`;
		writer.setAttribute( 'src', newSrc, imageElements[ i ] );
		}
		}
		/**
		 * Extracts all images HEX representations from a given RTF data.
		 *
		 * Only pictures marked as `\pngblip` or `\jpegblip` are returned; pictures in any other format are skipped.
		 *
		 * @param rtfData The RTF data from which to extract images HEX representation.
		 * @returns Array of found HEX representations. Each array item is an object containing:
		 *
		 * * hex Image representation in HEX format.
		 * * type Type of image, `image/png` or `image/jpeg`.
		 */
		function extractImageDataFromRtf(rtfData) {
			if (!rtfData) {
				return [];
			}
			// Picture header: `{\pict ... \bliptagN`, then an optional `\blipupiN` and an optional
			// `{\*\blipuid <hex>` group, followed by any whitespace or closing braces before the payload.
			// The `\*` and the `*` quantifier are essential: without them a picture carrying a `\blipuid`
			// group is never matched.
			const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/;
			// A whole picture: the header, the HEX payload (possibly split by whitespace) and the closing brace.
			const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g');
			const images = rtfData.match(regexPicture) || [];
			const result = [];
			for (const image of images) {
				let imageType = null;
				if (image.includes('\\pngblip')) {
					imageType = 'image/png';
				}
				else if (image.includes('\\jpegblip')) {
					imageType = 'image/jpeg';
				}
				if (imageType) {
					result.push({
						// Drop the header, then everything that is not a HEX digit (whitespace, braces).
						hex: image.replace(regexPictureHeader, '').replace(/[^\da-fA-F]/g, ''),
						type: imageType
					});
				}
			}
			return result;
		}
		/**
		 * Sets the `src` attribute of each given image to a base64 `data:` URL built from the HEX source
		 * found at the same position in `imagesHexSources`.
		 *
		 * Nothing is changed when the two arrays differ in length, because the pairing relies purely on order.
		 *
		 * @param imageElements Array of image elements which will have its source replaced.
		 * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function).
		 * The array should be the same length as `imageElements` parameter.
		 * @param writer Writer used to set the attribute.
		 */
		function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) {
			// Without a one-to-one correspondence the sources cannot be matched reliably.
			if (imageElements.length !== imagesHexSources.length) {
				return;
			}
			for (const [position, imageElement] of imageElements.entries()) {
				const source = imagesHexSources[position];
				const dataUrl = `data:${source.type};base64,${_convertHexToBase64(source.hex)}`;
				writer.setAttribute('src', dataUrl, imageElement);
			}
		}

788

src/filters/list.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/filters/list
		*/

		import { Matcher, UpcastWriter } from 'ckeditor5/src/engine';

		/**
		@@ -17,446 +14,379 @@ * Transforms Word specific list-like elements to the semantic HTML lists.
		*
		* <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
		* <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
		* ```xml
		* <p class=MsoListParagraphCxSpFirst style='mso-list:l1 level1 lfo1'>...</p> // Paragraph based list.
		* <h1 style='mso-list:l0 level1 lfo1'>...</h1> // Heading 1 based list.
		* ```
		*
		* @param {module:engine/view/documentfragment~DocumentFragment} documentFragment The view structure to be transformed.
		* @param {String} stylesString Styles from which list-like elements styling will be extracted.
		* @param documentFragment The view structure to be transformed.
		* @param stylesString Styles from which list-like elements styling will be extracted.
		*/
		export function transformListItemLikeElementsIntoLists( documentFragment, stylesString ) {
		if ( !documentFragment.childCount ) {
		return;
		}

		const writer = new UpcastWriter( documentFragment.document );
		const itemLikeElements = findAllItemLikeElements( documentFragment, writer );

		if ( !itemLikeElements.length ) {
		return;
		}

		let currentList = null;
		let currentIndentation = 1;

		itemLikeElements.forEach( ( itemLikeElement, i ) => {
		const isDifferentList = isNewListNeeded( itemLikeElements[ i - 1 ], itemLikeElement );
		const previousItemLikeElement = isDifferentList ? null : itemLikeElements[ i - 1 ];
		const indentationDifference = getIndentationDifference( previousItemLikeElement, itemLikeElement );

		if ( isDifferentList ) {
		currentList = null;
		currentIndentation = 1;
		}

		if ( !currentList \|\| indentationDifference !== 0 ) {
		const listStyle = detectListStyle( itemLikeElement, stylesString );

		if ( !currentList ) {
		currentList = insertNewEmptyList( listStyle, itemLikeElement.element, writer );
		} else if ( itemLikeElement.indent > currentIndentation ) {
		const lastListItem = currentList.getChild( currentList.childCount - 1 );
		const lastListItemChild = lastListItem.getChild( lastListItem.childCount - 1 );

		currentList = insertNewEmptyList( listStyle, lastListItemChild, writer );
		currentIndentation += 1;
		} else if ( itemLikeElement.indent < currentIndentation ) {
		const differentIndentation = currentIndentation - itemLikeElement.indent;

		currentList = findParentListAtLevel( currentList, differentIndentation );
		currentIndentation = parseInt( itemLikeElement.indent );
		}

		if ( itemLikeElement.indent <= currentIndentation ) {
		if ( !currentList.is( 'element', listStyle.type ) ) {
		currentList = writer.rename( listStyle.type, currentList );
		}
		}
		}

		const listItem = transformElementIntoListItem( itemLikeElement.element, writer );

		writer.appendChild( listItem, currentList );
		} );
		/**
		 * Transforms Word specific list-like elements into semantic HTML lists. The list-like elements
		 * are located with `findAllItemLikeElements()` and the given document fragment is modified in place.
		 *
		 * @param documentFragment The view structure to be transformed.
		 * @param stylesString Styles from which list-like elements styling will be extracted.
		 */
		export function transformListItemLikeElementsIntoLists(documentFragment, stylesString) {
			if (!documentFragment.childCount) {
				return;
			}
			const writer = new UpcastWriter(documentFragment.document);
			const itemLikeElements = findAllItemLikeElements(documentFragment, writer);
			if (!itemLikeElements.length) {
				return;
			}
			// The list element currently being filled and the indentation level it corresponds to.
			let currentList = null;
			let currentIndentation = 1;
			itemLikeElements.forEach((itemLikeElement, i) => {
				const isDifferentList = isNewListNeeded(itemLikeElements[i - 1], itemLikeElement);
				const previousItemLikeElement = isDifferentList ? null : itemLikeElements[i - 1];
				const indentationDifference = getIndentationDifference(previousItemLikeElement, itemLikeElement);
				if (isDifferentList) {
					// Start over: the next item opens a brand new top-level list.
					currentList = null;
					currentIndentation = 1;
				}
				if (!currentList || indentationDifference !== 0) {
					const listStyle = detectListStyle(itemLikeElement, stylesString);
					if (!currentList) {
						currentList = insertNewEmptyList(listStyle, itemLikeElement.element, writer);
					}
					else if (itemLikeElement.indent > currentIndentation) {
						// Deeper item: nest a new list after the last child of the last list item.
						const lastListItem = currentList.getChild(currentList.childCount - 1);
						const lastListItemChild = lastListItem.getChild(lastListItem.childCount - 1);
						currentList = insertNewEmptyList(listStyle, lastListItemChild, writer);
						currentIndentation += 1;
					}
					else if (itemLikeElement.indent < currentIndentation) {
						// Shallower item: climb back to the ancestor list of the matching level.
						const differentIndentation = currentIndentation - itemLikeElement.indent;
						currentList = findParentListAtLevel(currentList, differentIndentation);
						currentIndentation = itemLikeElement.indent;
					}
					if (itemLikeElement.indent <= currentIndentation) {
						// Make the list element type (`ol` or `ul`) agree with the detected style.
						if (!currentList.is('element', listStyle.type)) {
							currentList = writer.rename(listStyle.type, currentList);
						}
					}
				}
				const listItem = transformElementIntoListItem(itemLikeElement.element, writer);
				writer.appendChild(listItem, currentList);
			});
		}

		/**
		 * Unwraps the paragraph that is the first child of a list item, for every `<li>` in the fragment.
		 *
		 * @param documentFragment Document fragment whose list items are processed.
		 * @param writer Writer used to traverse the fragment and unwrap the paragraphs.
		 */
		export function unwrapParagraphInListItem(documentFragment, writer) {
			for (const { item } of writer.createRangeIn(documentFragment)) {
				if (!item.is('element', 'li')) {
					continue;
				}
				// Google Docs allows for single paragraph inside LI.
				const leadingChild = item.getChild(0);
				const startsWithParagraph = Boolean(leadingChild) && leadingChild.is('element', 'p');
				if (startsWithParagraph) {
					writer.unwrapElement(leadingChild);
				}
			}
		}
		/**
		* Finds all list-like elements in a given document fragment.
		*
		* @param {module:engine/view/documentfragment~DocumentFragment} documentFragment
		* @param {module:engine/view/upcastwriter~UpcastWriter} writer
		* @param documentFragment Document fragment in which to look for list-like nodes.
		* @returns Array of found list-like items. Each item is an object containing:
		*/
		export function unwrapParagraphInListItem( documentFragment, writer ) {
		for ( const value of writer.createRangeIn( documentFragment ) ) {
		const element = value.item;

		if ( element.is( 'element', 'li' ) ) {
		// Google Docs allows for single paragraph inside LI.
		const firstChild = element.getChild( 0 );

		if ( firstChild && firstChild.is( 'element', 'p' ) ) {
		writer.unwrapElement( firstChild );
		}
		}
		}
		/**
		 * Finds all list-like elements in a given document fragment, i.e. elements whose name matches
		 * the paragraph/heading pattern and which carry the `mso-list` style.
		 *
		 * @param documentFragment Document fragment in which to look for list-like nodes.
		 * @param writer Writer used to create the range spanning the fragment.
		 * @returns Array of found list-like items. Each item is an object containing:
		 *
		 * * element List-like element.
		 * * id List item id taken from `getListItemData()`.
		 * * order List item order taken from `getListItemData()`.
		 * * indent List item indentation level taken from `getListItemData()`.
		 */
		function findAllItemLikeElements(documentFragment, writer) {
			const range = writer.createRangeIn(documentFragment);
			// Matcher for finding list-like elements.
			const itemLikeElementsMatcher = new Matcher({
				// The `|` must be a plain alternation; escaped (`\|`) it would only match a literal pipe
				// and no paragraph or heading would ever be found.
				// NOTE(review): the alternatives are anchored separately (`^p` or `h\d+$`), so names such as
				// `pre` match as well - confirm whether `/^(?:p|h\d+)$/` is what is really intended.
				name: /^p|h\d+$/,
				styles: {
					'mso-list': /.*/
				}
			});
			const itemLikeElements = [];
			for (const value of range) {
				if (value.type === 'elementStart' && itemLikeElementsMatcher.match(value.item)) {
					const itemData = getListItemData(value.item);
					itemLikeElements.push({
						element: value.item,
						id: itemData.id,
						order: itemData.order,
						indent: itemData.indent
					});
				}
			}
			return itemLikeElements;
		}

		// Finds all list-like elements in a given document fragment.
		//
		// @param {module:engine/view/documentfragment~DocumentFragment} documentFragment Document fragment
		// in which to look for list-like nodes.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		// @returns {Array.<Object>} Array of found list-like items. Each item is an object containing:
		//
		// * {module:engine/src/view/element~Element} element List-like element.
		// * {Number} id List item id parsed from `mso-list` style (see `getListItemData()` function).
		// * {Number} order List item creation order parsed from `mso-list` style (see `getListItemData()` function).
		// * {Number} indent List item indentation level parsed from `mso-list` style (see `getListItemData()` function).
		function findAllItemLikeElements( documentFragment, writer ) {
		const range = writer.createRangeIn( documentFragment );

		// Matcher for finding list-like elements.
		const itemLikeElementsMatcher = new Matcher( {
		name: /^p\|h\d+$/,
		styles: {
		'mso-list': /.*/
		}
		} );

		const itemLikeElements = [];

		for ( const value of range ) {
		if ( value.type === 'elementStart' && itemLikeElementsMatcher.match( value.item ) ) {
		const itemData = getListItemData( value.item );

		itemLikeElements.push( {
		element: value.item,
		id: itemData.id,
		order: itemData.order,
		indent: itemData.indent
		} );
		}
		}

		return itemLikeElements;
		/**
		 * Extracts list item style from the provided CSS.
		 *
		 * List item style is extracted from the CSS stylesheet. Each list with its specific style attribute
		 * value (`mso-list:l1 level1 lfo1`) has its dedicated properties in a CSS stylesheet defined with a selector like:
		 *
		 * ```css
		 * @list l1:level1 { ... }
		 * ```
		 *
		 * It contains `mso-level-number-format` property which defines list numbering/bullet style. If this property
		 * is not defined it means default `decimal` numbering.
		 *
		 * Here CSS string representation is used as `mso-level-number-format` property is an invalid CSS property
		 * and will be removed during CSS parsing.
		 *
		 * @param listLikeItem List-like item for which list style will be searched for. Usually
		 * a result of `findAllItemLikeElements()` function.
		 * @param stylesString CSS stylesheet.
		 * @returns An object with properties:
		 *
		 * * type - List type, could be `ul` or `ol`.
		 * * startIndex - List start index, valid only for ordered lists (`null` when not defined).
		 * * style - The value returned by `mapListStyleDefinition()` for the detected list style.
		 */
		function detectListStyle(listLikeItem, stylesString) {
			// Captures the whole declaration block of the `@list lN:levelM` rule, from `{` up to (excluding) `}`.
			// Both `*` quantifiers are required: without them only a single character of the block is captured
			// and neither the number format nor the start index can be found in it.
			// NOTE(review): `id` and `indent` are interpolated unescaped - presumably always numeric; verify in `getListItemData()`.
			const listStyleRegexp = new RegExp(`@list l${listLikeItem.id}:level${listLikeItem.indent}\\s*({[^}]*)`, 'gi');
			const listStyleTypeRegex = /mso-level-number-format:([^;]{0,100});/gi;
			const listStartIndexRegex = /mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/gi;
			const listStyleMatch = listStyleRegexp.exec(stylesString);
			let listStyleType = 'decimal'; // Decimal is default one.
			let type = 'ol'; // <ol> is default list.
			let startIndex = null;
			if (listStyleMatch && listStyleMatch[1]) {
				const listStyleTypeMatch = listStyleTypeRegex.exec(listStyleMatch[1]);
				if (listStyleTypeMatch && listStyleTypeMatch[1]) {
					listStyleType = listStyleTypeMatch[1].trim();
					type = listStyleType !== 'bullet' && listStyleType !== 'image' ? 'ol' : 'ul';
				}
				// Styles for the numbered lists are always defined in the Word CSS stylesheet.
				// Unordered lists MAY contain a value for the Word CSS definition `mso-level-text` but sometimes
				// this tag is missing. And because of that, we cannot depend on that. We need to predict the list style value
				// based on the list style marker element.
				if (listStyleType === 'bullet') {
					const bulletedStyle = findBulletedListStyle(listLikeItem.element);
					if (bulletedStyle) {
						listStyleType = bulletedStyle;
					}
				}
				else {
					const listStartIndexMatch = listStartIndexRegex.exec(listStyleMatch[1]);
					if (listStartIndexMatch && listStartIndexMatch[1]) {
						// Explicit radix: the captured value consists of decimal digits only.
						startIndex = parseInt(listStartIndexMatch[1], 10);
					}
				}
			}
			return {
				type,
				startIndex,
				style: mapListStyleDefinition(listStyleType)
			};
		}

		// Extracts list item style from the provided CSS.
		//
		// List item style is extracted from the CSS stylesheet. Each list with its specific style attribute
		// value (`mso-list:l1 level1 lfo1`) has its dedicated properties in a CSS stylesheet defined with a selector like:
		//
		// @list l1:level1 { ... }
		//
		// It contains `mso-level-number-format` property which defines list numbering/bullet style. If this property
		// is not defined it means default `decimal` numbering.
		//
		// Here CSS string representation is used as `mso-level-number-format` property is an invalid CSS property
		// and will be removed during CSS parsing.
		//
		// @param {Object} listLikeItem List-like item for which list style will be searched for. Usually
		// a result of `findAllItemLikeElements()` function.
		// @param {String} stylesString CSS stylesheet.
		// @returns {Object} result
		// @returns {String} result.type List type, could be `ul` or `ol`.
		// @returns {Number} result.startIndex List start index, valid only for ordered lists.
		// @returns {String\|null} result.style List style, for example: `decimal`, `lower-roman`, etc. It is extracted
		// directly from Word stylesheet and adjusted to represent proper values for the CSS `list-style-type` property.
		// If it cannot be adjusted, the `null` value is returned.
		function detectListStyle( listLikeItem, stylesString ) {
		const listStyleRegexp = new RegExp( `@list l${ listLikeItem.id }:level${ listLikeItem.indent }\\s({[^}])`, 'gi' );
		const listStyleTypeRegex = /mso-level-number-format:([^;]{0,100});/gi;
		const listStartIndexRegex = /mso-level-start-at:\s{0,100}([0-9]{0,10})\s{0,100};/gi;

		const listStyleMatch = listStyleRegexp.exec( stylesString );

		let listStyleType = 'decimal'; // Decimal is default one.
		let type = 'ol'; // <ol> is default list.
		let startIndex = null;

		if ( listStyleMatch && listStyleMatch[ 1 ] ) {
		const listStyleTypeMatch = listStyleTypeRegex.exec( listStyleMatch[ 1 ] );

		if ( listStyleTypeMatch && listStyleTypeMatch[ 1 ] ) {
		listStyleType = listStyleTypeMatch[ 1 ].trim();
		type = listStyleType !== 'bullet' && listStyleType !== 'image' ? 'ol' : 'ul';
		}

		// Styles for the numbered lists are always defined in the Word CSS stylesheet.
		// Unordered lists MAY contain a value for the Word CSS definition `mso-level-text` but sometimes
		// this tag is missing. And because of that, we cannot depend on that. We need to predict the list style value
		// based on the list style marker element.
		if ( listStyleType === 'bullet' ) {
		const bulletedStyle = findBulletedListStyle( listLikeItem.element );

		if ( bulletedStyle ) {
		listStyleType = bulletedStyle;
		}
		} else {
		const listStartIndexMatch = listStartIndexRegex.exec( listStyleMatch[ 1 ] );

		if ( listStartIndexMatch && listStartIndexMatch[ 1 ] ) {
		startIndex = parseInt( listStartIndexMatch[ 1 ] );
		}
		}
		}

		return {
		type,
		startIndex,
		style: mapListStyleDefinition( listStyleType )
		};
		/**
		 * Tries to extract the `list-style-type` value based on the marker element for bulleted list.
		 *
		 * @param element List-like element whose marker is inspected.
		 * @returns `circle`, `disc` or `square`, or `null` when there is no marker or it is not recognized.
		 */
		function findBulletedListStyle(element) {
			const markerNode = findListMarkerNode(element);
			if (!markerNode) {
				return null;
			}
			switch (markerNode._data) {
				case 'o':
					return 'circle';
				case '·':
					return 'disc';
				// Word returns '§' instead of '■' for the square list style.
				case '§':
					return 'square';
				default:
					return null;
			}
		}

		// Tries to extract the `list-style-type` value based on the marker element for bulleted list.
		//
		// @param {module:engine/view/element~Element} element
		// @returns {String\|null}
		function findBulletedListStyle( element ) {
		const listMarkerElement = findListMarkerNode( element );

		if ( !listMarkerElement ) {
		return null;
		}

		const listMarker = listMarkerElement._data;

		if ( listMarker === 'o' ) {
		return 'circle';
		} else if ( listMarker === '·' ) {
		return 'disc';
		}
		// Word returns '§' instead of '■' for the square list style.
		else if ( listMarker === '§' ) {
		return 'square';
		}

		return null;
		/**
		 * Tries to find a text node that represents the marker element (list-style-type).
		 *
		 * @param element List-like element in which the marker is looked for.
		 * @returns The first child of the first non-empty `<span>` when that child is a text node, otherwise
		 * that child's own first child; `null` when no marker can be found.
		 */
		function findListMarkerNode(element) {
			const firstChild = element.getChild(0);
			// An element without children has no marker at all.
			// If the first child is a text node, it is the data for the element.
			// The list-style marker is not present here.
			if (!firstChild || firstChild.is('$text')) {
				return null;
			}
			for (const childNode of element.getChildren()) {
				// The list-style marker will be inside the `<span>` element. Let's ignore all non-span elements.
				// It may happen that the `<a>` element is added as the first child. Most probably, it's an anchor element.
				if (!childNode.is('element', 'span')) {
					continue;
				}
				const textNodeOrElement = childNode.getChild(0);
				// An empty `<span>` cannot hold the marker; keep looking instead of throwing on `undefined`.
				if (!textNodeOrElement) {
					continue;
				}
				// If already found the marker element, use it.
				if (textNodeOrElement.is('$text')) {
					return textNodeOrElement;
				}
				// The marker is expected one level deeper; report `null` when that level is empty.
				return textNodeOrElement.getChild(0) || null;
			}
			/* istanbul ignore next */
			return null;
		}

		// Tries to find a text node that represents the marker element (list-style-type).
		//
		// @param {module:engine/view/element~Element} element
		// @returns {module:engine/view/text~Text\|null}
		function findListMarkerNode( element ) {
		// If the first child is a text node, it is the data for the element.
		// The list-style marker is not present here.
		if ( element.getChild( 0 ).is( '$text' ) ) {
		return null;
		}

		for ( const childNode of element.getChildren() ) {
		// The list-style marker will be inside the `<span>` element. Let's ignore all non-span elements.
		// It may happen that the `<a>` element is added as the first child. Most probably, it's an anchor element.
		if ( !childNode.is( 'element', 'span' ) ) {
		continue;
		}

		const textNodeOrElement = childNode.getChild( 0 );

		// If already found the marker element, use it.
		if ( textNodeOrElement.is( '$text' ) ) {
		return textNodeOrElement;
		}

		return textNodeOrElement.getChild( 0 );
		}
		/**
		 * Parses the `list-style-type` value extracted directly from the Word CSS stylesheet and returns proper CSS definition.
		 *
		 * @param value List style name as found in the Word stylesheet (or an already valid bullet style).
		 * @returns The matching CSS `list-style-type` value, or `null` when the value is not recognized.
		 */
		function mapListStyleDefinition(value) {
			// Every variant starting with this prefix maps to the same CSS value.
			if (value.startsWith('arabic-leading-zero')) {
				return 'decimal-leading-zero';
			}
			const cssByWordName = new Map([
				['alpha-upper', 'upper-alpha'],
				['alpha-lower', 'lower-alpha'],
				['roman-upper', 'upper-roman'],
				['roman-lower', 'lower-roman'],
				// Bullet styles are already valid CSS values and pass through unchanged.
				['circle', 'circle'],
				['disc', 'disc'],
				['square', 'square']
			]);
			return cssByWordName.has(value) ? cssByWordName.get(value) : null;
		}

		// Parses the `list-style-type` value extracted directly from the Word CSS stylesheet and returns proper CSS definition.
		//
		// @param {String\|null} value
		// @returns {String\|null}
		function mapListStyleDefinition( value ) {
		if ( value.startsWith( 'arabic-leading-zero' ) ) {
		return 'decimal-leading-zero';
		}

		switch ( value ) {
		case 'alpha-upper':
		return 'upper-alpha';
		case 'alpha-lower':
		return 'lower-alpha';
		case 'roman-upper':
		return 'upper-roman';
		case 'roman-lower':
		return 'lower-roman';
		case 'circle':
		case 'disc':
		case 'square':
		return value;
		default:
		return null;
		}
		/**
		 * Creates an empty list of a given type and inserts it after a specified element.
		 *
		 * @param listStyle List style object which determines the type of newly created list.
		 * Usually a result of `detectListStyle()` function.
		 * @param element Element after which list is inserted.
		 * @param writer Writer used to create and insert the list.
		 * @returns Newly created list element.
		 */
		function insertNewEmptyList(listStyle, element, writer) {
			const { type, style, startIndex } = listStyle;
			const container = element.parent;
			const newList = writer.createElement(type);
			// Place the list directly behind `element` among its siblings.
			writer.insertChild(container.getChildIndex(element) + 1, newList, container);
			// We do not support modifying the marker for a particular list item.
			// Set the value for the `list-style-type` property directly to the list container.
			if (style) {
				writer.setStyle('list-style-type', style, newList);
			}
			// A `start` attribute is written only for start indexes greater than 1.
			if (startIndex && startIndex > 1) {
				writer.setAttribute('start', startIndex, newList);
			}
			return newList;
		}

		// Creates an empty list of a given type and inserts it after a specified element.
		//
		// @param {Object} listStyle List style object which determines the type of newly created list.
		// Usually a result of `detectListStyle()` function.
		// @param {module:engine/view/element~Element} element Element after which list is inserted.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		// @returns {module:engine/view/element~Element} Newly created list element.

		function insertNewEmptyList( listStyle, element, writer ) {
		const parent = element.parent;
		const list = writer.createElement( listStyle.type );
		const position = parent.getChildIndex( element ) + 1;

		writer.insertChild( position, list, parent );

		// We do not support modifying the marker for a particular list item.
		// Set the value for the `list-style-type` property directly to the list container.
		if ( listStyle.style ) {
		writer.setStyle( 'list-style-type', listStyle.style, list );
		}

		if ( listStyle.startIndex && listStyle.startIndex > 1 ) {
		writer.setAttribute( 'start', listStyle.startIndex, list );
		}

		return list;
		/**
		 * Converts the given element into a semantic list item (`<li>`). The change is applied directly to the
		 * view structure the element belongs to.
		 *
		 * @param element Element which will be transformed into a list item.
		 * @param writer Writer used to modify the view.
		 * @returns The element returned by the rename. It takes the place of the old element, so the reference
		 * to the old element must not be used afterwards.
		 */
		function transformElementIntoListItem(element, writer) {
			// Drop the span holding the bullet/numbering before the element is renamed.
			removeBulletElement(element, writer);
			const listItem = writer.rename('li', element);
			return listItem;
		}

		// Transforms a given element into a semantic list item. As the function operates on a provided
		// {module:engine/src/view/element~Element element} it will modify the view structure to which this element belongs.
		//
		// @param {module:engine/view/element~Element} element Element which will be transformed into a list item.
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		// @returns {module:engine/view/element~Element} New element to which the given one was transformed. It is
		// inserted in place of the old element (the reference to the old element is lost due to renaming).
		function transformElementIntoListItem( element, writer ) {
		removeBulletElement( element, writer );

		return writer.rename( 'li', element );
		/**
		 * Extracts list item information from the Word specific `mso-list` style of a list-like element, e.g.:
		 *
		 * ```
		 * style="mso-list:l1 level1 lfo1"
		 * ```
		 *
		 * where:
		 *
		 * * `l1` is a list id (however it does not mean this is a continuous list - see #43),
		 * * `level1` is a list item indentation level,
		 * * `lfo1` is a list insertion order in a document.
		 *
		 * @param element Element from which style data is extracted.
		 * @returns An object with `id` (string), `order` (string) and `indent` (number) when all three parts are
		 * present in the style; an empty object otherwise.
		 */
		function getListItemData(element) {
			const listStyle = element.getStyle('mso-list');
			if (!listStyle) {
				return {};
			}
			// The id token (`l<digits>`) must start the value or follow whitespace; this keeps it from matching
			// inside other tokens. The alternation needs a plain `|` - an escaped pipe would be a literal character.
			const idMatch = listStyle.match(/(^|\s{1,100})l(\d+)/i);
			const orderMatch = listStyle.match(/\s{0,100}lfo(\d+)/i);
			const indentMatch = listStyle.match(/\s{0,100}level(\d+)/i);
			// Partial data is not reported: either all three values are available or none.
			if (!idMatch || !orderMatch || !indentMatch) {
				return {};
			}
			return {
				id: idMatch[2],
				order: orderMatch[1],
				// Explicit radix so the digits are always read as a decimal number.
				indent: parseInt(indentMatch[1], 10)
			};
		}

		// Extracts list item information from Word specific list-like element style:
		//
		// `style="mso-list:l1 level1 lfo1"`
		//
		// where:
		//
		// * `l1` is a list id (however it does not mean this is a continuous list - see #43),
		// * `level1` is a list item indentation level,
		// * `lfo1` is a list insertion order in a document.
		//
		// @param {module:engine/view/element~Element} element Element from which style data is extracted.
		// @returns {Object} result
		// @returns {Number} result.id Parent list id.
		// @returns {Number} result.order List item creation order.
		// @returns {Number} result.indent List item indentation level.
		function getListItemData( element ) {
		const data = {};
		const listStyle = element.getStyle( 'mso-list' );

		if ( listStyle ) {
		const idMatch = listStyle.match( /(^\|\s{1,100})l(\d+)/i );
		const orderMatch = listStyle.match( /\s{0,100}lfo(\d+)/i );
		const indentMatch = listStyle.match( /\s{0,100}level(\d+)/i );

		if ( idMatch && orderMatch && indentMatch ) {
		data.id = idMatch[ 2 ];
		data.order = orderMatch[ 1 ];
		data.indent = indentMatch[ 1 ];
		}
		}

		return data;
		/**
		 * Removes the span holding the list numbering/bullet from the given element.
		 *
		 * @param element Element whose content is searched for bullet spans.
		 * @param writer Writer used to walk the element and remove the matched spans.
		 */
		function removeBulletElement(element, writer) {
			// Matches the `span` elements marked with the `mso-list: Ignore` style.
			const bulletSpanMatcher = new Matcher({ name: 'span', styles: { 'mso-list': 'Ignore' } });
			for (const step of writer.createRangeIn(element)) {
				// Only element openings are of interest; every other step of the walk is skipped.
				if (step.type !== 'elementStart') {
					continue;
				}
				if (bulletSpanMatcher.match(step.item)) {
					writer.remove(step.item);
				}
			}
		}

		// Removes span with a numbering/bullet from a given element.
		//
		// @param {module:engine/view/element~Element} element
		// @param {module:engine/view/upcastwriter~UpcastWriter} writer
		function removeBulletElement( element, writer ) {
		// Matcher for finding `span` elements holding lists numbering/bullets.
		const bulletMatcher = new Matcher( {
		name: 'span',
		styles: {
		'mso-list': 'Ignore'
		}
		} );

		const range = writer.createRangeIn( element );

		for ( const value of range ) {
		if ( value.type === 'elementStart' && bulletMatcher.match( value.item ) ) {
		writer.remove( value.item );
		}
		}
		/**
		 * Decides whether `currentItem` has to start a new list instead of continuing the list of `previousItem`.
		 * The decision is based on `item.id` (extracted from the `mso-list` style, see #getListItemData) and on
		 * the previous sibling of the current item's element.
		 *
		 * It is easy to change the `id` of a nested list in Word, so a differing id alone is not conclusive:
		 * when the current item is indented exactly one level deeper than the previous one, it is treated as
		 * the beginning of a nested list. See: https://github.com/ckeditor/ckeditor5/issues/7805.
		 *
		 * @param previousItem Data of the preceding list item, or a falsy value when there is none.
		 * @param currentItem Data of the list item being processed.
		 * @returns `true` when a new list is needed.
		 */
		function isNewListNeeded(previousItem, currentItem) {
			// The very first item always opens a list.
			if (!previousItem) {
				return true;
			}
			if (previousItem.id !== currentItem.id) {
				// Different ids: only a one-level-deeper item (a nested list start) stays in the current list.
				//
				// * List item 1.
				//   - Nested list item 1.
				return currentItem.indent - previousItem.indent !== 1;
			}
			// Even with the same id the list does not have to be continuous (#43): it continues only when
			// the element is directly preceded by a list.
			const precedingNode = currentItem.element.previousSibling;
			return !precedingNode || !isList(precedingNode);
		}

		// Whether the previous and current items belong to the same list. It is determined based on `item.id`
		// (extracted from `mso-list` style, see #getListItemData) and a previous sibling of the current item.
		//
		// However, it's quite easy to change the `id` attribute for nested lists in Word. It will break the list feature while pasting.
		// Let's check also the `indent` attribute. If the difference between those two elements is equal to 1, we can assume that
		// the `currentItem` is a beginning of the nested list because lists in CKEditor 5 always start with the `indent=0` attribute.
		// See: https://github.com/ckeditor/ckeditor5/issues/7805.
		//
		// @param {Object} previousItem
		// @param {Object} currentItem
		// @returns {Boolean}
		function isNewListNeeded( previousItem, currentItem ) {
		if ( !previousItem ) {
		return true;
		}

		if ( previousItem.id !== currentItem.id ) {
		// See: https://github.com/ckeditor/ckeditor5/issues/7805.
		//
		// * List item 1.
		// - Nested list item 1.
		if ( currentItem.indent - previousItem.indent === 1 ) {
		return false;
		}

		return true;
		}

		const previousSibling = currentItem.element.previousSibling;

		if ( !previousSibling ) {
		return true;
		}

		// Even with the same id the list does not have to be continuous (#43).
		return !isList( previousSibling );
		/**
		 * Tells whether the given view node is a list container.
		 *
		 * @param element View node to check.
		 * @returns `true` for an `<ol>` or `<ul>` element, `false` otherwise.
		 */
		function isList(element) {
			return element.is('element', 'ol') || element.is('element', 'ul');
		}

		function isList( element ) {
		return element.is( 'element', 'ol' ) \|\| element.is( 'element', 'ul' );
		/**
		 * Computes how much deeper (positive) or shallower (negative) the current list item is compared with
		 * the previous one. The indent values come from the `mso-list` style, see #getListItemData.
		 *
		 * @param previousItem Data of the preceding list item, or a falsy value when there is none.
		 * @param currentItem Data of the list item being processed.
		 * @returns The difference between the two indentation levels.
		 */
		function getIndentationDifference(previousItem, currentItem) {
			// Without a previous item the current indent is measured against level 1.
			const referenceIndent = previousItem ? previousItem.indent : 1;
			return currentItem.indent - referenceIndent;
		}

		// Calculates the indentation difference between two given list items (based on the indent attribute
		// extracted from the `mso-list` style, see #getListItemData).
		//
		// @param {Object} previousItem
		// @param {Object} currentItem
		// @returns {Number}
		function getIndentationDifference( previousItem, currentItem ) {
		return previousItem ? currentItem.indent - previousItem.indent : currentItem.indent - 1;
		/**
		 * Finds the parent list element (ul/ol) of a given list element with indentation level lower by a given value.
		 *
		 * @param listElement List element from which to start looking for a parent list.
		 * @param indentationDifference Indentation difference between lists, i.e. how many `ul`/`ol`
		 * ancestors have to be passed on the way up.
		 * @returns Found list element with indentation level lower by a given value, or `null` when the
		 * ancestors do not contain that many lists.
		 */
		function findParentListAtLevel(listElement, indentationDifference) {
			let listsPassed = 0;
			// Ancestors are visited from the closest parent upwards.
			for (const ancestor of listElement.getAncestors({ parentFirst: true })) {
				if (ancestor.is('element', 'ul') || ancestor.is('element', 'ol')) {
					listsPassed++;
				}
				// The counter is compared on every ancestor, right after it was (possibly) incremented.
				if (listsPassed === indentationDifference) {
					return ancestor;
				}
			}
			return null;
		}

		// Finds the parent list element (ul/ol) of a given list element with indentation level lower by a given value.
		//
		// @param {module:engine/view/element~Element} listElement List element from which to start looking for a parent list.
		// @param {Number} indentationDifference Indentation difference between lists, i.e. how many `ul`/`ol`
		// ancestors have to be passed on the way up.
		// @returns {module:engine/view/element~Element|null} Found list element with indentation level lower
		// by a given value, or `null` when the ancestors do not contain that many lists.
		function findParentListAtLevel( listElement, indentationDifference ) {
			let listsPassed = 0;

			// Ancestors are visited from the closest parent upwards.
			for ( const ancestor of listElement.getAncestors( { parentFirst: true } ) ) {
				if ( ancestor.name === 'ul' || ancestor.name === 'ol' ) {
					listsPassed++;
				}

				// The counter is compared on every ancestor, right after it was (possibly) incremented.
				if ( listsPassed === indentationDifference ) {
					return ancestor;
				}
			}

			return null;
		}

182

src/filters/parse.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/filters/parse
		*/

		/* globals DOMParser */

		import { DomConverter, ViewDocument } from 'ckeditor5/src/engine';

		import { normalizeSpacing, normalizeSpacerunSpans } from './space';

		/**
		* Parses provided HTML extracting contents of `<body>` and `<style>` tags.
		*
		* @param {String} htmlString HTML string to be parsed.
		* @param {module:engine/view/stylesmap~StylesProcessor} stylesProcessor
		* @returns {Object} result
		* @returns {module:engine/view/documentfragment~DocumentFragment} result.body Parsed body
		* content as a traversable structure.
		* @returns {String} result.bodyString Entire body content as a string.
		* @returns {Array.<CSSStyleSheet>} result.styles Array of native `CSSStyleSheet` objects, each representing
		* separate `style` tag from the source HTML.
		* @returns {String} result.stylesString All `style` tags contents combined in the order of occurrence into one string.
		* @param htmlString HTML string to be parsed.
		*/
		export function parseHtml( htmlString, stylesProcessor ) {
		const domParser = new DOMParser();

		// Remove Word specific "if comments" so content inside is not omitted by the parser.
		htmlString = htmlString.replace( /<!--\[if gte vml 1]>/g, '' );

		const normalizedHtml = normalizeSpacing( cleanContentAfterBody( htmlString ) );

		// Parse htmlString as native Document object.
		const htmlDocument = domParser.parseFromString( normalizedHtml, 'text/html' );

		normalizeSpacerunSpans( htmlDocument );

		// Get `innerHTML` first as transforming to View modifies the source document.
		const bodyString = htmlDocument.body.innerHTML;

		// Transform document.body to View.
		const bodyView = documentToView( htmlDocument, stylesProcessor );

		// Extract stylesheets.
		const stylesObject = extractStyles( htmlDocument );

		return {
		body: bodyView,
		bodyString,
		styles: stylesObject.styles,
		stylesString: stylesObject.stylesString
		};
		/**
		 * Parses the provided HTML and extracts the contents of its `<body>` and `<style>` tags.
		 *
		 * @param htmlString HTML string to be parsed.
		 * @param stylesProcessor Styles processor handed over to the view conversion.
		 * @returns An object with: `body` - the body converted to a view structure, `bodyString` - the body
		 * markup as a string, `styles` and `stylesString` - the style sheets as returned by `extractStyles()`.
		 */
		export function parseHtml(htmlString, stylesProcessor) {
			// Remove Word specific "if comments" so content inside is not omitted by the parser.
			const withoutIfComments = htmlString.replace(/<!--\[if gte vml 1]>/g, '');
			const normalizedHtml = normalizeSpacing(cleanContentAfterBody(withoutIfComments));
			// Parse the normalized markup as a native Document object.
			const htmlDocument = new DOMParser().parseFromString(normalizedHtml, 'text/html');
			normalizeSpacerunSpans(htmlDocument);
			// `innerHTML` has to be read first, as transforming to View modifies the source document.
			const bodyString = htmlDocument.body.innerHTML;
			const body = documentToView(htmlDocument, stylesProcessor);
			const { styles, stylesString } = extractStyles(htmlDocument);
			return { body, bodyString, styles, stylesString };
		}

		// Transforms native `Document` object into {@link module:engine/view/documentfragment~DocumentFragment}. Comments are skipped.
		//
		// @param {Document} htmlDocument Native `Document` object to be transformed.
		// @param {module:engine/view/stylesmap~StylesProcessor} stylesProcessor
		// @returns {module:engine/view/documentfragment~DocumentFragment}
		function documentToView( htmlDocument, stylesProcessor ) {
		const viewDocument = new ViewDocument( stylesProcessor );
		const domConverter = new DomConverter( viewDocument, { renderingMode: 'data' } );
		const fragment = htmlDocument.createDocumentFragment();
		const nodes = htmlDocument.body.childNodes;

		while ( nodes.length > 0 ) {
		fragment.appendChild( nodes[ 0 ] );
		}

		return domConverter.domToView( fragment, { skipComments: true } );
		/**
		 * Converts the body of a native `Document` into a view structure
		 * ({@link module:engine/view/documentfragment~DocumentFragment}). Comments are skipped.
		 *
		 * @param htmlDocument Native `Document` object to be transformed. Its body is emptied in the process.
		 * @param stylesProcessor Styles processor used to create the temporary view document.
		 * @returns The result of converting the collected DOM fragment to the view.
		 */
		function documentToView(htmlDocument, stylesProcessor) {
			const converter = new DomConverter(new ViewDocument(stylesProcessor), { renderingMode: 'data' });
			const domFragment = htmlDocument.createDocumentFragment();
			const bodyChildren = htmlDocument.body.childNodes;
			// Move the body children into the fragment one by one. The loop relies on `appendChild()` taking
			// the node out of `bodyChildren`, so index 0 always points at the next node to move.
			while (bodyChildren.length > 0) {
				domFragment.appendChild(bodyChildren[0]);
			}
			return converter.domToView(domFragment, { skipComments: true });
		}

		// Extracts both `CSSStyleSheet` and string representation from all `style` elements available in a provided `htmlDocument`.
		//
		// @param {Document} htmlDocument Native `Document` object from which styles will be extracted.
		// @returns {Object} result
		// @returns {Array.<CSSStyleSheet>} result.styles Array of native `CSSStyleSheet` object, each representing
		// separate `style` tag from the source object.
		// @returns {String} result.stylesString All `style` tags contents combined in the order of occurrence as one string.
		function extractStyles( htmlDocument ) {
		const styles = [];
		const stylesString = [];
		const styleTags = Array.from( htmlDocument.getElementsByTagName( 'style' ) );

		for ( const style of styleTags ) {
		if ( style.sheet && style.sheet.cssRules && style.sheet.cssRules.length ) {
		styles.push( style.sheet );
		stylesString.push( style.innerHTML );
		}
		}

		return {
		styles,
		stylesString: stylesString.join( ' ' )
		};
		/**
		 * Collects the style sheets of all `style` elements found in the provided `htmlDocument`, both as
		 * sheet objects and as text.
		 *
		 * @param htmlDocument Native `Document` object from which styles will be extracted.
		 * @returns An object with `styles` - the `sheet` of every `style` tag that has at least one CSS rule,
		 * and `stylesString` - the `innerHTML` of those tags joined with a single space, in document order.
		 */
		function extractStyles(htmlDocument) {
			// Tags without a sheet, or with a sheet that holds no rules, are left out of both results.
			const tagsWithRules = Array.from(htmlDocument.getElementsByTagName('style'))
				.filter(tag => tag.sheet && tag.sheet.cssRules && tag.sheet.cssRules.length);
			return {
				styles: tagsWithRules.map(tag => tag.sheet),
				stylesString: tagsWithRules.map(tag => tag.innerHTML).join(' ')
			};
		}

		// Removes leftover content from between closing </body> and closing </html> tag:
		//
		// <html><body><p>Foo Bar</p></body><span>Fo</span></html> -> <html><body><p>Foo Bar</p></body></html>
		//
		// This function is used as specific browsers (Edge) add some random content after `body` tag when pasting from Word.
		// @param {String} htmlString The HTML string to be cleaned.
		// @returns {String} The HTML string with leftover content removed.
		function cleanContentAfterBody( htmlString ) {
		const bodyCloseTag = '</body>';
		const htmlCloseTag = '</html>';

		const bodyCloseIndex = htmlString.indexOf( bodyCloseTag );

		if ( bodyCloseIndex < 0 ) {
		return htmlString;
		}

		const htmlCloseIndex = htmlString.indexOf( htmlCloseTag, bodyCloseIndex + bodyCloseTag.length );

		return htmlString.substring( 0, bodyCloseIndex + bodyCloseTag.length ) +
		( htmlCloseIndex >= 0 ? htmlString.substring( htmlCloseIndex ) : '' );
		/**
		 * Drops whatever sits between the closing `</body>` tag and the closing `</html>` tag:
		 *
		 * ```html
		 * <html><body><p>Foo Bar</p></body><span>Fo</span></html> -> <html><body><p>Foo Bar</p></body></html>
		 * ```
		 *
		 * This function is used as specific browsers (Edge) add some random content after `body` tag when pasting from Word.
		 *
		 * @param htmlString The HTML string to be cleaned.
		 * @returns The HTML string with leftover content removed. A string without `</body>` is returned
		 * unchanged; when there is no `</html>` after `</body>`, everything after `</body>` is dropped.
		 */
		function cleanContentAfterBody(htmlString) {
			const bodyCloseTag = '</body>';
			const bodyCloseIndex = htmlString.indexOf(bodyCloseTag);
			if (bodyCloseIndex < 0) {
				return htmlString;
			}
			// First position right after the closing body tag.
			const bodyEnd = bodyCloseIndex + bodyCloseTag.length;
			const htmlCloseIndex = htmlString.indexOf('</html>', bodyEnd);
			const tail = htmlCloseIndex < 0 ? '' : htmlString.slice(htmlCloseIndex);
			return htmlString.slice(0, bodyEnd) + tail;
		}

src/filters/removeboldwrapper.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/filters/removeboldwrapper
		*/

		/**
		* Removes `<b>` tag wrapper added by Google Docs to a copied content.
		*
		* @param {module:engine/view/documentfragment~DocumentFragment} documentFragment element `data.content` obtained from clipboard
		* @param {module:engine/view/upcastwriter~UpcastWriter} writer
		* @param documentFragment element `data.content` obtained from clipboard
		*/
		export default function removeBoldWrapper( documentFragment, writer ) {
		for ( const child of documentFragment.getChildren() ) {
		if ( child.is( 'element', 'b' ) && child.getStyle( 'font-weight' ) === 'normal' ) {
		const childIndex = documentFragment.getChildIndex( child );

		writer.remove( child );
		writer.insertChild( childIndex, child.getChildren(), documentFragment );
		}
		}
		/**
		 * Unwraps the `<b>` wrapper added by Google Docs around copied content: every direct child of the
		 * fragment that is a `<b>` element with `font-weight: normal` is replaced by its own children.
		 *
		 * @param documentFragment element `data.content` obtained from clipboard
		 * @param writer Writer used to modify the fragment.
		 */
		export default function removeBoldWrapper(documentFragment, writer) {
			for (const node of documentFragment.getChildren()) {
				const isNormalWeightBold = node.is('element', 'b') && node.getStyle('font-weight') === 'normal';
				if (!isNormalWeightBold) {
					continue;
				}
				// Remember the slot of the wrapper before removing it, then put its children there.
				const wrapperIndex = documentFragment.getChildIndex(node);
				writer.remove(node);
				writer.insertChild(wrapperIndex, node.getChildren(), documentFragment);
			}
		}

src/filters/space.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/filters/space
		*/

		/**
		@@ -16,19 +14,18 @@ * Replaces last space preceding elements closing tag with `&nbsp;`. Such operation prevents spaces from being removed
		*
		* @param {String} htmlString HTML string in which spacing should be normalized.
		* @returns {String} Input HTML with spaces normalized.
		* @param htmlString HTML string in which spacing should be normalized.
		* @returns Input HTML with spaces normalized.
		*/
		export function normalizeSpacing( htmlString ) {
		// Run normalizeSafariSpaceSpans() two times to cover nested spans.
		return normalizeSafariSpaceSpans( normalizeSafariSpaceSpans( htmlString ) )
		// Remove all \r\n from "spacerun spans" so the last replace line doesn't strip all whitespaces.
		.replace( /(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]?)[\r\n]+([^\S\r\n]<\/span>)/g, '$1$2' )
		.replace( /<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g, '' )
		.replace( / <\//g, '\u00A0</' )
		.replace( / <o:p><\/o:p>/g, '\u00A0<o:p></o:p>' )
		// Remove <o:p> block filler from empty paragraph. Safari uses \u00A0 instead of  .
		.replace( /<o:p>( \|\u00A0)<\/o:p>/g, '' )
		// Remove all whitespaces when they contain any \r or \n.
		.replace( />([^\S\r\n][\r\n]\s)</g, '><' );
		/**
		 * Normalizes the spacing of an HTML string so that meaningful spaces are kept during further processing:
		 * the last space preceding a closing tag is replaced with a non-breaking space (`\u00A0`), `<o:p>`
		 * block fillers are removed and whitespace runs between tags that contain a line break are dropped.
		 *
		 * @param htmlString HTML string in which spacing should be normalized.
		 * @returns Input HTML with spaces normalized.
		 */
		export function normalizeSpacing(htmlString) {
			// Run normalizeSafariSpaceSpans() two times to cover nested spans.
			return normalizeSafariSpaceSpans(normalizeSafariSpaceSpans(htmlString))
				// Remove all \r\n from "spacerun spans" so the last replace line doesn't strip all whitespaces.
				// `[^\S\r\n]*` stands for any run of whitespace other than line breaks.
				.replace(/(<span\s+style=['"]mso-spacerun:yes['"]>[^\S\r\n]*?)[\r\n]+([^\S\r\n]*<\/span>)/g, '$1$2')
				.replace(/<span\s+style=['"]mso-spacerun:yes['"]><\/span>/g, '')
				.replace(/ <\//g, '\u00A0</')
				.replace(/ <o:p><\/o:p>/g, '\u00A0<o:p></o:p>')
				// Remove <o:p> block filler from empty paragraph. Safari uses \u00A0 instead of &nbsp;.
				.replace(/<o:p>(&nbsp;|\u00A0)<\/o:p>/g, '')
				// Remove all whitespaces when they contain any \r or \n.
				.replace(/>([^\S\r\n]*[\r\n]\s*)</g, '><');
		}

		/**
		@@ -39,25 +36,26 @@ * Normalizes spacing in special Word `spacerun spans` (`<span style='mso-spacerun:yes'>\s+</span>`) by replacing
		*
		* @param {Document} htmlDocument Native `Document` object in which spacing should be normalized.
		* @param htmlDocument Native `Document` object in which spacing should be normalized.
		*/
		export function normalizeSpacerunSpans( htmlDocument ) {
		htmlDocument.querySelectorAll( 'span[style*=spacerun]' ).forEach( el => {
		const innerTextLength = el.innerText.length \|\| 0;

		el.innerText = Array( innerTextLength + 1 ).join( '\u00A0 ' ).substr( 0, innerTextLength );
		} );
		/**
		 * Normalizes spacing in the Word "spacerun spans" (spans whose `style` attribute contains `spacerun`):
		 * the text of each such span is replaced with a sequence of alternating non-breaking (`\u00A0`) and
		 * regular spaces of the same length.
		 *
		 * @param htmlDocument Native `Document` object in which spacing should be normalized.
		 */
		export function normalizeSpacerunSpans(htmlDocument) {
			htmlDocument.querySelectorAll('span[style*=spacerun]').forEach(el => {
				const htmlElement = el;
				const innerTextLength = htmlElement.innerText.length || 0;
				// Build enough `\u00A0 ` pairs and cut the result to the original text length.
				htmlElement.innerText = '\u00A0 '.repeat(innerTextLength).slice(0, innerTextLength);
			});
		}

		// Normalizes specific spacing generated by Safari when content pasted from Word (`<span class="Apple-converted-space"> </span>`)
		// by replacing all spaces sequences longer than 1 space with `&nbsp; ` pairs. This prevents spaces from being removed during
		// further DOM/View processing (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
		//
		// This function is similar to {@link module:clipboard/utils/normalizeclipboarddata normalizeClipboardData util} but uses
		// regular spaces / &nbsp; sequence for replacement.
		//
		// @param {String} htmlString HTML string in which spacing should be normalized
		// @returns {String} Input HTML with spaces normalized.
		function normalizeSafariSpaceSpans( htmlString ) {
		return htmlString.replace( /<span(?: class="Apple-converted-space"\|)>(\s+)<\/span>/g, ( fullMatch, spaces ) => {
		return spaces.length === 1 ? ' ' : Array( spaces.length + 1 ).join( '\u00A0 ' ).substr( 0, spaces.length );
		} );
		/**
		 * Normalizes specific spacing generated by Safari when content pasted from Word
		 * (`<span class="Apple-converted-space"> </span>`): a span holding only whitespace is replaced with a
		 * single regular space (one whitespace character) or with a sequence of alternating `\u00A0` and regular
		 * spaces of the same length (longer runs). This prevents spaces from being removed during further
		 * DOM/View processing (see especially {@link module:engine/view/domconverter~DomConverter#_processDataFromDomText}).
		 *
		 * This function is similar to {@link module:clipboard/utils/normalizeclipboarddata normalizeClipboardData util}.
		 *
		 * @param htmlString HTML string in which spacing should be normalized
		 * @returns Input HTML with spaces normalized.
		 */
		function normalizeSafariSpaceSpans(htmlString) {
			// The class attribute is optional: `(?: class="Apple-converted-space"|)` also accepts a bare `<span>`.
			return htmlString.replace(/<span(?: class="Apple-converted-space"|)>(\s+)<\/span>/g, (_fullMatch, spaces) => {
				return spaces.length === 1 ? ' ' : '\u00A0 '.repeat(spaces.length).slice(0, spaces.length);
			});
		}

src/index.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office
		*/

		export { default as PasteFromOffice } from './pastefromoffice';

src/normalizers/googledocsnormalizer.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/normalizers/googledocsnormalizer
		*/

		import { UpcastWriter } from 'ckeditor5/src/engine';

		import removeBoldWrapper from '../filters/removeboldwrapper';
		import transformBlockBrsToParagraphs from '../filters/br';
		import { unwrapParagraphInListItem } from '../filters/list';

		// Recognizes the `id="docs-internal-guid-<hex and dashes>"` attribute; the value may be wrapped in
		// double or single quotes. The quote alternation needs a plain `|` - an escaped pipe is a literal character.
		const googleDocsMatch = /id=("|')docs-internal-guid-[-0-9a-f]+("|')/i;

		/**
		 * Normalizer for the content pasted from Google Docs.
		 *
		 * @implements module:paste-from-office/normalizer~Normalizer
		 */
		export default class GoogleDocsNormalizer {
			/**
			 * Creates a new `GoogleDocsNormalizer` instance.
			 *
			 * @param {module:engine/view/document~Document} document View document.
			 */
			constructor(document) {
				/**
				 * View document passed to the writer that is created in `execute()`.
				 *
				 * @readonly
				 * @type {module:engine/view/document~Document}
				 */
				this.document = document;
			}

			/**
			 * Tells whether the HTML string matches the Google Docs detection pattern.
			 *
			 * @inheritDoc
			 */
			isActive(htmlString) {
				return googleDocsMatch.test(htmlString);
			}

			/**
			 * Runs the Google Docs filters on the parsed body (`data._parsedData.body`) and stores the
			 * result in `data.content`.
			 *
			 * @inheritDoc
			 */
			execute(data) {
				const writer = new UpcastWriter(this.document);
				const { body: documentFragment } = data._parsedData;

				// The filters modify `documentFragment` in place, in this order.
				removeBoldWrapper(documentFragment, writer);
				unwrapParagraphInListItem(documentFragment, writer);
				transformBlockBrsToParagraphs(documentFragment, writer);

				data.content = documentFragment;
			}
		}

src/normalizers/mswordnormalizer.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/normalizers/mswordnormalizer
		*/

		import { transformListItemLikeElementsIntoLists } from '../filters/list';
		import { replaceImagesSourceWithBase64 } from '../filters/image';

		// Matches a `<meta name=generator content="Microsoft Word <digits>">` tag; the quotes around the
		// attribute values and the trailing slash are optional, letter case is ignored.
		const msWordMatch1 = /<meta\sname="?generator"?\scontent="?microsoft\sword\s\d+"?\/?>/i;
		// Matches the start of an `xmlns:o="urn:schemas-microsoft-com...` namespace declaration, letter case is ignored.
		const msWordMatch2 = /xmlns:o="urn:schemas-microsoft-com/i;

		/**
		 * Normalizer for the content pasted from Microsoft Word.
		 *
		 * @implements module:paste-from-office/normalizer~Normalizer
		 */
		export default class MSWordNormalizer {
			/**
			 * Creates a new `MSWordNormalizer` instance.
			 *
			 * @param {module:engine/view/document~Document} document View document.
			 */
			constructor(document) {
				/**
				 * View document this normalizer was created for.
				 *
				 * @readonly
				 * @type {module:engine/view/document~Document}
				 */
				this.document = document;
			}

			/**
			 * Tells whether the HTML string matches either of the two Microsoft Word detection patterns.
			 *
			 * @inheritDoc
			 */
			isActive(htmlString) {
				return msWordMatch1.test(htmlString) || msWordMatch2.test(htmlString);
			}

			/**
			 * Runs the Word filters on the parsed body (`data._parsedData.body`) and stores the result in
			 * `data.content`.
			 *
			 * @inheritDoc
			 */
			execute(data) {
				const { body: documentFragment, stylesString } = data._parsedData;

				// Both filters modify `documentFragment` in place. The image filter additionally receives
				// the `text/rtf` flavor of the clipboard data.
				transformListItemLikeElementsIntoLists(documentFragment, stylesString);
				replaceImagesSourceWithBase64(documentFragment, data.dataTransfer.getData('text/rtf'));

				data.content = documentFragment;
			}
		}

103

src/pastefromoffice.js

		/**
		* @license Copyright (c) 2003-2022, CKSource Holding sp. z o.o. All rights reserved.
		* @license Copyright (c) 2003-2023, CKSource Holding sp. z o.o. All rights reserved.
		* For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license
		*/

		/**
		* @module paste-from-office/pastefromoffice
		*/

		import { Plugin } from 'ckeditor5/src/core';
		import { ClipboardPipeline } from 'ckeditor5/src/clipboard';

		import GoogleDocsNormalizer from './normalizers/googledocsnormalizer';
		import MSWordNormalizer from './normalizers/mswordnormalizer';

		import { parseHtml } from './filters/parse';

		/**
		@@ -26,62 +21,46 @@ * The Paste from Office plugin.
		* This plugin includes following normalizers:
		* * {@link module:paste-from-office/normalizers/mswordnormalizer~MSWordNormalizer Microsoft Word normalizer}
		* * {@link module:paste-from-office/normalizers/googledocsnormalizer~GoogleDocsNormalizer Google Docs normalizer}
		* * {@link module:paste-from-office/normalizers/mswordnormalizer~MSWordNormalizer Microsoft Word normalizer}
		* * {@link module:paste-from-office/normalizers/googledocsnormalizer~GoogleDocsNormalizer Google Docs normalizer}
		*
		* For more information about this feature check the {@glink api/paste-from-office package page}.
		*
		* @extends module:core/plugin~Plugin
		*/
		export default class PasteFromOffice extends Plugin {
			/**
			 * @inheritDoc
			 */
			static get pluginName() {
				return 'PasteFromOffice';
			}

			/**
			 * @inheritDoc
			 */
			static get requires() {
				return [ClipboardPipeline];
			}

			/**
			 * Registers a high-priority `inputTransformation` listener on the clipboard pipeline. For clipboard
			 * HTML recognized by one of the normalizers the listener parses the HTML, lets the normalizer
			 * produce `data.content` and flags the data as transformed.
			 *
			 * @inheritDoc
			 */
			init() {
				const editor = this.editor;
				const viewDocument = editor.editing.view.document;
				// The order matters: the first normalizer that reports itself active is the one used.
				const normalizers = [
					new MSWordNormalizer(viewDocument),
					new GoogleDocsNormalizer(viewDocument)
				];

				editor.plugins.get('ClipboardPipeline').on('inputTransformation', (evt, data) => {
					// The flag is set below once the data was transformed; do not process it again.
					if (data._isTransformedWithPasteFromOffice) {
						return;
					}

					// Leave the data untouched when the selection starts inside a code block.
					const codeBlock = editor.model.document.selection.getFirstPosition().parent;

					if (codeBlock.is('element', 'codeBlock')) {
						return;
					}

					const htmlString = data.dataTransfer.getData('text/html');
					const activeNormalizer = normalizers.find(normalizer => normalizer.isActive(htmlString));

					if (activeNormalizer) {
						// The HTML is parsed only when a normalizer is going to use it.
						data._parsedData = parseHtml(htmlString, viewDocument.stylesProcessor);

						activeNormalizer.execute(data);

						data._isTransformedWithPasteFromOffice = true;
					}
				}, { priority: 'high' });
			}
		}

src/normalizer.jsdoc

@ckeditor/ckeditor5-paste-from-office - npm Package Compare versions

New alerts

Fixed alerts

Improved metrics

Worsened metrics

Dependency changes