@cocreate/sitemap
Advanced tools
Comparing version 1.1.0 to 1.2.0
@@ -0,1 +1,15 @@ | ||
# [1.2.0](https://github.com/CoCreate-app/CoCreate-sitemap/compare/v1.1.0...v1.2.0) (2024-08-30) | ||
### Bug Fixes | ||
* add sitemap to sitemapindex if not already in index ([0ff6911](https://github.com/CoCreate-app/CoCreate-sitemap/commit/0ff6911fe81eeb05295639fb984401d3e76b90ad)) | ||
* noindex metatag return and handling sitemap generation ([058116a](https://github.com/CoCreate-app/CoCreate-sitemap/commit/058116a34c44100bfa4ea1b20afe13fd489bc642)) | ||
### Features | ||
* added node-html-parser ([397c590](https://github.com/CoCreate-app/CoCreate-sitemap/commit/397c5908af1149d2d60d7c431a8a0f1bd60789f4)) | ||
* parseHtml function to read and generate nested sitemaps using sitemap-* attributes and metatags ([64edbdb](https://github.com/CoCreate-app/CoCreate-sitemap/commit/64edbdbdcee6f64bc5080997557f9691f072381c)) | ||
# [1.1.0](https://github.com/CoCreate-app/CoCreate-sitemap/compare/v1.0.0...v1.1.0) (2024-08-24) | ||
@@ -2,0 +16,0 @@ |
{ | ||
"name": "@cocreate/sitemap", | ||
"version": "1.1.0", | ||
"version": "1.2.0", | ||
"description": "A simple sitemap component in vanilla javascript. Easily configured using HTML5 data-attributes and/or JavaScript API.", | ||
@@ -46,3 +46,6 @@ "keywords": [ | ||
}, | ||
"main": "./src/index.js" | ||
"main": "./src/index.js", | ||
"dependencies": { | ||
"node-html-parser": "^6.1.13" | ||
} | ||
} |
205
src/index.js
@@ -23,2 +23,4 @@ /******************************************************************************** | ||
const { parse } = require("node-html-parser"); | ||
class CoCreateSitemap { | ||
@@ -39,3 +41,4 @@ constructor(crud) { | ||
// Check if the file is HTML and contains a noindex meta tag | ||
if (file['content-type'] === 'text/html' && file.src.includes('<meta name="robots" content="noindex">')) | ||
if (file['content-type'] === 'text/html' | ||
&& /<meta\s+name=["']robots["']\s+content=["'][^"']*noindex[^"']*["']/i.test(file.src)) | ||
return; | ||
@@ -60,5 +63,5 @@ | ||
if (file.sitemap.pathname) { | ||
if (file.pathname) { | ||
// Perform regex search starting at the pathname | ||
const regexPattern = `<url>\\s*<loc>.*?${file.sitemap.pathname.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}.*?</loc>[\\s\\S]*?</url>`; | ||
const regexPattern = `<url>\\s*<loc>.*?${file.pathname.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}.*?</loc>[\\s\\S]*?</url>`; | ||
const match = sitemap.src.match(new RegExp(regexPattern)); | ||
@@ -91,31 +94,39 @@ | ||
createEntry(file) { | ||
const depth = (file.pathname.match(/\//g) || []).length; | ||
const priority = Math.max(0.1, 1.0 - (depth - 1) * 0.1).toFixed(1); | ||
const defaultKeys = { | ||
loc: file.pathname, | ||
lastmod: file.modified.on, | ||
changefreq: 'monthly', // Example default value | ||
priority: priority, | ||
}; | ||
// Merge default keys with file.sitemap, prioritizing file.sitemap values | ||
file.sitemap = { ...defaultKeys, ...file.sitemap }; | ||
file.sitemap.loc = file.pathname; | ||
file.sitemap.lastmod = file.modified.on; | ||
if (file['content-type'] === 'text/html') { | ||
parseHtml(file) | ||
if (file.sitemap.type !== 'news') { | ||
if (file.sitemap.changefreq) | ||
file.sitemap.changefreq = 'monthly'; | ||
if (!file.sitemap.priority) { | ||
const depth = (file.pathname.match(/\//g) || []).length; | ||
file.sitemap.priority = Math.max(0.1, 1.0 - (depth - 1) * 0.1).toFixed(1); | ||
} | ||
} | ||
} | ||
let entry = `\t<url>\n`; | ||
for (const key of Object.keys(file.sitemap)) { | ||
if (key === 'pathname') | ||
if (key === 'pathname' || key === 'type') | ||
continue | ||
const value = file.sitemap[key]; | ||
let value = file.sitemap[key]; | ||
if (typeof value === 'object' && value !== null) { | ||
entry += `\t\t<${key}:${key}>\n`; | ||
if (typeof value === 'object' && value !== null && !(value instanceof Date)) { | ||
if (!Array.isArray(value)) | ||
value = [value] | ||
for (let i = 0; i < value.length; i++) { | ||
entry += `\t\t<${key}:${key}>\n`; | ||
for (const nestedKey of Object.keys(value)) { | ||
const nestedValue = value[nestedKey]; | ||
entry += `\t\t\t<${key}:${nestedKey}>${nestedValue}</${key}:${nestedKey}>\n`; | ||
for (const nestedKey of Object.keys(value[i])) { | ||
const nestedValue = value[nestedKey]; | ||
entry += `\t\t\t<${key}:${nestedKey}>${nestedValue}</${key}:${nestedKey}>\n`; | ||
} | ||
entry += `\t\t</${key}:${key}>\n`; | ||
} | ||
entry += `\t\t</${key}:${key}>\n`; | ||
} else { | ||
@@ -174,4 +185,4 @@ entry += `\t\t<${key}>${value}</${key}>\n`; | ||
type = 'video'; | ||
} else if (file.sitemap.news) { | ||
// type = 'news'; | ||
} else if (file.sitemap.type === 'news') { | ||
type = 'news'; | ||
} | ||
@@ -188,2 +199,4 @@ | ||
sitemap = await this.readSitemap(sitemap, host); | ||
} else { | ||
index = 1 | ||
} | ||
@@ -195,10 +208,8 @@ | ||
index += 1 | ||
else | ||
sitemap.src = this.createSitemap(type); | ||
sitemap.name = `${name}${index}.xml` | ||
sitemap.pathname = `/${name}${index}.xml` | ||
sitemap.src = this.createSitemap(type); | ||
// Add the new sitemap entry | ||
const indexEntry = `\n<sitemap>\n\t<loc>{{$host}}/${name}${index}.xml</loc>\n</sitemap>`; | ||
mainSitemap.src = mainSitemap.src.replace('</sitemapindex>', `${indexEntry}\n</sitemapindex>`); | ||
} | ||
@@ -208,2 +219,15 @@ | ||
// Create the regex pattern to match the <sitemap> block containing the specific <loc> for the pathname | ||
const regexPattern = `<sitemap>\\s*<loc>[^<]*${sitemap.pathname}[^<]*</loc>[\\s\\S]*?</sitemap>`; | ||
// Execute the regex match against the sitemap index source | ||
const match = mainSitemap.src.match(new RegExp(regexPattern)); | ||
// Check if a match is found | ||
if (!match) { | ||
//TODO: if the sitemap is found but is not in the index, should we add the sitemap pathname to the index, or should we check the sitemap for the next available index and then either add to it if there is room or create a new index? | ||
const indexEntry = `\t<sitemap>\n\t\t<loc>{{$host}}${sitemap.pathname}</loc>\n</sitemap>`; | ||
mainSitemap.src = mainSitemap.src.replace('</sitemapindex>', `${indexEntry}\n</sitemapindex>`); | ||
} | ||
return { mainSitemap, sitemap } | ||
@@ -234,8 +258,20 @@ } | ||
createSitemap(type) { | ||
if (type === 'main') | ||
return `<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n</sitemapindex>`; | ||
else if (type === 'image' || type === 'video' || type === 'news') | ||
return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:${type}="http://www.google.com/schemas/sitemap-${type}/1.1">\n</urlset>`; | ||
else | ||
return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n</urlset>`; | ||
const xmlDeclaration = `<?xml version="1.0" encoding="UTF-8"?>\n`; | ||
const sitemapNamespace = 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"'; | ||
if (type === 'main') { | ||
return `${xmlDeclaration}<sitemapindex ${sitemapNamespace}>\n</sitemapindex>`; | ||
} else { | ||
const imageNamespace = 'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"'; | ||
const videoNamespace = 'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"'; | ||
const newsNamespace = 'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"'; | ||
if (type === 'image') { | ||
return `${xmlDeclaration}<urlset ${sitemapNamespace} ${imageNamespace}>\n</urlset>`; | ||
} else if (type === 'video') { | ||
return `${xmlDeclaration}<urlset ${sitemapNamespace} ${videoNamespace}>\n</urlset>`; | ||
} else { // For 'news' type or any other types | ||
return `${xmlDeclaration}<urlset ${sitemapNamespace} ${newsNamespace} ${imageNamespace} ${videoNamespace}>\n</urlset>`; | ||
} | ||
} | ||
} | ||
@@ -271,3 +307,3 @@ | ||
return matches ? matches.length : 0; | ||
return matches ? matches.length : null; | ||
} catch (err) { | ||
@@ -306,4 +342,99 @@ console.error(`Error determining next sitemap index for ${filename}:`, err); | ||
parseHtml(file) { | ||
const dom = parse(html); | ||
const entries = dom.querySelectorAll('[sitemap="true"]'); | ||
let types = ['image', 'video', 'news'] | ||
const previousEntries = {} | ||
for (let i = 0; i < types.length; i++) { | ||
if (!file.sitemap[types[i]]) | ||
continue | ||
if (Array.isArray(file.sitemap[types[i]])) | ||
previousEntries[types[i]] = file.sitemap[types[i]] | ||
else | ||
previousEntries[types[i]] = [file.sitemap[types[i]]] | ||
delete file.sitemap[types[i]] | ||
} | ||
for (let i = 0; i < entries.length; i++) { | ||
let type = '', query = ''; | ||
let existingObject | ||
let entryObject = {}; | ||
if (entries[i].tagName === 'IMG') { // Corrected to 'IMG' for images | ||
type = 'image'; | ||
query = 'loc' | ||
entryObject.loc = entries[i].src; | ||
entryObject.title = entries[i].getAttribute('sitemap-title') || entries[i].getAttribute('title') || entries[i].getAttribute('alt'); | ||
entryObject.caption = entries[i].getAttribute('sitemap-caption') || entries[i].getAttribute('alt') || entryObject.title; | ||
entryObject.geo_location = entries[i].getAttribute('sitemap-geo-location'); | ||
} else if (entries[i].tagName === 'VIDEO') { | ||
type = 'video'; | ||
query = 'content_loc' | ||
entryObject.content_loc = entries[i].src; | ||
entryObject.title = entries[i].getAttribute('sitemap-title') || entries[i].getAttribute('title'); | ||
entryObject.description = entries[i].getAttribute('description'); // 'description' if available | ||
entryObject.thumbnail_loc = entries[i].getAttribute('sitemap-thumbnail') || entries[i].getAttribute('poster'); | ||
entryObject.duration = entries[i].getAttribute('sitemap-duration'); | ||
} else { | ||
type = 'news'; | ||
file.sitemap.type = 'news'; | ||
query = 'title' | ||
entryObject.title = entries[i].getAttribute('sitemap-title'); | ||
if (!entryObject.title) { | ||
const title = dom.querySelector('title'); | ||
entryObject.title = title ? title.text : ''; | ||
} | ||
entryObject.publication = { | ||
name: entries[i].getAttribute('sitemap-publication-name'), // Use proper attribute | ||
language: entries[i].getAttribute('sitemap-publication-language') // Use proper attribute | ||
}; | ||
if (!entryObject.publication.language) { | ||
// Fallback to HTML lang attribute | ||
const htmlElement = dom.querySelector('html'); | ||
entryObject.publication.language = htmlElement ? htmlElement.getAttribute('lang') : null; | ||
} | ||
entryObject.publication_date = entries[i].getAttribute('sitemap-publication-date') || file.modified.on; | ||
entryObject.keywords = entries[i].getAttribute('sitemap-keywords'); | ||
if (!entryObject.keywords) { | ||
const keywords = dom.querySelector('meta[name="keywords"]'); | ||
entryObject.keywords = keywords ? keywords.getAttribute('content') : ''; | ||
} | ||
entryObject.genres = entries[i].getAttribute('sitemap-genres'); | ||
} | ||
if (previousEntries[type]) { | ||
existingObject = previousEntries[type].find(item => item[query] === entryObject[query]); | ||
entryObject = { ...existingObject, ...entryObject } | ||
} | ||
Object.keys(entryObject).forEach(key => { | ||
if (!entryObject[key]) | ||
delete entryObject[key] | ||
}); | ||
if (!file.sitemap[type]) | ||
file.sitemap[type] = [] | ||
file.sitemap[type].push(entryObject) | ||
} | ||
if (file.sitemap.type !== 'news') { | ||
const priorityMeta = dom.querySelector('meta[name="sitemap-priority"]'); | ||
const changefreqMeta = dom.querySelector('meta[name="sitemap-changefreq"]'); | ||
file.sitemap.priority = priorityMeta ? priorityMeta.getAttribute('content') : file.sitemap.priority; // Default priority if not specified | ||
file.sitemap.changefreq = changefreqMeta ? changefreqMeta.getAttribute('content') : file.sitemap.changefreq; // Default changefreq if not specified | ||
} | ||
} | ||
} | ||
module.exports = CoCreateSitemap; |
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
80216
395
0
1
+ Added node-html-parser@^6.1.13
+ Added boolbase@1.0.0 (transitive)
+ Added css-select@5.1.0 (transitive)
+ Added css-what@6.1.0 (transitive)
+ Added dom-serializer@2.0.0 (transitive)
+ Added domelementtype@2.3.0 (transitive)
+ Added domhandler@5.0.3 (transitive)
+ Added domutils@3.2.2 (transitive)
+ Added entities@4.5.0 (transitive)
+ Added he@1.2.0 (transitive)
+ Added node-html-parser@6.1.13 (transitive)
+ Added nth-check@2.1.1 (transitive)