New Case Study: See how Anthropic automated 95% of dependency reviews with Socket. Learn More
Socket
Sign in | Demo | Install
Socket

@cocreate/sitemap

Package Overview
Dependencies
Maintainers
0
Versions
9
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

@cocreate/sitemap - npm Package Compare versions

Comparing version 1.1.0 to 1.2.0

14

CHANGELOG.md

@@ -0,1 +1,15 @@

# [1.2.0](https://github.com/CoCreate-app/CoCreate-sitemap/compare/v1.1.0...v1.2.0) (2024-08-30)
### Bug Fixes
* add sitemap to sitemapindex if not already in index ([0ff6911](https://github.com/CoCreate-app/CoCreate-sitemap/commit/0ff6911fe81eeb05295639fb984401d3e76b90ad))
* noindex metatag return and handling sitemap generation ([058116a](https://github.com/CoCreate-app/CoCreate-sitemap/commit/058116a34c44100bfa4ea1b20afe13fd489bc642))
### Features
* added node-html-parser ([397c590](https://github.com/CoCreate-app/CoCreate-sitemap/commit/397c5908af1149d2d60d7c431a8a0f1bd60789f4))
* parseHtml function to read and generate nested sitemaps using sitemap-* attributes and metatags ([64edbdb](https://github.com/CoCreate-app/CoCreate-sitemap/commit/64edbdbdcee6f64bc5080997557f9691f072381c))
# [1.1.0](https://github.com/CoCreate-app/CoCreate-sitemap/compare/v1.0.0...v1.1.0) (2024-08-24)

@@ -2,0 +16,0 @@

7

package.json
{
"name": "@cocreate/sitemap",
"version": "1.1.0",
"version": "1.2.0",
"description": "A simple sitemap component in vanilla javascript. Easily configured using HTML5 data-attributes and/or JavaScript API.",

@@ -46,3 +46,6 @@ "keywords": [

},
"main": "./src/index.js"
"main": "./src/index.js",
"dependencies": {
"node-html-parser": "^6.1.13"
}
}

@@ -23,2 +23,4 @@ /********************************************************************************

const { parse } = require("node-html-parser");
class CoCreateSitemap {

@@ -39,3 +41,4 @@ constructor(crud) {

// Check if the file is HTML and contains a noindex meta tag
if (file['content-type'] === 'text/html' && file.src.includes('<meta name="robots" content="noindex">'))
if (file['content-type'] === 'text/html'
&& /<meta\s+name=["']robots["']\s+content=["'][^"']*noindex[^"']*["']/i.test(file.src))
return;

@@ -60,5 +63,5 @@

if (file.sitemap.pathname) {
if (file.pathname) {
// Perform regex search starting at the pathname
const regexPattern = `<url>\\s*<loc>.*?${file.sitemap.pathname.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}.*?</loc>[\\s\\S]*?</url>`;
const regexPattern = `<url>\\s*<loc>.*?${file.pathname.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}.*?</loc>[\\s\\S]*?</url>`;
const match = sitemap.src.match(new RegExp(regexPattern));

@@ -91,31 +94,39 @@

createEntry(file) {
const depth = (file.pathname.match(/\//g) || []).length;
const priority = Math.max(0.1, 1.0 - (depth - 1) * 0.1).toFixed(1);
const defaultKeys = {
loc: file.pathname,
lastmod: file.modified.on,
changefreq: 'monthly', // Example default value
priority: priority,
};
// Merge default keys with file.sitemap, prioritizing file.sitemap values
file.sitemap = { ...defaultKeys, ...file.sitemap };
file.sitemap.loc = file.pathname;
file.sitemap.lastmod = file.modified.on;
if (file['content-type'] === 'text/html') {
parseHtml(file)
if (file.sitemap.type !== 'news') {
if (file.sitemap.changefreq)
file.sitemap.changefreq = 'monthly';
if (!file.sitemap.priority) {
const depth = (file.pathname.match(/\//g) || []).length;
file.sitemap.priority = Math.max(0.1, 1.0 - (depth - 1) * 0.1).toFixed(1);
}
}
}
let entry = `\t<url>\n`;
for (const key of Object.keys(file.sitemap)) {
if (key === 'pathname')
if (key === 'pathname' || key === 'type')
continue
const value = file.sitemap[key];
let value = file.sitemap[key];
if (typeof value === 'object' && value !== null) {
entry += `\t\t<${key}:${key}>\n`;
if (typeof value === 'object' && value !== null && !(value instanceof Date)) {
if (!Array.isArray(value))
value = [value]
for (let i = 0; i < value.length; i++) {
entry += `\t\t<${key}:${key}>\n`;
for (const nestedKey of Object.keys(value)) {
const nestedValue = value[nestedKey];
entry += `\t\t\t<${key}:${nestedKey}>${nestedValue}</${key}:${nestedKey}>\n`;
for (const nestedKey of Object.keys(value[i])) {
const nestedValue = value[nestedKey];
entry += `\t\t\t<${key}:${nestedKey}>${nestedValue}</${key}:${nestedKey}>\n`;
}
entry += `\t\t</${key}:${key}>\n`;
}
entry += `\t\t</${key}:${key}>\n`;
} else {

@@ -174,4 +185,4 @@ entry += `\t\t<${key}>${value}</${key}>\n`;

type = 'video';
} else if (file.sitemap.news) {
// type = 'news';
} else if (file.sitemap.type === 'news') {
type = 'news';
}

@@ -188,2 +199,4 @@

sitemap = await this.readSitemap(sitemap, host);
} else {
index = 1
}

@@ -195,10 +208,8 @@

index += 1
else
sitemap.src = this.createSitemap(type);
sitemap.name = `${name}${index}.xml`
sitemap.pathname = `/${name}${index}.xml`
sitemap.src = this.createSitemap(type);
// Add the new sitemap entry
const indexEntry = `\n<sitemap>\n\t<loc>{{$host}}/${name}${index}.xml</loc>\n</sitemap>`;
mainSitemap.src = mainSitemap.src.replace('</sitemapindex>', `${indexEntry}\n</sitemapindex>`);
}

@@ -208,2 +219,15 @@

// Create the regex pattern to match the <sitemap> block containing the specific <loc> for the pathname
const regexPattern = `<sitemap>\\s*<loc>[^<]*${sitemap.pathname}[^<]*</loc>[\\s\\S]*?</sitemap>`;
// Execute the regex match against the sitemap index source
const match = mainSitemap.src.match(new RegExp(regexPattern));
// Check if a match is found
if (!match) {
//TODO: if the sitemap is found but is not in the index, should we add the sitemap pathname to the index, or should we check the sitemap for the next available index and add it there if there is room, otherwise create a new index?
const indexEntry = `\t<sitemap>\n\t\t<loc>{{$host}}${sitemap.pathname}</loc>\n</sitemap>`;
mainSitemap.src = mainSitemap.src.replace('</sitemapindex>', `${indexEntry}\n</sitemapindex>`);
}
return { mainSitemap, sitemap }

@@ -234,8 +258,20 @@ }

createSitemap(type) {
if (type === 'main')
return `<?xml version="1.0" encoding="UTF-8"?>\n<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n</sitemapindex>`;
else if (type === 'image' || type === 'video' || type === 'news')
return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:${type}="http://www.google.com/schemas/sitemap-${type}/1.1">\n</urlset>`;
else
return `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n</urlset>`;
const xmlDeclaration = `<?xml version="1.0" encoding="UTF-8"?>\n`;
const sitemapNamespace = 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';
if (type === 'main') {
return `${xmlDeclaration}<sitemapindex ${sitemapNamespace}>\n</sitemapindex>`;
} else {
const imageNamespace = 'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"';
const videoNamespace = 'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"';
const newsNamespace = 'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"';
if (type === 'image') {
return `${xmlDeclaration}<urlset ${sitemapNamespace} ${imageNamespace}>\n</urlset>`;
} else if (type === 'video') {
return `${xmlDeclaration}<urlset ${sitemapNamespace} ${videoNamespace}>\n</urlset>`;
} else { // For 'news' type or any other types
return `${xmlDeclaration}<urlset ${sitemapNamespace} ${newsNamespace} ${imageNamespace} ${videoNamespace}>\n</urlset>`;
}
}
}

@@ -271,3 +307,3 @@

return matches ? matches.length : 0;
return matches ? matches.length : null;
} catch (err) {

@@ -306,4 +342,99 @@ console.error(`Error determining next sitemap index for ${filename}:`, err);

parseHtml(file) {
const dom = parse(html);
const entries = dom.querySelectorAll('[sitemap="true"]');
let types = ['image', 'video', 'news']
const previousEntries = {}
for (let i = 0; i < types.length; i++) {
if (!file.sitemap[types[i]])
continue
if (Array.isArray(file.sitemap[types[i]]))
previousEntries[types[i]] = file.sitemap[types[i]]
else
previousEntries[types[i]] = [file.sitemap[types[i]]]
delete file.sitemap[types[i]]
}
for (let i = 0; i < entries.length; i++) {
let type = '', query = '';
let existingObject
let entryObject = {};
if (entries[i].tagName === 'IMG') { // Corrected to 'IMG' for images
type = 'image';
query = 'loc'
entryObject.loc = entries[i].src;
entryObject.title = entries[i].getAttribute('sitemap-title') || entries[i].getAttribute('title') || entries[i].getAttribute('alt');
entryObject.caption = entries[i].getAttribute('sitemap-caption') || entries[i].getAttribute('alt') || entryObject.title;
entryObject.geo_location = entries[i].getAttribute('sitemap-geo-location');
} else if (entries[i].tagName === 'VIDEO') {
type = 'video';
query = 'content_loc'
entryObject.content_loc = entries[i].src;
entryObject.title = entries[i].getAttribute('sitemap-title') || entries[i].getAttribute('title');
entryObject.description = entries[i].getAttribute('description'); // 'description' if available
entryObject.thumbnail_loc = entries[i].getAttribute('sitemap-thumbnail') || entries[i].getAttribute('poster');
entryObject.duration = entries[i].getAttribute('sitemap-duration');
} else {
type = 'news';
file.sitemap.type = 'news';
query = 'title'
entryObject.title = entries[i].getAttribute('sitemap-title');
if (!entryObject.title) {
const title = dom.querySelector('title');
entryObject.title = title ? title.text : '';
}
entryObject.publication = {
name: entries[i].getAttribute('sitemap-publication-name'), // Use proper attribute
language: entries[i].getAttribute('sitemap-publication-language') // Use proper attribute
};
if (!entryObject.publication.language) {
// Fallback to HTML lang attribute
const htmlElement = dom.querySelector('html');
entryObject.publication.language = htmlElement ? htmlElement.getAttribute('lang') : null;
}
entryObject.publication_date = entries[i].getAttribute('sitemap-publication-date') || file.modified.on;
entryObject.keywords = entries[i].getAttribute('sitemap-keywords');
if (!entryObject.keywords) {
const keywords = dom.querySelector('meta[name="keywords"]');
entryObject.keywords = keywords ? keywords.getAttribute('content') : '';
}
entryObject.genres = entries[i].getAttribute('sitemap-genres');
}
if (previousEntries[type]) {
existingObject = previousEntries[type].find(item => item[query] === entryObject[query]);
entryObject = { ...existingObject, ...entryObject }
}
Object.keys(entryObject).forEach(key => {
if (!entryObject[key])
delete entryObject[key]
});
if (!file.sitemap[type])
file.sitemap[type] = []
file.sitemap[type].push(entryObject)
}
if (file.sitemap.type !== 'news') {
const priorityMeta = dom.querySelector('meta[name="sitemap-priority"]');
const changefreqMeta = dom.querySelector('meta[name="sitemap-changefreq"]');
file.sitemap.priority = priorityMeta ? priorityMeta.getAttribute('content') : file.sitemap.priority; // Default priority if not specified
file.sitemap.changefreq = changefreqMeta ? changefreqMeta.getAttribute('content') : file.sitemap.changefreq; // Default changefreq if not specified
}
}
}
module.exports = CoCreateSitemap;
SocketSocket SOC 2 Logo

Product

  • Package Alerts
  • Integrations
  • Docs
  • Pricing
  • FAQ
  • Roadmap
  • Changelog

Packages

npm

Stay in touch

Get open source security insights delivered straight into your inbox.


  • Terms
  • Privacy
  • Security

Made with ⚡️ by Socket Inc