website-scraper
Advanced tools
Comparing version 2.4.1 to 3.0.0
@@ -9,5 +9,8 @@ var _ = require('lodash'); | ||
var resourceUrl = resource.getUrl(); | ||
var host = utils.getHostFromUrl(resourceUrl); | ||
var filePath = utils.getFilepathFromUrl(resourceUrl); | ||
var extension = utils.getFilenameExtension(filePath); | ||
filePath = path.join(host.replace(':', '_'), filePath); | ||
// If we have HTML from 'http://example.com/path' => set 'path/index.html' as filepath | ||
@@ -14,0 +17,0 @@ if (resource.isHtml()) { |
@@ -75,2 +75,12 @@ var url = require('url'); | ||
/**
 * Extracts the host part of a url, keeping the port when the url specifies one
 * Example: http://example.com:8080/some/path/file.js => example.com:8080
 * @param {string} u - url to take the host from
 * @returns {string} host with port, as reported by url.parse
 */
function getHostFromUrl (u) {
	// Parse once into a named intermediate, then hand back only the host field.
	var parsedUrl = url.parse(u);
	return parsedUrl.host;
}
/** | ||
* Returns extension for given filepath | ||
@@ -139,2 +149,3 @@ * Example: some/path/file.js => .js | ||
getHashFromUrl, | ||
getHostFromUrl, | ||
shortenFilename, | ||
@@ -141,0 +152,0 @@ waitAllFulfilled, |
{ | ||
"name": "website-scraper", | ||
"version": "2.4.1", | ||
"version": "3.0.0", | ||
"description": "Download website to a local directory (including all css, images, js, etc.)", | ||
@@ -5,0 +5,0 @@ "readmeFilename": "README.md", |
@@ -172,5 +172,5 @@ ## Introduction | ||
When the `bySiteStructure` filenameGenerator is used, the downloaded files are saved in `directory` using the same structure as on the website: | ||
- `/` => `DIRECTORY/index.html` | ||
- `/about` => `DIRECTORY/about/index.html` | ||
- `/resources/javascript/libraries/jquery.min.js` => `DIRECTORY/resources/javascript/libraries/jquery.min.js` | ||
- `/` => `DIRECTORY/example.com/index.html` | ||
- `/about` => `DIRECTORY/example.com/about/index.html` | ||
- `//cdn.example.com/resources/jquery.min.js` => `DIRECTORY/cdn.example.com/resources/jquery.min.js` | ||
@@ -177,0 +177,0 @@ ```javascript |
49953
988