Comparing version 0.1.1 to 0.1.2
21
index.js
@@ -15,7 +15,12 @@ #!/usr/bin/env node | ||
.option('-s --overseas','包含海外地区') | ||
.option('-y --pinyin','输出pinyin') | ||
.option('-i --ignore [value]','忽略 \'省|市|区|地区|县\'') | ||
.option('-c --code','包含地区编码(身份证前6位)') | ||
.option('-u --update','重新抓取原始数据') | ||
.option('-k --key [value]','对象键名 name,children,code') | ||
// .option('-t --type','输出类型,array/object') | ||
// .option('-f --flatten','扁平化输出') | ||
.option('-u --update','重新抓取原始数据,或在citydata.json丢失的情况下使用') | ||
.option('-k --key [value]','对象键名 name,children,code,pinyin,顺序不能改变,例如n,s,c,p') | ||
.option('-f --flat [value]','扁平化输出,可以指定parentId参数名称') | ||
.option('--closed','地区树默认为关闭状态') | ||
.option('--father [value]','转出指定父节点的所有子节点,不含父节点') | ||
.option('-r --reverse','转出以子节点为根的树,便于倒查父节点') | ||
// .option('-t --type','输出类型,array/object') | ||
// .option('-z --zipcode','包含邮编') | ||
@@ -37,5 +42,11 @@ .parse(process.argv); | ||
pretty:commander.pretty, | ||
keys:commander.key | ||
keys:commander.key, | ||
pinyin:commander.pinyin, | ||
ignore:commander.ignore, | ||
flat:commander.flat, | ||
closed:commander.closed, | ||
father:commander.father, | ||
reverse:commander.reverse | ||
}; | ||
require('./lib/generator.js').generate(options); | ||
} |
@@ -0,1 +1,11 @@ | ||
var pinyin = require('pinyin'); | ||
var util = require('util'); | ||
/**
 * Convert a string to a lower-case, underscore-separated pinyin slug.
 *
 * The `pinyin` module is called in STYLE_NORMAL; the first entry of every
 * element it returns is kept and the pieces are joined with "_".
 * NOTE(review): presumably each returned element is the list of candidate
 * readings for one character/word — confirm against the pinyin package docs.
 *
 * @param {string} str - text to transliterate
 * @returns {string} lower-cased pieces joined by "_"
 */
var convertToPinyin = function (str) {
    var readings = pinyin(str, { style: pinyin.STYLE_NORMAL });
    var pieces = [];
    for (var i = 0; i < readings.length; i++) {
        // keep only the first entry of each element
        pieces.push(readings[i][0]);
    }
    var joined = pieces.join('_');
    return joined.toLowerCase();
};
exports.generate = function(options) { | ||
@@ -7,32 +17,117 @@ | ||
if(!options.keys || typeof options.keys !== 'string' || !options.keys.length){ | ||
options.keys = ['name','children','code']; | ||
options.keys = ['name','children','code','pinyin']; | ||
}else{ | ||
options.keys = options.keys.split(','); | ||
} | ||
options.ignore = options.ignore || ''; | ||
options.ignoreRegexp = new RegExp(util.format('(%s)$', options.ignore), 'i'); | ||
var ret = null; | ||
var fillRet = function(arr){ | ||
currLevel++; | ||
var ret = arr.map(function(item){ | ||
if(item.name === '海外' && !options.overseas){ | ||
return null; | ||
} | ||
var tmpObj = {}; | ||
tmpObj[options.keys[0]] = item.name; | ||
if(options.code && item.code){ | ||
tmpObj[options.keys[2]] = item.code; | ||
} | ||
if(level > currLevel && item.children){ | ||
tmpObj[options.keys[1]] = fillRet(item.children); | ||
} | ||
return tmpObj; | ||
}); | ||
currLevel--; | ||
return ret; | ||
}; | ||
/**
 * Depth-first search for the node whose code equals `options.father` and
 * return that node's children.
 *
 * Only nodes that have a `children` property are considered, so a matching
 * node without children is never returned.
 *
 * Always returns an array: `[]` when `options.father` is not set and also
 * when no matching node exists. (Previously the "not found" case returned
 * `undefined`, which made the caller's later `.map()` throw.)
 *
 * @param {Array} arr - list of area nodes ({code, name, children?})
 * @returns {Array} children of the matched node, or an empty array
 */
var findSon = function (arr) {
    if (!options.father) return [];

    // Inner search returns null for "not found" so that the recursion can
    // tell a miss apart from a hit; the public result is normalised below.
    var search = function (nodes) {
        for (var i = 0; i < nodes.length; ++i) {
            var area = nodes[i];
            if (!area.children) continue;
            // Loose equality is intentional: it was the original comparison
            // and tolerates a string/number mismatch between the two codes.
            if (area.code == options.father) return area.children;
            var found = search(area.children);
            if (found) return found;
        }
        return null;
    };

    return search(arr || []) || [];
};
var ret = fillRet(cityData).filter(function(item){ | ||
return item; | ||
}); | ||
if(options.father) cityData = findSon(cityData, options.father); | ||
if(options.reverse) { | ||
ret = {}; | ||
/**
 * Fill the enclosing `ret` object with a child-code -> parent lookup.
 *
 * For every node that has a parent, `ret[child.code]` is set to an object
 * holding the parent's code (under options.keys[2]) and the parent's name
 * with the ignored suffix stripped (under options.keys[0]). The function
 * then recurses into each node's children.
 *
 * Changes from the previous version:
 *  - nodes without a `code` are not recorded (they used to create a bogus
 *    "undefined" key in `ret`);
 *  - the '海外' subtree is skipped unless options.overseas is set, matching
 *    the flat and nested output branches;
 *  - forEach replaces map, since the callbacks are run only for side effects.
 *
 * @param {Array} [arr] - nodes of the current level (defaults to [])
 * @param {Object} [parent] - node owning `arr`; omitted for the root level
 */
var reverseData = function (arr, parent) {
    var nodes = (arr || []).filter(function (item) {
        return !(item.name === '海外' && !options.overseas);
    });

    // First pass: record the parent of every node on this level.
    if (parent) {
        nodes.forEach(function (item) {
            if (!item.code) return; // cannot be used as a lookup key
            var parentData = {};
            parentData[options.keys[2]] = parent.code;
            parentData[options.keys[0]] = parent.name.replace(options.ignoreRegexp, '');
            ret[item.code] = parentData;
        });
    }

    // Second pass: descend into the next level.
    nodes.forEach(function (item) {
        if (item.children) {
            reverseData(item.children, item);
        }
    });
};
reverseData(cityData); | ||
} else if(options.flat) { | ||
ret = []; | ||
/**
 * Append a flattened copy of the area tree to the enclosing `ret` array.
 *
 * Each node becomes one object with its name (ignored suffix stripped) and,
 * depending on the options, its code, its pinyin, a reference to its parent
 * code and a "closed" state marker. Children are emitted right after their
 * parent (pre-order).
 *
 * The parent reference is stored under the key given by `--flat <name>`.
 * When `--flat` is passed without a value the option is the boolean `true`;
 * in that case the key defaults to "parentId" instead of the literal "true".
 *
 * @param {Array} [arr] - nodes of the current level (defaults to [])
 * @param {string|number} [parentCode] - code of the owning node; falsy for the root level
 */
var flatData = function (arr, parentCode) {
    arr = arr || [];
    parentCode = parentCode || 0;

    var parentKey = (typeof options.flat === 'string' && options.flat.length)
        ? options.flat
        : 'parentId';

    arr.forEach(function (item) {
        if (item.name === '海外' && !options.overseas) {
            return;
        }
        var tmpObj = {};
        tmpObj[options.keys[0]] = item.name.replace(options.ignoreRegexp, '');
        if (options.code && item.code) {
            tmpObj[options.keys[2]] = item.code;
        }
        // emit pinyin of the (already suffix-stripped) name
        if (options.pinyin) {
            tmpObj[options.keys[3]] = convertToPinyin(tmpObj[options.keys[0]]);
        }
        if (parentCode > 0) {
            tmpObj[parentKey] = parentCode;
        }
        if (options.closed && item.children) {
            tmpObj['state'] = "closed";
        }
        ret.push(tmpObj);
        if (item.children) {
            flatData(item.children, item.code);
        }
    });
};
flatData(cityData); | ||
} else { | ||
/**
 * Build the nested output for one level of the area tree.
 *
 * Each node is turned into an object with its name (ignored suffix
 * stripped) and, depending on the options, its code, its pinyin and a
 * "closed" state marker. Children are included only while the enclosing
 * `level` limit is greater than the current depth (`currLevel`, which this
 * function increments on entry and decrements on exit).
 *
 * The '海外' node yields `null` unless options.overseas is set; the caller
 * filters those entries out.
 *
 * An empty `if` on options.father that had no effect was removed.
 *
 * @param {Array} arr - nodes of the current level
 * @returns {Array} converted nodes (may contain null placeholders)
 */
var fillRet = function (arr) {
    currLevel++;
    var ret = arr.map(function (item) {
        if (item.name === '海外' && !options.overseas) {
            return null;
        }
        var tmpObj = {};
        tmpObj[options.keys[0]] = item.name.replace(options.ignoreRegexp, '');
        if (options.code && item.code) {
            tmpObj[options.keys[2]] = item.code;
        }
        // emit pinyin of the (already suffix-stripped) name
        if (options.pinyin) {
            tmpObj[options.keys[3]] = convertToPinyin(tmpObj[options.keys[0]]);
        }
        if (options.closed && item.children) {
            tmpObj['state'] = "closed";
        }
        // descend only while the requested depth has not been reached
        if (level > currLevel && item.children) {
            tmpObj[options.keys[1]] = fillRet(item.children);
        }
        return tmpObj;
    });
    currLevel--;
    return ret;
};
ret = fillRet(cityData).filter(function (item) { | ||
return item; | ||
}); | ||
} | ||
if(options.amd){ | ||
@@ -47,3 +142,2 @@ output(ret,options.pretty,options.output,'amd'); | ||
} | ||
}; | ||
@@ -90,3 +184,2 @@ | ||
} | ||
} |
var restler = require('restler'); | ||
var cheerio = require('cheerio'); | ||
var xzqh_url = 'http://www.mca.gov.cn/article/sj/xzqh/2020/2020/202003301019.html'; | ||
var overseas_url = 'https://zh.wikipedia.org/zh/%E6%8C%89%E5%A4%A7%E6%B4%B2%E6%8E%92%E5%88%97%E7%9A%84%E5%9B%BD%E5%AE%B6%E5%88%97%E8%A1%A8'; | ||
exports.update = function(){ | ||
var cityDataListUrl = 'http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/'; | ||
getCityDataUrl(cityDataListUrl,function(err,cityDataUrl){ | ||
if(err){ | ||
console.log('loading city data list page failed.'); | ||
exports.update = function () { | ||
getCityRawData(xzqh_url, function(err, cityRawData) { | ||
if (err) { | ||
console.log('loading city raw data failed.'); | ||
console.log(err.message); | ||
@@ -14,19 +16,9 @@ return; | ||
getCityRawData(cityDataUrl,function(err,cityRawData){ | ||
if(err){ | ||
console.log('loading city raw data failed.'); | ||
var cityData = parseCityData(cityRawData); | ||
getOverseasRawData(overseas_url, function (err,overseasRawData) { | ||
if (err) { | ||
console.log('loading overseas raw data failed.'); | ||
console.log(err.message); | ||
return; | ||
} | ||
var cityData = parseCityData(cityRawData); | ||
var overseasDataListUrl = 'http://zh.wikipedia.org/zh/%E5%9B%BD%E5%AE%B6%E5%88%97%E8%A1%A8_(%E6%8C%89%E6%B4%B2%E6%8E%92%E5%88%97)'; | ||
getOverseasRawData(overseasDataListUrl,function(err,overseasRawData){ | ||
if(err){ | ||
console.log('loading overseas raw data failed.'); | ||
console.log(err.message); | ||
return; | ||
} | ||
} else { | ||
console.log('mixing overseas data to city data...'); | ||
@@ -41,10 +33,9 @@ var overseasObj = {name:'海外'}; | ||
console.log('mixing overseas data to city data successed.'); | ||
} | ||
console.log('writing data file...'); | ||
var fs = require('fs'); | ||
fs.writeFileSync('./lib/citydata.json',JSON.stringify(cityData)); | ||
console.log('writing data file successed.'); | ||
}); | ||
console.log('writing data file...'); | ||
var fs = require('fs'); | ||
fs.writeFileSync('./lib/citydata.json',JSON.stringify(cityData)); | ||
console.log('writing data file successed.'); | ||
}); | ||
}); | ||
@@ -54,104 +45,132 @@ | ||
function getCityRawData (url, callback) { | ||
console.log('loading city raw data page...'); | ||
restler.get(url) | ||
.on('success',function(data){ | ||
console.log('loading successed.'); | ||
console.log('parsing to get city raw data.'); | ||
var $ = cheerio.load(data); | ||
function getCityDataUrl(cityDataListUrl,callback){ | ||
console.log('loading citydata list page...'); | ||
restler.get(cityDataListUrl).on('success',function(data){ | ||
console.log('loading successed.'); | ||
console.log('parsing newest city data page url.'); | ||
var $ = cheerio.load(data); | ||
var cityDataUrl = $('.center_list_contlist a').eq(0).attr('href'); | ||
var url = require('url'); | ||
console.log('parsing successed, got newest citydata url:'+cityDataUrl); | ||
callback(null,url.resolve(cityDataListUrl,cityDataUrl)); | ||
}).on('fail',function(){ | ||
callback(new Error('fail')); | ||
}).on('error',callback); | ||
} | ||
var $validRows = $('tr').filter(function (_, tr) { | ||
return $(tr).attr('height') === '19' | ||
}); | ||
function getCityRawData(url,callback){ | ||
console.log('loading city raw data page...'); | ||
restler.get(url).on('success',function(data){ | ||
console.log('loading successed.'); | ||
console.log('parsing to get city raw data.'); | ||
var $ = cheerio.load(data); | ||
var cityRawData = $('.TRS_Editor').last().find('style').remove().end().text(); | ||
console.log('parsing successed.'); | ||
callback(null,cityRawData); | ||
}).on('fail',function(){ | ||
callback(new Error('fail')); | ||
}).on('error',callback); | ||
var rowsContent = []; | ||
$validRows.each(function (_, tr) { | ||
var $validTds = $(tr).find('td').filter(function (_, td) { | ||
return $(td).html().trim() !== ''; | ||
}); | ||
var colsContent = []; | ||
$validTds.each(function (_, td) { | ||
colsContent.push($(td).text()); | ||
}); | ||
rowsContent.push(colsContent); | ||
}); | ||
callback(null, rowsContent); | ||
}).on('fail',function () { | ||
callback(new Error('fail')); | ||
}).on('error',callback); | ||
} | ||
function parseCityData(rawData){ | ||
function parseCityData (rawData) { | ||
console.log('start parsing city data'); | ||
var cityRegExp = /(\d{6})\s+([\D]+)/g; | ||
var matchResult = cityRegExp.exec(rawData); | ||
var cityArr = []; | ||
while(matchResult){ | ||
cityArr.push({ | ||
code:matchResult[1], | ||
name:matchResult[2].trim() | ||
var provinceIndexes = rawData.map(function (row, index) { | ||
if (!row[1].match(/\s/)) { | ||
return index; | ||
} else { | ||
return -1; | ||
} | ||
}).filter(function (idx) { | ||
return idx > -1; | ||
}) | ||
var groups = provinceIndexes.map(function (idx, index) { | ||
var topLevel = { | ||
code: rawData[idx][0].trim(), | ||
name: rawData[idx][1].trim(), | ||
children: [] | ||
}; | ||
var children = rawData.slice(idx + 1, provinceIndexes[index + 1]); | ||
var hasCity = children.some(function (row) { | ||
return row[1].length - row[1].trim().length === 1; | ||
}); | ||
matchResult = cityRegExp.exec(rawData); | ||
} | ||
var cityData = []; | ||
var currPrefix = ''; | ||
var currItem = []; | ||
cityArr.forEach(function(cityItem){ | ||
var thisPrefix = cityItem.code.replace(/0*$/,''); | ||
var thisLevel = Math.ceil(thisPrefix.length / 2); | ||
if (hasCity) { | ||
var cityIndexes = children.map(function (row, index) { | ||
if (row[1].length - row[1].trim().length === 1) { | ||
return index; | ||
} else { | ||
return -1; | ||
} | ||
}).filter(function (idx) { | ||
return idx > -1; | ||
}); | ||
// 直辖市去掉二级(市辖区、县),直接把三级提升 | ||
var skipCodeArr = ['1101','1102','1201','1202','3101','3102','5001','5002']; | ||
if(skipCodeArr.indexOf(thisPrefix) > -1){ | ||
return; | ||
}else if(thisLevel === 3){ | ||
var shouldLevelUp = false; | ||
if(skipCodeArr.some(function(codePrefix){ | ||
return thisPrefix.indexOf(codePrefix) === 0; | ||
})){ | ||
thisLevel--; | ||
} | ||
} | ||
var cityGroups = cityIndexes.map(function (idx, index) { | ||
var cityLevel = { | ||
code: children[idx][0].trim(), | ||
name: children[idx][1].trim(), | ||
children: [] | ||
}; | ||
if(thisLevel === 1){ | ||
cityData.push(cityItem); | ||
currItem[0] = cityItem; | ||
}else{ | ||
var parent = currItem[thisLevel - 2]; | ||
if(!parent.children){ | ||
parent.children = []; | ||
} | ||
parent.children.push(cityItem); | ||
currItem[thisLevel - 1] = cityItem; | ||
var countyList = children.slice(idx + 1, cityIndexes[index + 1]).map(function (row) { | ||
return { | ||
code: row[0].trim(), | ||
name: row[1].trim() | ||
} | ||
}); | ||
cityLevel.children = countyList.slice(); | ||
return cityLevel; | ||
}); | ||
topLevel.children = cityGroups.slice(); | ||
} else { // 直辖市 | ||
var _children = children.map(function (child) { | ||
return { | ||
code: child[0].trim(), | ||
name: child[1].trim() | ||
} | ||
}); | ||
topLevel.children = _children.slice(); | ||
} | ||
return topLevel; | ||
}); | ||
console.log('parsing successed.'); | ||
return cityData; | ||
return groups; | ||
} | ||
function getOverseasRawData(url,callback){ | ||
function getOverseasRawData (url, callback) { | ||
console.log('loading overseas raw data page...'); | ||
restler.get(url).on('success',function(data){ | ||
console.log('loading successed.'); | ||
console.log('parsing to get overseas raw data.'); | ||
var $ = cheerio.load(data); | ||
var overseasRawData = $('.wikitable tr td b a').map(function(){ | ||
return $(this).text().trim(); | ||
}).get().filter(function(item){ | ||
var filterArr = [ | ||
'中华人民共和国', | ||
'香港', | ||
'澳门' | ||
]; | ||
return item && filterArr.indexOf(item) === -1; | ||
}).sort(function(item1,item2){ | ||
// todo:按拼音排序 | ||
}); | ||
console.log('parsing successed.'); | ||
callback(null,overseasRawData); | ||
}).on('fail',function(){ | ||
callback(new Error('fail')); | ||
}).on('error',callback); | ||
restler.get(url) | ||
.on('success', function (data) { | ||
console.log('loading successed.'); | ||
console.log('parsing to get overseas raw data.'); | ||
var $ = cheerio.load(data); | ||
var overseasRawData = $('.wikitable tr td b a').map(function () { | ||
return $(this).text().trim(); | ||
}).get().filter(function (item) { | ||
var filterArr = [ | ||
'中华人民共和国', | ||
'香港', | ||
'澳门', | ||
'澳門', | ||
'中华民国', | ||
'中華民國' | ||
]; | ||
return item && filterArr.indexOf(item) === -1; | ||
}); | ||
// @todo: 拼音支持 | ||
console.log('parsing successed.'); | ||
callback(null, overseasRawData); | ||
}) | ||
.on('fail', function () { | ||
callback(new Error('fail')); | ||
}) | ||
.on('error', callback); | ||
} |
{ | ||
"name": "city", | ||
"description": "中国国家行政区划数据", | ||
"version": "0.1.1", | ||
"version": "0.1.2", | ||
"author": "basecss <i@basecss.net>", | ||
@@ -16,2 +16,3 @@ "contributors": [ | ||
"commander": "~2.2.0", | ||
"pinyin": "^2.6.2", | ||
"restler": "^3.2.2" | ||
@@ -18,0 +19,0 @@ }, |
@@ -17,4 +17,4 @@ city | ||
"children":[{ | ||
"name":"和平区" | ||
}] | ||
"name":"和平区" | ||
}] | ||
}] | ||
@@ -32,2 +32,4 @@ ``` | ||
- 支持输出地区代码(身份证前6位) | ||
- 支持拼音输出 | ||
- 支持自定义去除 省|市|区|地区|县 等后缀 | ||
@@ -62,3 +64,26 @@ ### 使用方法 | ||
- `-u` `--update` 重新抓取原始数据 | ||
- `-y` `--pinyin` 输出pinyin | ||
- `-i` `--ignore '省|市|区|地区|县'` 忽略 省|市|区|地区|县 | ||
- `-k` `--key name,children,code,pinyin` 导出的键名(顺序不能改变,例如n,s,c,p) | ||
- `-f` `--flat 'parentId'` 扁平化输出,可以指定parentId参数名称 | ||
- `--closed` 地区树结点默认为关闭状态 | ||
- `--father` 转出指定父节点的所有子节点,不含父节点 | ||
- `-r` `--reverse` 转出以子节点为根的树,便于倒查父节点 | ||
示例: | ||
1. 嵌套方式列出所有地区,name改为n,children改为s,code改为c,并带上地区编码 | ||
```sh | ||
city -k n,s,c -o ./area.json -c | ||
``` | ||
2. 平面方式列出所有地区,name改为n,children改为s,code改为c,parentId为p,并带上地区编码 | ||
```sh | ||
city -k n,s,c -o ./flat-area.json -c -f p | ||
``` | ||
3. 倒树方式列出所有地区,子地区的code为key,val为父地区的{code:code, name:name}; | ||
```sh | ||
city -k n,s,c -o ./reverse-area.js -j reverse_area -r | ||
``` | ||
### Todo | ||
@@ -68,1 +93,8 @@ | ||
- [ ] 支持更多格式的输出<https://github.com/TooBug/city/issues/2> | ||
### Changelog | ||
2020.06.12 | ||
- 更新内置 `citydata.json` 数据 | ||
- 移除 `市辖区` 数据项 |
Sorry, the diff of this file is too big to display
Sorry, the diff of this file is not supported yet
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
New author
Supply chain riskA new npm collaborator published a version of the package for the first time. New collaborators are usually benign additions to a project, but do indicate a change to the security surface area of a package.
Found 1 instance in 1 package
License Policy Violation
LicenseThis package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
363
96
148234
4
1
+ Addedpinyin@^2.6.2
+ Added@mapbox/node-pre-gyp@1.0.11(transitive)
+ Addedabbrev@1.1.1(transitive)
+ Addedagent-base@6.0.2(transitive)
+ Addedansi-regex@5.0.1(transitive)
+ Addedaproba@2.0.0(transitive)
+ Addedare-we-there-yet@2.0.0(transitive)
+ Addedbalanced-match@1.0.2(transitive)
+ Addedbrace-expansion@1.1.11(transitive)
+ Addedchownr@2.0.0(transitive)
+ Addedcolor-support@1.1.3(transitive)
+ Addedcommander@1.1.1(transitive)
+ Addedconcat-map@0.0.1(transitive)
+ Addedconsole-control-strings@1.1.0(transitive)
+ Addeddebug@4.3.7(transitive)
+ Addeddelegates@1.0.0(transitive)
+ Addeddetect-libc@2.0.3(transitive)
+ Addedemoji-regex@8.0.0(transitive)
+ Addedfs-minipass@2.1.0(transitive)
+ Addedfs.realpath@1.0.0(transitive)
+ Addedgauge@3.0.2(transitive)
+ Addedglob@7.2.3(transitive)
+ Addedhas-unicode@2.0.1(transitive)
+ Addedhttps-proxy-agent@5.0.1(transitive)
+ Addedinflight@1.0.6(transitive)
+ Addedis-fullwidth-code-point@3.0.0(transitive)
+ Addedkeypress@0.1.0(transitive)
+ Addedmake-dir@3.1.0(transitive)
+ Addedminimatch@3.1.2(transitive)
+ Addedminipass@3.3.6, 5.0.0(transitive)
+ Addedminizlib@2.1.2(transitive)
+ Addedmkdirp@1.0.4(transitive)
+ Addedms@2.1.3(transitive)
+ Addednode-addon-api@3.2.1(transitive)
+ Addednode-fetch@2.7.0(transitive)
+ Addednodejieba@2.5.2(transitive)
+ Addednopt@5.0.0(transitive)
+ Addednpmlog@5.0.1(transitive)
+ Addedobject-assign@4.1.1(transitive)
+ Addedonce@1.4.0(transitive)
+ Addedpath-is-absolute@1.0.1(transitive)
+ Addedpinyin@2.11.2(transitive)
+ Addedreadable-stream@3.6.2(transitive)
+ Addedrimraf@3.0.2(transitive)
+ Addedsafe-buffer@5.2.1(transitive)
+ Addedsemver@6.3.1, 7.6.3(transitive)
+ Addedset-blocking@2.0.0(transitive)
+ Addedsignal-exit@3.0.7(transitive)
+ Addedstring-width@4.2.3(transitive)
+ Addedstring_decoder@1.3.0(transitive)
+ Addedstrip-ansi@6.0.1(transitive)
+ Addedtar@6.2.1(transitive)
+ Addedtr46@0.0.3(transitive)
+ Addedutil-deprecate@1.0.2(transitive)
+ Addedwebidl-conversions@3.0.1(transitive)
+ Addedwhatwg-url@5.0.0(transitive)
+ Addedwide-align@1.1.5(transitive)
+ Addedwrappy@1.0.2(transitive)
+ Addedyallist@4.0.0(transitive)