Comparing version 0.1.2 to 0.1.3
@@ -6,2 +6,3 @@ declare module "academia" { | ||
const citeRegExp: RegExp; | ||
const yearRegExp: RegExp; | ||
/** | ||
@@ -11,2 +12,3 @@ Given the text of a paper, extract the `Cite`s using regular expressions. | ||
function parseCites(body: string): types.AuthorYearCite[]; | ||
const referenceRegExp: RegExp; | ||
/** | ||
@@ -20,3 +22,3 @@ Given a list of strings representing individual references in a bibliography, | ||
a unique match from `references` is found. | ||
TODO: handle multiple matches somehow. | ||
@@ -43,5 +45,20 @@ */ | ||
'Zhao et al.' -> | ||
['Zhao', 'et al.'] | ||
['Zhao', 'al.'] | ||
TODO: handle last-name-first swaps, e.g., | ||
'Levy, R., & Daumé III, H.' -> 'R. Levy, H. Daumé III' -> ['R. Levy', 'H. Daumé III'] | ||
Or: | ||
'Liu, F., Tian, F., & Zhu, Q.' -> 'F. Liu, F. Tian, & Q. Zhu' -> ['F. Liu', 'F. Tian', 'Q. Zhu'] | ||
Technically, this is ambiguous, since we could support lists of only last names | ||
(e.g., 'Liu, Tian'; is this ['Tian Liu'] or ['Liu', 'Tian']?), but heuristics are better than nothing. | ||
Example chunks: | ||
[FIRST MIDDLE LAST] SEP | ||
[FIRST LAST] SEP | ||
[LAST SEP FIRST] SEP | ||
[LAST SEP INITIAL] [LAST2 SEP INITIAL2] | ||
*/ | ||
function splitNames(input: string): string[]; | ||
function parseNames(input: string): types.Name[]; | ||
/** | ||
@@ -51,26 +68,15 @@ Typically, in-paper citations (`Cite`s) only have the last names of the authors, | ||
initials and last names. | ||
This method determines whether a `Cite`'s names match a `Reference`'s authors. | ||
authorsMatch(['Joshi'], ['Aravind K Joshi']) -> true | ||
authorsMatch(['Diab', 'Kamboj'], ['Mona Diab', 'Ankit Kamboj']) -> true | ||
'et al.' gets special treatment. The name parser reduces it to a trailing cite author whose last name is 'al.'; that author is a match if and only if there are | ||
more reference authors beyond the one parallel to it. | ||
In other words, 'et al.' cannot stand in for a single author. | ||
authorsMatch(['Blei', 'al.'], ['David M Blei', 'Andrew Y Ng', 'Michael I Jordan']) -> true | ||
*/ | ||
function authorsMatch(citeAuthors: types.Name[], referenceAuthors: types.Name[]): boolean; | ||
/** | ||
Given a name represented by a single string, parse it into first name, middle | ||
name, and last name. | ||
parseAuthor('Leonardo da Vinci') -> { first: 'Leonardo', last: 'da Vinci' } | ||
parseAuthor('Chris Callison-Burch') -> { first: 'Chris', last: 'Callison-Burch' } | ||
parseAuthor('Hanna M Wallach') -> { first: 'Hanna', middle: 'M', last: 'Wallach' } | ||
parseAuthor('Zhou') -> { last: 'Zhou' } | ||
parseAuthor('McCallum, Andrew') -> { first: 'Andrew', last: 'McCallum' } | ||
*/ | ||
function parseName(input: string): types.Name; | ||
} | ||
@@ -100,3 +106,3 @@ module types { | ||
interface AuthorYearCite extends Cite { | ||
/** usually only last names, one of which may be 'et al.' */ | ||
/** usually only last names, one of which may be 'al.' (from 'et al.') */ | ||
authors: Name[]; | ||
@@ -136,3 +142,3 @@ /** not necessarily a number, if there is a letter suffix */ | ||
paragraph distinctions. | ||
`sections` is a flat list; abstracts / subsections / references all count at | ||
@@ -139,0 +145,0 @@ the same level. |
185
index.js
@@ -0,1 +1,3 @@ | ||
/// <reference path="./type_declarations/index.d.ts" /> | ||
var lexing = require('lexing'); | ||
var academia; | ||
@@ -30,3 +32,3 @@ (function (academia) { | ||
acl.citeRegExp = new RegExp(citeSources.join('|'), 'g'); | ||
var yearRegExp = new RegExp(year); | ||
acl.yearRegExp = new RegExp(year); | ||
var citeCleanRegExp = new RegExp("[(),]|" + year, 'g'); | ||
@@ -41,3 +43,3 @@ /** | ||
return (body.match(acl.citeRegExp) || []).map(function (cite) { | ||
var year_match = cite.match(yearRegExp); | ||
var year_match = cite.match(acl.yearRegExp); | ||
// we cull it down to just the names by removing parentheses, commas, | ||
@@ -47,3 +49,3 @@ // and years (with optional suffixes), and trimming any extra whitespace | ||
return { | ||
authors: names.splitNames(names_string).map(names.parseName), | ||
authors: names.parseNames(names_string), | ||
year: year_match ? year_match[0] : null, | ||
@@ -55,3 +57,3 @@ style: types.CiteStyle.Textual, | ||
acl.parseCites = parseCites; | ||
var referenceRegExp = new RegExp("^(.+?)\\.\\s*(" + year + ")\\.\\s*(.+?)\\."); | ||
acl.referenceRegExp = new RegExp("^(.+?)[.,]\\s*\\(?(" + year + ")\\)?\\.\\s*(.+?)\\."); | ||
/** | ||
@@ -63,4 +65,4 @@ Given a list of strings representing individual references in a bibliography, | ||
return references.map(function (reference) { | ||
var match = reference.match(referenceRegExp); | ||
var authors = match ? names.splitNames(match[1]).map(names.parseName) : []; | ||
var match = reference.match(acl.referenceRegExp); | ||
var authors = match ? names.parseNames(match[1]) : []; | ||
return { | ||
@@ -94,4 +96,44 @@ authors: authors, | ||
var names; | ||
(function (names) { | ||
(function (names_1) { | ||
var Token = lexing.Token; | ||
/**
Assemble a name object from an ordered list of name parts.

`parts` is expected in first-to-last order; `parseNames` applies any
"Last, First" rotation before calling this function.

makeName(['Leonardo', 'da Vinci']) -> { first: 'Leonardo', last: 'da Vinci' }
makeName(['Chris', 'Callison-Burch']) -> { first: 'Chris', last: 'Callison-Burch' }
makeName(['Hanna', 'M', 'Wallach']) -> { first: 'Hanna', middle: 'M', last: 'Wallach' }
makeName(['Zhou']) -> { last: 'Zhou' }

With three or more parts, everything between the first and the last part is
joined with single spaces into `middle`. An empty list yields
`{ last: undefined }`.
*/
function makeName(parts) {
    var n = parts.length;
    if (n >= 3) {
        return {
            first: parts[0],
            middle: parts.slice(1, n - 1).join(' '),
            last: parts[n - 1],
        };
    }
    if (n === 2) {
        return {
            first: parts[0],
            last: parts[1],
        };
    }
    // zero or one part: only a last name is known
    return {
        last: parts[0]
    };
}
var default_rules = [ | ||
[/^$/, function (match) { return Token('EOF'); }], | ||
[/^\s+/, function (match) { return null; }], | ||
[/^,\s+/, function (match) { return Token('SEPARATOR', match[0]); }], | ||
[/^(and|et|&)/, function (match) { return Token('CONJUNCTION', match[0]); }], | ||
[/^[A-Z](\.|\b)/, function (match) { return Token('INITIAL', match[0]); }], | ||
[/^((van|von|da|de)\s+)?[A-Z][^,\s]+(\s+[IVX]+)?/i, function (match) { return Token('NAME', match[0]); }], | ||
]; | ||
/** | ||
1. Typical list of 3+ | ||
@@ -111,10 +153,72 @@ 'David Mimno, Hanna M Wallach, and Andrew McCallum' -> | ||
'Zhao et al.' -> | ||
['Zhao', 'et al.'] | ||
['Zhao', 'al.'] | ||
TODO: handle last-name-first swaps, e.g., | ||
'Levy, R., & Daumé III, H.' -> 'R. Levy, H. Daumé III' -> ['R. Levy', 'H. Daumé III'] | ||
Or: | ||
'Liu, F., Tian, F., & Zhu, Q.' -> 'F. Liu, F. Tian, & Q. Zhu' -> ['F. Liu', 'F. Tian', 'Q. Zhu'] | ||
Technically, this is ambiguous, since we could support lists of only last names | ||
(e.g., 'Liu, Tian'; is this ['Tian Liu'] or ['Liu', 'Tian']?), but heuristics are better than nothing. | ||
Example chunks: | ||
[FIRST MIDDLE LAST] SEP | ||
[FIRST LAST] SEP | ||
[LAST SEP FIRST] SEP | ||
[LAST SEP INITIAL] [LAST2 SEP INITIAL2] | ||
*/ | ||
function splitNames(input) { | ||
// three split options: (, and ) or ( and ) or (, ) | ||
// TODO: fix the 'et al.' hack | ||
return input.replace(/\s+et al\./, ', et al.').split(/,\s*(?:and|&)\s+|\s*(?:and|&)\s+|,\s*/); | ||
/**
Lex `input` with `default_rules` and group the tokens into name objects.

Parts accumulate in `pending`. A separator or conjunction seen while exactly
one part is pending marks the name as "Last, First"; when that name is emitted
its first part is rotated to the end before `makeName` is called.
*/
function parseNames(input) {
    var source = new lexing.StringIterator(input);
    var tokens = new lexing.Tokenizer(default_rules).map(source);
    var result = [];
    var pending = [];
    var lastNameFirst = false;
    // turn the pending parts into one name and reset the state
    var emit = function () {
        if (lastNameFirst) {
            // rotate the leading surname to the end
            pending.push(pending.shift());
        }
        result.push(makeName(pending));
        pending = [];
        lastNameFirst = false;
    };
    // tokens: EOF NAME INITIAL SEPARATOR CONJUNCTION
    for (var token = tokens.next(); token.name !== 'EOF'; token = tokens.next()) {
        switch (token.name) {
            case 'NAME':
                // a long name arriving after "Last," starts a new author
                if (lastNameFirst && pending.length > 0) {
                    emit();
                }
                pending.push(token.value);
                break;
            case 'INITIAL':
                pending.push(token.value);
                break;
            case 'SEPARATOR':
            case 'CONJUNCTION':
                if (pending.length === 1) {
                    lastNameFirst = true;
                }
                else if (pending.length > 1) {
                    emit();
                }
                // nothing pending: a repeated separator, ignore it
                break;
        }
    }
    // finish up
    if (pending.length > 0) {
        emit();
    }
    return result;
}
names.splitNames = splitNames; | ||
names_1.parseNames = parseNames; | ||
/** | ||
@@ -137,52 +241,17 @@ Typically, in-paper citations (`Cite`s) only have the last names of the authors, | ||
function authorsMatch(citeAuthors, referenceAuthors) { | ||
return citeAuthors.every(function (citeAuthor, i) { | ||
if (referenceAuthors[i] && citeAuthor.last === referenceAuthors[i].last) { | ||
for (var i = 0, l = Math.max(citeAuthors.length, referenceAuthors.length); i < l; i++) { | ||
var citeAuthor = citeAuthors[i]; | ||
var referenceAuthor = referenceAuthors[i]; | ||
// the et al. handling has to precede the normal name-checking conditional below | ||
if (citeAuthor && citeAuthor.last === 'al.' && referenceAuthors.length > (i + 1)) { | ||
// early exit: ignore the rest of the reference authors | ||
return true; | ||
} | ||
if (citeAuthor.last === 'et al.' && referenceAuthors.length > (i + 1)) { | ||
return true; | ||
if (citeAuthor === undefined || referenceAuthor === undefined || citeAuthor.last !== referenceAuthor.last) { | ||
return false; | ||
} | ||
return false; | ||
}); | ||
} | ||
names.authorsMatch = authorsMatch; | ||
/** | ||
Given a name represented by a single string, parse it into first name, middle | ||
name, and last name. | ||
parseAuthor('Leonardo da Vinci') -> { first: 'Leonardo', last: 'da Vinci' } | ||
parseAuthor('Chris Callison-Burch') -> { first: 'Chris', last: 'Callison-Burch' } | ||
parseAuthor('Hanna M Wallach') -> { first: 'Hanna', middle: 'M', last: 'Wallach' } | ||
parseAuthor('Zhou') -> { last: 'Zhou' } | ||
parseAuthor('McCallum, Andrew') -> { first: 'Andrew', last: 'McCallum' } | ||
*/ | ||
function parseName(input) { | ||
// 0. 'et al.' is a special case | ||
if (input === 'et al.') { | ||
return { last: input }; | ||
} | ||
// 1. normalize the comma out | ||
input = input.split(/,\s*/).reverse().join(' '); | ||
// 2. split on whitespace | ||
var parts = input.split(/\s+/); | ||
var n = parts.length; | ||
// 3. TODO: handle 'van', 'von', 'da', etc. | ||
if (n >= 3) { | ||
return { | ||
first: parts[0], | ||
middle: parts.slice(1, n - 1).join(' '), | ||
last: parts[n - 1], | ||
}; | ||
} | ||
else if (n == 2) { | ||
return { | ||
first: parts[0], | ||
last: parts[1], | ||
}; | ||
} | ||
return { | ||
last: parts[0] | ||
}; | ||
return true; | ||
} | ||
names.parseName = parseName; | ||
names_1.authorsMatch = authorsMatch; | ||
})(names = academia.names || (academia.names = {})); | ||
@@ -189,0 +258,0 @@ var types; |
171
names.js
@@ -0,2 +1,44 @@ | ||
/// <reference path="./type_declarations/index.d.ts" /> | ||
var lexing = require('lexing'); | ||
var Token = lexing.Token; | ||
/**
Assemble a name object from an ordered list of name parts.

`parts` is expected in first-to-last order; `parseNames` applies any
"Last, First" rotation before calling this function.

makeName(['Leonardo', 'da Vinci']) -> { first: 'Leonardo', last: 'da Vinci' }
makeName(['Chris', 'Callison-Burch']) -> { first: 'Chris', last: 'Callison-Burch' }
makeName(['Hanna', 'M', 'Wallach']) -> { first: 'Hanna', middle: 'M', last: 'Wallach' }
makeName(['Zhou']) -> { last: 'Zhou' }

With three or more parts, everything between the first and the last part is
joined with single spaces into `middle`. An empty list yields
`{ last: undefined }`.
*/
function makeName(parts) {
    var n = parts.length;
    if (n >= 3) {
        return {
            first: parts[0],
            middle: parts.slice(1, n - 1).join(' '),
            last: parts[n - 1],
        };
    }
    if (n === 2) {
        return {
            first: parts[0],
            last: parts[1],
        };
    }
    // zero or one part: only a last name is known
    return {
        last: parts[0]
    };
}
var default_rules = [ | ||
[/^$/, function (match) { return Token('EOF'); }], | ||
[/^\s+/, function (match) { return null; }], | ||
[/^,\s+/, function (match) { return Token('SEPARATOR', match[0]); }], | ||
[/^(and|et|&)/, function (match) { return Token('CONJUNCTION', match[0]); }], | ||
[/^[A-Z](\.|\b)/, function (match) { return Token('INITIAL', match[0]); }], | ||
[/^((van|von|da|de)\s+)?[A-Z][^,\s]+(\s+[IVX]+)?/i, function (match) { return Token('NAME', match[0]); }], | ||
]; | ||
/** | ||
1. Typical list of 3+ | ||
@@ -16,10 +58,72 @@ 'David Mimno, Hanna M Wallach, and Andrew McCallum' -> | ||
'Zhao et al.' -> | ||
['Zhao', 'et al.'] | ||
['Zhao', 'al.'] | ||
TODO: handle last-name-first swaps, e.g., | ||
'Levy, R., & Daumé III, H.' -> 'R. Levy, H. Daumé III' -> ['R. Levy', 'H. Daumé III'] | ||
Or: | ||
'Liu, F., Tian, F., & Zhu, Q.' -> 'F. Liu, F. Tian, & Q. Zhu' -> ['F. Liu', 'F. Tian', 'Q. Zhu'] | ||
Technically, this is ambiguous, since we could support lists of only last names | ||
(e.g., 'Liu, Tian'; is this ['Tian Liu'] or ['Liu', 'Tian']?), but heuristics are better than nothing. | ||
Example chunks: | ||
[FIRST MIDDLE LAST] SEP | ||
[FIRST LAST] SEP | ||
[LAST SEP FIRST] SEP | ||
[LAST SEP INITIAL] [LAST2 SEP INITIAL2] | ||
*/ | ||
function splitNames(input) { | ||
// three split options: (, and ) or ( and ) or (, ) | ||
// TODO: fix the 'et al.' hack | ||
return input.replace(/\s+et al\./, ', et al.').split(/,\s*(?:and|&)\s+|\s*(?:and|&)\s+|,\s*/); | ||
/**
Lex `input` with `default_rules` and group the tokens into name objects.

Parts accumulate in `pending`. A separator or conjunction seen while exactly
one part is pending marks the name as "Last, First"; when that name is emitted
its first part is rotated to the end before `makeName` is called.
*/
function parseNames(input) {
    var source = new lexing.StringIterator(input);
    var tokens = new lexing.Tokenizer(default_rules).map(source);
    var result = [];
    var pending = [];
    var lastNameFirst = false;
    // turn the pending parts into one name and reset the state
    var emit = function () {
        if (lastNameFirst) {
            // rotate the leading surname to the end
            pending.push(pending.shift());
        }
        result.push(makeName(pending));
        pending = [];
        lastNameFirst = false;
    };
    // tokens: EOF NAME INITIAL SEPARATOR CONJUNCTION
    for (var token = tokens.next(); token.name !== 'EOF'; token = tokens.next()) {
        switch (token.name) {
            case 'NAME':
                // a long name arriving after "Last," starts a new author
                if (lastNameFirst && pending.length > 0) {
                    emit();
                }
                pending.push(token.value);
                break;
            case 'INITIAL':
                pending.push(token.value);
                break;
            case 'SEPARATOR':
            case 'CONJUNCTION':
                if (pending.length === 1) {
                    lastNameFirst = true;
                }
                else if (pending.length > 1) {
                    emit();
                }
                // nothing pending: a repeated separator, ignore it
                break;
        }
    }
    // finish up
    if (pending.length > 0) {
        emit();
    }
    return result;
}
exports.splitNames = splitNames; | ||
exports.parseNames = parseNames; | ||
/** | ||
@@ -42,51 +146,16 @@ Typically, in-paper citations (`Cite`s) only have the last names of the authors, | ||
function authorsMatch(citeAuthors, referenceAuthors) { | ||
return citeAuthors.every(function (citeAuthor, i) { | ||
if (referenceAuthors[i] && citeAuthor.last === referenceAuthors[i].last) { | ||
for (var i = 0, l = Math.max(citeAuthors.length, referenceAuthors.length); i < l; i++) { | ||
var citeAuthor = citeAuthors[i]; | ||
var referenceAuthor = referenceAuthors[i]; | ||
// the et al. handling has to precede the normal name-checking conditional below | ||
if (citeAuthor && citeAuthor.last === 'al.' && referenceAuthors.length > (i + 1)) { | ||
// early exit: ignore the rest of the reference authors | ||
return true; | ||
} | ||
if (citeAuthor.last === 'et al.' && referenceAuthors.length > (i + 1)) { | ||
return true; | ||
if (citeAuthor === undefined || referenceAuthor === undefined || citeAuthor.last !== referenceAuthor.last) { | ||
return false; | ||
} | ||
return false; | ||
}); | ||
} | ||
return true; | ||
} | ||
exports.authorsMatch = authorsMatch; | ||
/** | ||
Given a name represented by a single string, parse it into first name, middle | ||
name, and last name. | ||
parseAuthor('Leonardo da Vinci') -> { first: 'Leonardo', last: 'da Vinci' } | ||
parseAuthor('Chris Callison-Burch') -> { first: 'Chris', last: 'Callison-Burch' } | ||
parseAuthor('Hanna M Wallach') -> { first: 'Hanna', middle: 'M', last: 'Wallach' } | ||
parseAuthor('Zhou') -> { last: 'Zhou' } | ||
parseAuthor('McCallum, Andrew') -> { first: 'Andrew', last: 'McCallum' } | ||
*/ | ||
function parseName(input) { | ||
// 0. 'et al.' is a special case | ||
if (input === 'et al.') { | ||
return { last: input }; | ||
} | ||
// 1. normalize the comma out | ||
input = input.split(/,\s*/).reverse().join(' '); | ||
// 2. split on whitespace | ||
var parts = input.split(/\s+/); | ||
var n = parts.length; | ||
// 3. TODO: handle 'van', 'von', 'da', etc. | ||
if (n >= 3) { | ||
return { | ||
first: parts[0], | ||
middle: parts.slice(1, n - 1).join(' '), | ||
last: parts[n - 1], | ||
}; | ||
} | ||
else if (n == 2) { | ||
return { | ||
first: parts[0], | ||
last: parts[1], | ||
}; | ||
} | ||
return { | ||
last: parts[0] | ||
}; | ||
} | ||
exports.parseName = parseName; |
178
names.ts
@@ -0,4 +1,49 @@ | ||
/// <reference path="./type_declarations/index.d.ts" /> | ||
import lexing = require('lexing'); | ||
var Token = lexing.Token; | ||
import types = require('./types'); | ||
/**
Assemble a `types.Name` from an ordered list of name parts.

`parts` is expected in first-to-last order; `parseNames` applies any
"Last, First" rotation before calling this function.

makeName(['Leonardo', 'da Vinci']) -> { first: 'Leonardo', last: 'da Vinci' }
makeName(['Chris', 'Callison-Burch']) -> { first: 'Chris', last: 'Callison-Burch' }
makeName(['Hanna', 'M', 'Wallach']) -> { first: 'Hanna', middle: 'M', last: 'Wallach' }
makeName(['Zhou']) -> { last: 'Zhou' }

With three or more parts, everything between the first and the last part is
joined with single spaces into `middle`. An empty list yields
`{ last: undefined }`.
*/
function makeName(parts: string[]): types.Name {
  var n = parts.length;
  if (n >= 3) {
    return {
      first: parts[0],
      middle: parts.slice(1, n - 1).join(' '),
      last: parts[n - 1],
    };
  }
  if (n === 2) {
    return {
      first: parts[0],
      last: parts[1],
    };
  }
  // zero or one part: only a last name is known
  return {
    last: parts[0]
  };
}
// Lexer rules for an author list: [pattern, token factory] pairs.
// NOTE(review): presumably lexing.Tokenizer tries them in order and skips
// rules whose factory returns null — confirm against the lexing package.
var default_rules: lexing.RegexRule<string>[] = [
  [/^$/, match => Token('EOF') ],
  [/^\s+/, match => null ],
  [/^,\s+/, match => Token('SEPARATOR', match[0]) ],
  // word boundary so that a lowercase word merely starting with "and"/"et" is not split
  [/^(?:and\b|et\b|&)/, match => Token('CONJUNCTION', match[0]) ],
  // a single capital followed by a period, whitespace, a comma, or end of input;
  // a plain \b would also fire before an apostrophe or an accented letter
  // ("O'Brien", "Eötvös") and strand the rest of the name
  [/^[A-Z](?:\.|(?=[\s,]|$))/, match => Token('INITIAL', match[0]) ],
  // optional particle (any case) + word (any case, so 'al.' still lexes) +
  // optional UPPERCASE roman-numeral suffix that must end the word; with the
  // former `i` flag "John Ivanov" lexed as "John Iv" + "anov" and "Wei Xu" as "Wei X" + "u"
  // NOTE: a bare middle initial I/V/X ("Michael I Jordan") is still absorbed as a suffix
  [/^(?:(?:[Vv][Aa][Nn]|[Vv][Oo][Nn]|[Dd][AaEe])\s+)?[A-Za-z][^,\s]+(?:\s+[IVX]+(?=[\s,]|$))?/, match => Token('NAME', match[0]) ],
];
/** | ||
1. Typical list of 3+ | ||
@@ -18,8 +63,78 @@ 'David Mimno, Hanna M Wallach, and Andrew McCallum' -> | ||
'Zhao et al.' -> | ||
['Zhao', 'et al.'] | ||
['Zhao', 'al.'] | ||
TODO: handle last-name-first swaps, e.g., | ||
'Levy, R., & Daumé III, H.' -> 'R. Levy, H. Daumé III' -> ['R. Levy', 'H. Daumé III'] | ||
Or: | ||
'Liu, F., Tian, F., & Zhu, Q.' -> 'F. Liu, F. Tian, & Q. Zhu' -> ['F. Liu', 'F. Tian', 'Q. Zhu'] | ||
Technically, this is ambiguous, since we could support lists of only last names | ||
(e.g., 'Liu, Tian'; is this ['Tian Liu'] or ['Liu', 'Tian']?), but heuristics are better than nothing. | ||
Example chunks: | ||
[FIRST MIDDLE LAST] SEP | ||
[FIRST LAST] SEP | ||
[LAST SEP FIRST] SEP | ||
[LAST SEP INITIAL] [LAST2 SEP INITIAL2] | ||
*/ | ||
export function splitNames(input: string): string[] { | ||
// three split options: (, and ) or ( and ) or (, ) | ||
// TODO: fix the 'et al.' hack | ||
return input.replace(/\s+et al\./, ', et al.').split(/,\s*(?:and|&)\s+|\s*(?:and|&)\s+|,\s*/); | ||
/**
Lex `input` with `default_rules` and group the tokens into `types.Name`s.

Parts accumulate in `pending`. A separator or conjunction seen while exactly
one part is pending marks the name as "Last, First"; when that name is emitted
its first part is rotated to the end before `makeName` is called.
*/
export function parseNames(input: string): types.Name[] {
  var source = new lexing.StringIterator(input);
  var tokens = new lexing.Tokenizer(default_rules).map(source);

  var result: types.Name[] = [];
  var pending: string[] = [];
  var lastNameFirst = false;

  // turn the pending parts into one name and reset the state
  var emit = () => {
    if (lastNameFirst) {
      // rotate the leading surname to the end
      pending.push(pending.shift());
    }
    result.push(makeName(pending));
    pending = [];
    lastNameFirst = false;
  };

  // tokens: EOF NAME INITIAL SEPARATOR CONJUNCTION
  for (var token = tokens.next(); token.name !== 'EOF'; token = tokens.next()) {
    switch (token.name) {
      case 'NAME':
        // a long name arriving after "Last," starts a new author
        if (lastNameFirst && pending.length > 0) {
          emit();
        }
        pending.push(token.value);
        break;
      case 'INITIAL':
        pending.push(token.value);
        break;
      case 'SEPARATOR':
      case 'CONJUNCTION':
        if (pending.length === 1) {
          lastNameFirst = true;
        }
        else if (pending.length > 1) {
          emit();
        }
        // nothing pending: a second separator without anything to separate
        break;
    }
  }

  // finish up
  if (pending.length > 0) {
    emit();
  }

  return result;
}
@@ -44,50 +159,15 @@ | ||
export function authorsMatch(citeAuthors: types.Name[], referenceAuthors: types.Name[]) { | ||
return citeAuthors.every((citeAuthor, i) => { | ||
if (referenceAuthors[i] && citeAuthor.last === referenceAuthors[i].last) { | ||
for (var i = 0, l = Math.max(citeAuthors.length, referenceAuthors.length); i < l; i++) { | ||
var citeAuthor = citeAuthors[i]; | ||
var referenceAuthor = referenceAuthors[i]; | ||
// the et al. handling has to precede the normal name-checking conditional below | ||
if (citeAuthor && citeAuthor.last === 'al.' && referenceAuthors.length > (i + 1)) { | ||
// early exit: ignore the rest of the reference authors | ||
return true; | ||
} | ||
if (citeAuthor.last === 'et al.' && referenceAuthors.length > (i + 1)) { | ||
return true; | ||
if (citeAuthor === undefined || referenceAuthor === undefined || citeAuthor.last !== referenceAuthor.last) { | ||
return false; | ||
} | ||
return false; | ||
}); | ||
} | ||
/** | ||
Given a name represented by a single string, parse it into first name, middle | ||
name, and last name. | ||
parseAuthor('Leonardo da Vinci') -> { first: 'Leonardo', last: 'da Vinci' } | ||
parseAuthor('Chris Callison-Burch') -> { first: 'Chris', last: 'Callison-Burch' } | ||
parseAuthor('Hanna M Wallach') -> { first: 'Hanna', middle: 'M', last: 'Wallach' } | ||
parseAuthor('Zhou') -> { last: 'Zhou' } | ||
parseAuthor('McCallum, Andrew') -> { first: 'Andrew', last: 'McCallum' } | ||
*/ | ||
export function parseName(input: string): types.Name { | ||
// 0. 'et al.' is a special case | ||
if (input === 'et al.') { | ||
return {last: input}; | ||
} | ||
// 1. normalize the comma out | ||
input = input.split(/,\s*/).reverse().join(' '); | ||
// 2. split on whitespace | ||
var parts = input.split(/\s+/); | ||
var n = parts.length; | ||
// 3. TODO: handle 'van', 'von', 'da', etc. | ||
if (n >= 3) { | ||
return { | ||
first: parts[0], | ||
middle: parts.slice(1, n - 1).join(' '), | ||
last: parts[n - 1], | ||
}; | ||
} | ||
else if (n == 2) { | ||
return { | ||
first: parts[0], | ||
last: parts[1], | ||
}; | ||
} | ||
return { | ||
last: parts[0] | ||
}; | ||
return true; | ||
} |
@@ -11,3 +11,3 @@ { | ||
], | ||
"version": "0.1.2", | ||
"version": "0.1.3", | ||
"homepage": "https://github.com/chbrown/academia", | ||
@@ -17,7 +17,13 @@ "repository": "git://github.com/chbrown/academia.git", | ||
"license": "MIT", | ||
"dependencies": {}, | ||
"dependencies": { | ||
"lexing": "*" | ||
}, | ||
"devDependencies": { | ||
"mocha": "*", | ||
"typescript-declare": "*", | ||
"typescript": "*" | ||
}, | ||
"scripts": { | ||
"test": "make test" | ||
} | ||
} |
@@ -26,3 +26,3 @@ var types = require('../types'); | ||
exports.citeRegExp = new RegExp(citeSources.join('|'), 'g'); | ||
var yearRegExp = new RegExp(year); | ||
exports.yearRegExp = new RegExp(year); | ||
var citeCleanRegExp = new RegExp("[(),]|" + year, 'g'); | ||
@@ -37,3 +37,3 @@ /** | ||
return (body.match(exports.citeRegExp) || []).map(function (cite) { | ||
var year_match = cite.match(yearRegExp); | ||
var year_match = cite.match(exports.yearRegExp); | ||
// we cull it down to just the names by removing parentheses, commas, | ||
@@ -43,3 +43,3 @@ // and years (with optional suffixes), and trimming any extra whitespace | ||
return { | ||
authors: names.splitNames(names_string).map(names.parseName), | ||
authors: names.parseNames(names_string), | ||
year: year_match ? year_match[0] : null, | ||
@@ -51,3 +51,3 @@ style: types.CiteStyle.Textual, | ||
exports.parseCites = parseCites; | ||
var referenceRegExp = new RegExp("^(.+?)\\.\\s*(" + year + ")\\.\\s*(.+?)\\."); | ||
exports.referenceRegExp = new RegExp("^(.+?)[.,]\\s*\\(?(" + year + ")\\)?\\.\\s*(.+?)\\."); | ||
/** | ||
@@ -59,4 +59,4 @@ Given a list of strings representing individual references in a bibliography, | ||
return references.map(function (reference) { | ||
var match = reference.match(referenceRegExp); | ||
var authors = match ? names.splitNames(match[1]).map(names.parseName) : []; | ||
var match = reference.match(exports.referenceRegExp); | ||
var authors = match ? names.parseNames(match[1]) : []; | ||
return { | ||
@@ -63,0 +63,0 @@ authors: authors, |
@@ -14,4 +14,4 @@ import types = require('../types'); | ||
var name = '[A-Z][^()\\s]+(?: [IV]+)?'; | ||
var year = '[0-9]{4}(?:[-–—][0-9]{4})?[a-z]?'; | ||
const name = '[A-Z][^()\\s]+(?: [IV]+)?'; | ||
const year = '[0-9]{4}(?:[-–—][0-9]{4})?[a-z]?'; | ||
@@ -28,4 +28,5 @@ var citeSources = [ | ||
]; | ||
export const citeRegExp = new RegExp(citeSources.join('|'), 'g'); | ||
const yearRegExp = new RegExp(year); | ||
export const yearRegExp = new RegExp(year); | ||
const citeCleanRegExp = new RegExp(`[(),]|${year}`, 'g'); | ||
@@ -46,3 +47,3 @@ | ||
return { | ||
authors: names.splitNames(names_string).map(names.parseName), | ||
authors: names.parseNames(names_string), | ||
year: year_match ? year_match[0] : null, | ||
@@ -54,3 +55,3 @@ style: types.CiteStyle.Textual, | ||
const referenceRegExp = new RegExp(`^(.+?)\\.\\s*(${year})\\.\\s*(.+?)\\.`); | ||
export const referenceRegExp = new RegExp(`^(.+?)[.,]\\s*\\(?(${year})\\)?\\.\\s*(.+?)\\.`); | ||
@@ -64,3 +65,3 @@ /** | ||
var match = reference.match(referenceRegExp); | ||
var authors = match ? names.splitNames(match[1]).map(names.parseName) : []; | ||
var authors = match ? names.parseNames(match[1]) : []; | ||
return { | ||
@@ -67,0 +68,0 @@ authors: authors, |
294
tmp/index.ts
@@ -0,1 +1,3 @@ | ||
/// <reference path="./type_declarations/index.d.ts" /> | ||
import lexing = require('lexing'); | ||
module academia { | ||
@@ -15,17 +17,18 @@ export module styles { | ||
var name = '[A-Z][^()\\s]+'; | ||
var year = '[0-9]{4}(?:[-–—][0-9]{4})?[a-z]?'; | ||
const name = '[A-Z][^()\\s]+(?: [IV]+)?'; | ||
const year = '[0-9]{4}(?:[-–—][0-9]{4})?[a-z]?'; | ||
var citeSources = [ | ||
// et al., duo, and single, with year in parens | ||
`${name} et al. \\(${year}\\)`, | ||
`${name} and ${name} \\(${year}\\)`, | ||
`${name} \\(${year}\\)`, | ||
`${name}\\s+et\\s+al.\\s+\\(${year}\\)`, | ||
`${name}\\s+(?:and|&)\\s+${name}\\s+\\(${year}\\)`, | ||
`${name}\\s+\\(${year}\\)`, | ||
// et al., duo, and single, with year not in parens (note the commas) | ||
`${name} et al., ${year}\\b`, | ||
`${name} and ${name}, ${year}\\b`, | ||
`${name}, ${year}\\b`, | ||
`${name}\\s+et\\s+al.,\\s+${year}\\b`, | ||
`${name}\\s+(?:and|&)\\s+${name},\\s+${year}\\b`, | ||
`${name},\\s+${year}\\b`, | ||
]; | ||
export const citeRegExp = new RegExp(citeSources.join('|'), 'g'); | ||
const yearRegExp = new RegExp(year); | ||
export const yearRegExp = new RegExp(year); | ||
const citeCleanRegExp = new RegExp(`[(),]|${year}`, 'g'); | ||
@@ -46,3 +49,3 @@ | ||
return { | ||
authors: names.splitNames(names_string).map(names.parseName), | ||
authors: names.parseNames(names_string), | ||
year: year_match ? year_match[0] : null, | ||
@@ -54,3 +57,3 @@ style: types.CiteStyle.Textual, | ||
const referenceRegExp = new RegExp(`^(.+?)\\.\\s*(${year})\\.\\s*(.+?)\\.`); | ||
export const referenceRegExp = new RegExp(`^(.+?)[.,]\\s*\\(?(${year})\\)?\\.\\s*(.+?)\\.`); | ||
@@ -64,3 +67,3 @@ /** | ||
var match = reference.match(referenceRegExp); | ||
var authors = match ? names.splitNames(match[1]).map(names.parseName) : []; | ||
var authors = match ? names.parseNames(match[1]) : []; | ||
return { | ||
@@ -90,5 +93,176 @@ authors: authors, | ||
} | ||
} | ||
} | ||
export module names { | ||
var Token = lexing.Token; | ||
/**
Assemble a `types.Name` from an ordered list of name parts.

`parts` is expected in first-to-last order; `parseNames` applies any
"Last, First" rotation before calling this function.

makeName(['Leonardo', 'da Vinci']) -> { first: 'Leonardo', last: 'da Vinci' }
makeName(['Chris', 'Callison-Burch']) -> { first: 'Chris', last: 'Callison-Burch' }
makeName(['Hanna', 'M', 'Wallach']) -> { first: 'Hanna', middle: 'M', last: 'Wallach' }
makeName(['Zhou']) -> { last: 'Zhou' }

With three or more parts, everything between the first and the last part is
joined with single spaces into `middle`. An empty list yields
`{ last: undefined }`.
*/
function makeName(parts: string[]): types.Name {
  var n = parts.length;
  if (n >= 3) {
    return {
      first: parts[0],
      middle: parts.slice(1, n - 1).join(' '),
      last: parts[n - 1],
    };
  }
  if (n === 2) {
    return {
      first: parts[0],
      last: parts[1],
    };
  }
  // zero or one part: only a last name is known
  return {
    last: parts[0]
  };
}
// Lexer rules for an author list: [pattern, token factory] pairs.
// NOTE(review): presumably lexing.Tokenizer tries them in order and skips
// rules whose factory returns null — confirm against the lexing package.
var default_rules: lexing.RegexRule<string>[] = [
  [/^$/, match => Token('EOF') ],
  [/^\s+/, match => null ],
  [/^,\s+/, match => Token('SEPARATOR', match[0]) ],
  // word boundary so that a lowercase word merely starting with "and"/"et" is not split
  [/^(?:and\b|et\b|&)/, match => Token('CONJUNCTION', match[0]) ],
  // a single capital followed by a period, whitespace, a comma, or end of input;
  // a plain \b would also fire before an apostrophe or an accented letter
  // ("O'Brien", "Eötvös") and strand the rest of the name
  [/^[A-Z](?:\.|(?=[\s,]|$))/, match => Token('INITIAL', match[0]) ],
  // optional particle (any case) + word (any case, so 'al.' still lexes) +
  // optional UPPERCASE roman-numeral suffix that must end the word; with the
  // former `i` flag "John Ivanov" lexed as "John Iv" + "anov" and "Wei Xu" as "Wei X" + "u"
  // NOTE: a bare middle initial I/V/X ("Michael I Jordan") is still absorbed as a suffix
  [/^(?:(?:[Vv][Aa][Nn]|[Vv][Oo][Nn]|[Dd][AaEe])\s+)?[A-Za-z][^,\s]+(?:\s+[IVX]+(?=[\s,]|$))?/, match => Token('NAME', match[0]) ],
];
/** | ||
1. Typical list of 3+ | ||
'David Mimno, Hanna M Wallach, and Andrew McCallum' -> | ||
['David Mimno', 'Hanna M Wallach', 'Andrew McCallum'] | ||
2. List of 3+ without the Oxford comma, in case that ever happens | ||
'Aravind K Joshi, Ben King and Steven Abney' -> | ||
['Aravind K Joshi', 'Ben King', 'Steven Abney'] | ||
3. Duo | ||
'Daniel Ramage and Chris Callison-Burch' -> | ||
['Daniel Ramage', 'Chris Callison-Burch'] | ||
4. Single author | ||
'David Sankofl' -> | ||
['David Sankofl'] | ||
5. Et al. abbreviation | ||
'Zhao et al.' -> | ||
['Zhao', 'al.'] | ||
TODO: handle last-name-first swaps, e.g., | ||
'Levy, R., & Daumé III, H.' -> 'R. Levy, H. Daumé III' -> ['R. Levy', 'H. Daumé III'] | ||
Or: | ||
'Liu, F., Tian, F., & Zhu, Q.' -> 'F. Liu, F. Tian, & Q. Zhu' -> ['F. Liu', 'F. Tian', 'Q. Zhu'] | ||
Technically, this is ambiguous, since we could support lists of only last names | ||
(e.g., 'Liu, Tian'; is this ['Tian Liu'] or ['Liu', 'Tian']?), but heuristics are better than nothing. | ||
Example chunks: | ||
[FIRST MIDDLE LAST] SEP | ||
[FIRST LAST] SEP | ||
[LAST SEP FIRST] SEP | ||
[LAST SEP INITIAL] [LAST2 SEP INITIAL2] | ||
*/ | ||
export function parseNames(input: string): types.Name[] {
  // Lex `input` with `default_rules`, then group the tokens into names.
  var source = new lexing.StringIterator(input);
  var tokens = new lexing.Tokenizer(default_rules).map(source);

  var result: types.Name[] = [];
  // parts of the name currently being assembled
  var pending: string[] = [];
  // set when a separator/conjunction follows a lone part ("Last, First" order)
  var lastNameFirst = false;

  // turn the pending parts into one name and reset the state
  var emit = () => {
    if (lastNameFirst) {
      // rotate the leading surname to the end
      pending.push(pending.shift());
    }
    result.push(makeName(pending));
    pending = [];
    lastNameFirst = false;
  };

  // tokens: EOF NAME INITIAL SEPARATOR CONJUNCTION
  for (var token = tokens.next(); token.name !== 'EOF'; token = tokens.next()) {
    switch (token.name) {
      case 'NAME':
        // a long name arriving after "Last," starts a new author
        if (lastNameFirst && pending.length > 0) {
          emit();
        }
        pending.push(token.value);
        break;
      case 'INITIAL':
        pending.push(token.value);
        break;
      case 'SEPARATOR':
      case 'CONJUNCTION':
        if (pending.length === 1) {
          lastNameFirst = true;
        }
        else if (pending.length > 1) {
          emit();
        }
        // nothing pending: a second separator without anything to separate
        break;
    }
  }

  // finish up
  if (pending.length > 0) {
    emit();
  }

  return result;
}
/**
Decide whether the authors named in an in-paper citation (`Cite`) line up with
the authors of a bibliography `Reference`.

Citations typically carry only last names, while references carry full names
(or at least initials plus last names), so only the `last` field is compared,
position by position:

authorsMatch([{last: 'Joshi'}], [{first: 'Aravind', middle: 'K', last: 'Joshi'}]) -> true
authorsMatch([{last: 'Diab'}, {last: 'Kamboj'}], [{last: 'Diab'}, {last: 'Kamboj'}]) -> true

Both lists must have the same length, unless the citation uses 'et al.', which
appears here as an author whose `last` is 'al.'. That entry matches if and only
if the reference has further authors beyond the one at the same position; the
remaining reference authors are then ignored. In other words, 'et al.' cannot
stand in for a single author.

authorsMatch([{last: 'Blei'}, {last: 'al.'}], [{last: 'Blei'}, {last: 'Ng'}, {last: 'Jordan'}]) -> true
authorsMatch([{last: 'Blei'}, {last: 'al.'}], [{last: 'Blei'}, {last: 'Ng'}]) -> false
*/
export function authorsMatch(citeAuthors: types.Name[], referenceAuthors: types.Name[]) {
  var total = Math.max(citeAuthors.length, referenceAuthors.length);
  var position = 0;
  while (position < total) {
    var cited = citeAuthors[position];
    var listed = referenceAuthors[position];
    // 'et al.' has to be checked before the ordinary comparison, since 'al.'
    // never equals a real last name
    if (cited && cited.last === 'al.' && (position + 1) < referenceAuthors.length) {
      // the rest of the reference authors are covered by 'et al.'
      return true;
    }
    // one list ran out before the other
    if (cited === undefined || listed === undefined) {
      return false;
    }
    if (cited.last !== listed.last) {
      return false;
    }
    position++;
  }
  return true;
}
} | ||
export module types { | ||
@@ -117,3 +291,3 @@ /** | ||
export interface AuthorYearCite extends Cite { | ||
/** usually only last names, one of which may be 'et al.' */ | ||
/** usually only last names, one of which may be 'al.' (from 'et al.') */ | ||
authors: Name[]; | ||
@@ -168,98 +342,8 @@ /** not necessarily a number, if there is a letter suffix */ | ||
sections: Section[]; | ||
// analysis | ||
references?: Reference[]; | ||
} | ||
} | ||
export module names {
  /**
  Split a string listing several authors into one string per author.

  1. Typical list of 3+
       'David Mimno, Hanna M Wallach, and Andrew McCallum' ->
       ['David Mimno', 'Hanna M Wallach', 'Andrew McCallum']
  2. List of 3+ without the Oxford comma, in case that ever happens
       'Aravind K Joshi, Ben King and Steven Abney' ->
       ['Aravind K Joshi', 'Ben King', 'Steven Abney']
  3. Duo
       'Daniel Ramage and Chris Callison-Burch' ->
       ['Daniel Ramage', 'Chris Callison-Burch']
  4. Single author
       'David Sankoff' ->
       ['David Sankoff']
  5. Et al. abbreviation
       'Zhao et al.' ->
       ['Zhao', 'et al.']

  The conjunction 'and' only separates names when it stands alone as a word, so
  names that merely end in "and" are left intact:
       'James L McClelland and David E Rumelhart' ->
       ['James L McClelland', 'David E Rumelhart']
  */
  export function splitNames(input: string): string[] {
    // three split options: (, and ) or ( and ) or (, )
    // The bare ' and ' option requires whitespace BEFORE the conjunction too;
    // otherwise the "and" at the end of e.g. 'McClelland' or 'Rand' is taken
    // for a separator.
    // TODO: fix the 'et al.' hack
    return input.replace(/\s+et al\./, ', et al.').split(/,\s*and\s+|\s+and\s+|,\s*/);
  }

  /**
  Typically, in-paper citations (`Cite`s) only have the last names of the authors,
  while the `Reference`s in the Bibliography have full names, or at least first
  initials and last names.

  This method determines whether a `Cite`'s names match a `Reference`'s authors,
  comparing only the `last` field of the authors at the same position. Reference
  authors beyond the end of the citation's list are not examined.

      authorsMatch([{last: 'Joshi'}], [{first: 'Aravind', middle: 'K', last: 'Joshi'}]) -> true
      authorsMatch([{last: 'Diab'}, {last: 'Kamboj'}], [{last: 'Diab'}, {last: 'Kamboj'}]) -> true

  'et al.' gets special treatment. 'et al.' is a match if and only if there are
  more reference authors beyond the one parallel to the 'et al.' citation author.
  In other words, 'et al.' cannot stand in for a single author.

      authorsMatch([{last: 'Blei'}, {last: 'et al.'}],
                   [{last: 'Blei'}, {last: 'Ng'}, {last: 'Jordan'}]) -> true
  */
  export function authorsMatch(citeAuthors: types.Name[], referenceAuthors: types.Name[]) {
    return citeAuthors.every((citeAuthor, i) => {
      var referenceAuthor = referenceAuthors[i];
      if (referenceAuthor && citeAuthor.last === referenceAuthor.last) {
        return true;
      }
      // 'et al.' needs at least one more reference author after position i
      return citeAuthor.last === 'et al.' && referenceAuthors.length > (i + 1);
    });
  }

  /** Lowercase particles that belong to the last name that follows them. */
  var particleRegExp = /^(van|von|da|de)$/;

  /**
  Given a name represented by a single string, parse it into first name, middle
  name, and last name.

  parseName('Leonardo da Vinci') -> { first: 'Leonardo', last: 'da Vinci' }
  parseName('Chris Callison-Burch') -> { first: 'Chris', last: 'Callison-Burch' }
  parseName('Hanna M Wallach') -> { first: 'Hanna', middle: 'M', last: 'Wallach' }
  parseName('Zhou') -> { last: 'Zhou' }
  parseName('McCallum, Andrew') -> { first: 'Andrew', last: 'McCallum' }

  Surrounding whitespace is ignored. The last whitespace-separated word is the
  last name, extended by one preceding lowercase particle (van, von, da, de) if
  there is one. Of the remaining words, the first is the first name and any
  others are joined with spaces into the middle name. `first` and `middle` are
  omitted from the result when there is nothing to put in them.
  */
  export function parseName(input: string): types.Name {
    // stray whitespace around the name must not turn into an empty name part
    input = input.trim();
    // 0. 'et al.' is a special case
    if (input === 'et al.') {
      return {last: input};
    }
    // 1. normalize the comma out: 'Last, First' -> 'First Last'
    input = input.split(/,\s*/).reverse().join(' ');
    // 2. split on whitespace
    var parts = input.split(/\s+/);
    // 3. find where the last name starts, pulling in a preceding particle
    var lastStart = parts.length - 1;
    if (lastStart > 0 && particleRegExp.test(parts[lastStart - 1])) {
      lastStart--;
    }
    var last = parts.slice(lastStart).join(' ');
    var given = parts.slice(0, lastStart);
    if (given.length >= 2) {
      return {
        first: given[0],
        middle: given.slice(1).join(' '),
        last: last,
      };
    }
    else if (given.length == 1) {
      return {
        first: given[0],
        last: last,
      };
    }
    return {
      last: last
    };
  }
}
} | ||
export = academia; |
@@ -122,1 +122,126 @@ // the original text from the paper (for debugging) | ||
*/ | ||
[format] | ||
pretty = %C(yellow)%h%C(reset) [%C(magenta)%an <%ae>%C(reset)] %C(green)%cr%C(reset): %s | ||
gulpfile: | ||
// var eventStream = require('event-stream'); | ||
// var gulp = require('gulp'); | ||
// var ts = require('gulp-typescript'); | ||
// function compile() { | ||
// var result = gulp.src([ | ||
// // 'typings/**/*.d.ts', | ||
// 'lib/**/*.ts' | ||
// ], { | ||
// base: './lib' | ||
// }) | ||
// .pipe(ts(project)); | ||
// return eventStream.merge( | ||
// result.dts.pipe(gulp.dest('d.ts')), | ||
// result.js | ||
// .pipe(gulp.dest('js')) | ||
// ); | ||
// } | ||
// var project = ts.createProject({ | ||
// target: 'es5', | ||
// module: 'commonjs', | ||
// declarationFiles: true, | ||
// noExternalResolve: true | ||
// }); | ||
// gulp.task('compile', compile()); | ||
// 1. normalize the comma out | ||
// input = input.split(/,\s*/).reverse().join(' '); | ||
// and currentName is partly filled, it must be first/middle names | ||
// but if currentName is empty, then this must be | ||
/**
Parse a string listing one or more author names into a list of `types.Name`s.

The input is cut into chunks at commas and at the conjunctions 'and', 'et' and
'&'; each chunk is then cut into words at whitespace.

A chunk consisting of a single word is taken to be a last name written ahead of
its given names: it is held back and appended to the end of the next chunk's
words. Any chunk that ends up with more than one word is passed to `makeName`
as one complete name. A single word still held back at the end becomes a name
on its own.
*/
export function parseNames(input: string): types.Name[] {
  var names: types.Name[] = [];
  // a lone word from the previous chunk, waiting for its given names
  var carriedLastName: string;
  input.split(/,?\s+(?:and|et|&)\s+|,\s+/).forEach(chunk => {
    var words = chunk.split(/\s+/);
    // if the previous chunk was a lone last name, it belongs at the end here
    if (carriedLastName !== undefined) {
      words.push(carriedLastName);
    }
    if (words.length === 1) {
      // a single word must be a last name; keep it for the next chunk
      carriedLastName = words[0];
      return;
    }
    // several words: this is a full name all together
    names.push(makeName(words));
    carriedLastName = undefined;
  });
  // finish up: a trailing lone last name is a name by itself
  if (carriedLastName !== undefined) {
    names.push(makeName([carriedLastName]));
  }
  return names;
}
var parts = input.split(/,?\s+(?:and|et|&)\s+|,\s+/); | ||
console.error('parts', parts); | ||
for (var i = 0, part; (part = parts[i]) !== undefined; i++) { | ||
// split on whitespace | ||
var subparts = part.match(/.+/); | ||
console.error('subparts', subparts); | ||
// if we didn't use the last part, put it on the end | ||
if (currentLastName !== undefined) { | ||
subparts.push(currentLastName); | ||
} | ||
// if the part has multiple subparts, it must be the full name all together | ||
if (subparts.length > 1) { | ||
var name = makeName(subparts); | ||
names.push(name); | ||
currentLastName = undefined; | ||
} | ||
// otherwise, it's got a single subpart, which must be a last name, which | ||
// we keep for the next iteration of this loop | ||
else { | ||
currentLastName = subparts[0]; | ||
} | ||
} | ||
console.log('SEPARATOR', buffer.length); | ||
console.log('NAME?', buffer.length > 1 && buffer_swap); | ||
/**
Split a string listing several authors into one string per author.

splitNames('David Mimno, Hanna M Wallach, and Andrew McCallum') ->
  ['David Mimno', 'Hanna M Wallach', 'Andrew McCallum']
splitNames('Levy & Daumé') -> ['Levy', 'Daumé']
splitNames('Zhao et al.') -> ['Zhao', 'al.']
*/
function splitNames(input: string): string[] {
  // seven split options:
  // 1a. ", and "
  // 1b. ", et "
  // 1c. ", & "
  // 2a. " and "
  // 2b. " et "
  // 2c. " & "
  // 3. ", "
  // The word boundaries apply to 'and' / 'et' only, so that they are not found
  // inside longer words. '&' is not a word character, so a `\b` on either side
  // of it can never match next to whitespace; it is matched without one.
  return input.split(/,?\s*(?:\b(?:and|et)\b|&)\s+|,\s*/);
}
@@ -23,3 +23,3 @@ /** | ||
export interface AuthorYearCite extends Cite { | ||
/** usually only last names, one of which may be 'et al.' */ | ||
/** usually only last names, one of which may be 'al.' (from 'et al.') */ | ||
authors: Name[]; | ||
@@ -26,0 +26,0 @@ /** not necessarily a number, if there is a letter suffix */ |
Sorry, the diff of this file is not supported yet
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Wildcard dependency
Quality: Package has a dependency with a floating version range. This can cause issues if the dependency publishes a new major version.
Found 1 instance in 1 package
Major refactor
Supply chain risk: Package has recently undergone a major refactor. It may be unstable or indicate significant internal changes. Use caution when updating to versions that include significant changes.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
126440
19
2801
1
3
2
1
+ Added lexing@*
+ Added lexing@0.9.0 (transitive)