Metadata-Version: 1.1
		Name: nameparser
		Version: 1.0.5
		Version: 1.0.6
		Summary: A simple Python module for parsing human names into their individual components.
		@@ -5,0 +5,0 @@ Home-page: https://github.com/derek73/python-nameparser

+1

-1

nameparser/__init__.py

		@@ -1,2 +0,2 @@
		VERSION = (1, 0, 5)
		VERSION = (1, 0, 6)
		__version__ = '.'.join(map(str, VERSION))
		@@ -3,0 +3,0 @@ __author__ = "Derek Gulbranson"

+132

-132

nameparser/parser.py

		@@ -20,3 +20,3 @@ # -- coding: utf-8 --
		"""
		return list of tuples containing first and last index
		return list of tuples containing first and last index
		position of contiguous numbers in a series
		@@ -34,9 +34,9 @@ """
		Parse a person's name into individual components.


		Instantiation assigns to ``full_name``, and assignment to
		:py:attr:`full_name` triggers :py:func:`parse_full_name`. After parsing the
		name, these instance attributes are available.


		HumanName Instance Attributes


		* :py:attr:`title`
		@@ -51,17 +51,17 @@ * :py:attr:`first`
		:param str full_name: The name string to be parsed.
		:param constants constants:
		a :py:class:`~nameparser.config.Constants` instance. Pass ``None`` for
		`per-instance config <customize.html>`_.
		:param constants constants:
		a :py:class:`~nameparser.config.Constants` instance. Pass ``None`` for
		`per-instance config <customize.html>`_.
		:param str encoding: string representing the encoding of your input
		:param str string_format: python string formatting
		:param str string_format: python string formatting
		"""


		C = CONSTANTS
		"""
		A reference to the configuration for this instance, which may or may not be
		a reference to the shared, module-wide instance at
		:py:mod:`~nameparser.config.CONSTANTS`. See `Customizing the Parser
		a reference to the shared, module-wide instance at
		:py:mod:`~nameparser.config.CONSTANTS`. See `Customizing the Parser
		<customize.html>`_.
		"""


		original = ''
		@@ -71,3 +71,3 @@ """
		"""


		_count = 0
		@@ -77,3 +77,3 @@ _members = ['title','first','middle','last','suffix','nickname']
		_full_name = ''


		def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING,
		@@ -84,3 +84,3 @@ string_format=None):
		self.C = Constants()


		self.encoding = encoding
		@@ -90,6 +90,6 @@ self.string_format = string_format or self.C.string_format
		self.full_name = full_name


		def __iter__(self):
		return self


		def __len__(self):
		@@ -100,13 +100,13 @@ l = 0
		return l


		def __eq__(self, other):
		"""
		HumanName instances are equal to other objects whose
		HumanName instances are equal to other objects whose
		lower case unicode representation is the same.
		"""
		return (u(self)).lower() == (u(other)).lower()


		def __ne__(self, other):
		return not (u(self)).lower() == (u(other)).lower()


		def __getitem__(self, key):
		@@ -144,3 +144,3 @@ if isinstance(key, slice):
		return " ".join(self)


		def __str__(self):
		@@ -150,3 +150,3 @@ if sys.version_info[0] >= 3:
		return self.__unicode__().encode(self.encoding)


		def __repr__(self):
		@@ -168,12 +168,12 @@ if self.unparsable:
		return _string.encode(self.encoding)


		def as_dict(self, include_empty=True):
		"""
		Return the parsed name as a dictionary of its attributes.


		:param bool include_empty: Include keys in the dictionary for empty name attributes.
		:rtype: dict


		.. doctest::


		>>> name = HumanName("Bob Dole")
		@@ -184,3 +184,3 @@ >>> name.as_dict()
		{'last': 'Dole', 'first': 'Bob'}


		"""
		@@ -196,18 +196,18 @@ d = {}
		return d


		@property
		def has_own_config(self):
		"""
		True if this instance is not using the shared module-level
		True if this instance is not using the shared module-level
		configuration.
		"""
		return self.C is not CONSTANTS


		### attributes


		@property
		def title(self):
		"""
		The person's titles. Any string of consecutive pieces in
		:py:mod:`~nameparser.config.titles` or
		The person's titles. Any string of consecutive pieces in
		:py:mod:`~nameparser.config.titles` or
		:py:mod:`~nameparser.config.conjunctions`
		@@ -217,27 +217,27 @@ at the beginning of :py:attr:`full_name`.
		return " ".join(self.title_list) or self.C.empty_attribute_default


		@property
		def first(self):
		"""
		The person's first name. The first name piece after any known
		The person's first name. The first name piece after any known
		:py:attr:`title` pieces parsed from :py:attr:`full_name`.
		"""
		return " ".join(self.first_list) or self.C.empty_attribute_default


		@property
		def middle(self):
		"""
		The person's middle names. All name pieces after the first name and
		The person's middle names. All name pieces after the first name and
		before the last name parsed from :py:attr:`full_name`.
		"""
		return " ".join(self.middle_list) or self.C.empty_attribute_default


		@property
		def last(self):
		"""
		The person's last name. The last name piece parsed from
		The person's last name. The last name piece parsed from
		:py:attr:`full_name`.
		"""
		return " ".join(self.last_list) or self.C.empty_attribute_default


		@property
		@@ -248,12 +248,12 @@ def suffix(self):
		:py:mod:`~nameparser.config.suffixes`, or pieces that are at the end
		of comma separated formats, e.g.
		"Lastname, Title Firstname Middle[,] Suffix [, Suffix]" parsed
		of comma separated formats, e.g.
		"Lastname, Title Firstname Middle[,] Suffix [, Suffix]" parsed
		from :py:attr:`full_name`.
		"""
		return ", ".join(self.suffix_list) or self.C.empty_attribute_default


		@property
		def nickname(self):
		"""
		The person's nicknames. Any text found inside of quotes (``""``) or
		The person's nicknames. Any text found inside of quotes (``""``) or
		parenthesis (``()``)
		@@ -278,3 +278,3 @@ """
		### setter methods


		def _set_list(self, attr, value):
		@@ -292,40 +292,40 @@ if isinstance(value, list):
		setattr(self, attr+"_list", self.parse_pieces(val))


		@title.setter
		def title(self, value):
		self._set_list('title', value)


		@first.setter
		def first(self, value):
		self._set_list('first', value)


		@middle.setter
		def middle(self, value):
		self._set_list('middle', value)


		@last.setter
		def last(self, value):
		self._set_list('last', value)


		@suffix.setter
		def suffix(self, value):
		self._set_list('suffix', value)


		@nickname.setter
		def nickname(self, value):
		self._set_list('nickname', value)


		### Parse helpers


		def is_title(self, value):
		"""Is in the :py:data:`~nameparser.config.titles.TITLES` set."""
		return lc(value) in self.C.titles


		def is_conjunction(self, piece):
		"""Is in the conjuctions set and not :py:func:`is_an_initial()`."""
		return piece.lower() in self.C.conjunctions and not self.is_an_initial(piece)


		def is_prefix(self, piece):
		"""
		Lowercase and no periods version of piece is in the
		Lowercase and no periods version of piece is in the
		:py:data:`~nameparser.config.prefixes.PREFIXES` set.
		@@ -337,12 +337,12 @@ """
		"""
		Matches the ``roman_numeral`` regular expression in
		Matches the ``roman_numeral`` regular expression in
		:py:data:`~nameparser.config.regexes.REGEXES`.
		"""
		return bool(self.C.regexes.roman_numeral.match(value))


		def is_suffix(self, piece):
		"""
		Is in the suffixes set and not :py:func:`is_an_initial()`.

		Some suffixes may be acronyms (M.B.A) while some are not (Jr.),
		Is in the suffixes set and not :py:func:`is_an_initial()`.

		Some suffixes may be acronyms (M.B.A) while some are not (Jr.),
		so we remove the periods from `piece` when testing against
		@@ -362,3 +362,3 @@ `C.suffix_acronyms`.
		return True


		def is_rootname(self, piece):
		@@ -369,9 +369,9 @@ """
		return lc(piece) not in self.C.suffixes_prefixes_titles \
		and not self.is_an_initial(piece)

		and not self.is_an_initial(piece)

		def is_an_initial(self, value):
		"""
		Words with a single period at the end, or a single uppercase letter.

		Matches the ``initial`` regular expression in

		Matches the ``initial`` regular expression in
		:py:data:`~nameparser.config.regexes.REGEXES`.
		@@ -381,5 +381,5 @@ """



		### full_name parser


		@property
		@@ -389,3 +389,3 @@ def full_name(self):
		return self.__str__()


		@full_name.setter
		@@ -398,3 +398,3 @@ def full_name(self, value):
		self.parse_full_name()


		def collapse_whitespace(self, string):
		@@ -409,3 +409,3 @@ # collapse multiple spaces into single space
		"""


		This method happens at the beginning of the :py:func:`parse_full_name`
		@@ -415,3 +415,3 @@ before any other processing of the string aside from unicode
		subclass. Runs :py:func:`parse_nicknames` and :py:func:`squash_emoji`.


		"""
		@@ -440,13 +440,13 @@ self.fix_phd()
		"""
		The content of parenthesis or quotes in the name will be added to the
		The content of parenthesis or quotes in the name will be added to the
		nicknames list. This happens before any other processing of the name.


		Single quotes cannot span white space characters and must border
		white space to allow for quotes in names like O'Connor and Kawai'ae'a.
		Double quotes and parenthesis can span white space.

		Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`;

		Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`;
		`quoted_word`, `double_quotes` and `parenthesis`.
		"""


		re_quoted_word = self.C.regexes.quoted_word
		@@ -474,3 +474,3 @@ re_double_quotes = self.C.regexes.double_quotes
		like "Sir", then when it's followed by a single name that name is always
		a first name.
		a first name.
		"""
		@@ -484,3 +484,3 @@ if self.title \
		"""


		The main parse method for the parser. This method is run upon
		@@ -492,7 +492,7 @@ assignment to the :py:attr:`full_name` attribute or instantiation.
		on the number of commas.


		:py:func:`parse_pieces` then splits those parts on spaces and
		:py:func:`join_on_conjunctions` joins any pieces next to conjunctions.
		:py:func:`join_on_conjunctions` joins any pieces next to conjunctions.
		"""


		self.title_list = []
		@@ -505,19 +505,19 @@ self.first_list = []
		self.unparsable = True




		self.pre_process()


		self._full_name = self.collapse_whitespace(self._full_name)


		# break up full_name by commas
		parts = [x.strip() for x in self._full_name.split(",")]


		log.debug("full_name: %s", self._full_name)
		log.debug("parts: %s", parts)


		if len(parts) == 1:


		# no commas, title first middle middle middle last suffix
		# part[0]


		pieces = self.parse_pieces(parts)
		@@ -530,3 +530,3 @@ p_len = len(pieces)
		nxt = None


		# title must have a next piece, unless it's just a title
		@@ -545,6 +545,6 @@ if self.is_title(piece) \
		if self.are_suffixes(pieces[i+1:]) or \
		(
		(
		# if the next piece is the last piece and a roman
		# numeral but this piece is not an initial
		self.is_roman_numeral(nxt) and i == p_len - 2
		self.is_roman_numeral(nxt) and i == p_len - 2
		and not self.is_an_initial(piece)
		@@ -558,3 +558,3 @@ ):
		continue


		self.middle_list.append(piece)
		@@ -571,8 +571,8 @@ else:
		and len(parts[0].split(' ')) > 1:

		# suffix comma:

		# suffix comma:
		# title first middle last [suffix], suffix [suffix] [, suffix]
		# parts[0], parts[1:...]




		self.suffix_list += parts[1:]
		@@ -604,9 +604,9 @@ pieces = self.parse_pieces(parts[0].split(' '))
		else:

		# lastname comma:

		# lastname comma:
		# last [suffix], title first middles[,] suffix [,suffix]
		# parts[0], parts[1], parts[2:...]


		log.debug("post-comma pieces: %s", u(post_comma_pieces))


		# lastname part may have suffixes in it
		@@ -621,3 +621,3 @@ lastname_pieces = self.parse_pieces(parts[0].split(' '), 1)
		self.last_list.append(piece)


		for i, piece in enumerate(post_comma_pieces):
		@@ -628,3 +628,3 @@ try:
		nxt = None


		if self.is_title(piece) \
		@@ -647,3 +647,3 @@ and (nxt or len(post_comma_pieces) == 1) \
		pass


		if len(self) < 0:
		@@ -662,8 +662,8 @@ log.info("Unparsable: \"%s\" ", self.original)
		add to the constant so they will be found.


		:param list parts: name part strings from the comma split
		:param int additional_parts_count:

		if the comma format contains other parts, we need to know
		how many there are to decide if things should be considered a
		:param int additional_parts_count:

		if the comma format contains other parts, we need to know
		how many there are to decide if things should be considered a
		conjunction.
		@@ -673,3 +673,3 @@ :return: pieces split on spaces and joined on conjunctions
		"""


		output = []
		@@ -681,3 +681,3 @@ for part in parts:
		output += [x.strip(' ,') for x in part.split(' ')]


		# If part contains periods, check if it's multiple titles or suffixes
		@@ -694,3 +694,3 @@ # together without spaces if so, add the new part with periods to the
		suffixes = list(filter(self.is_suffix, period_chunks))


		# add the part to the constant so it will be found
		@@ -703,15 +703,15 @@ if len(list(titles)):
		continue


		return self.join_on_conjunctions(output, additional_parts_count)


		def join_on_conjunctions(self, pieces, additional_parts_count=0):
		"""
		Join conjunctions to surrounding pieces. Title- and prefix-aware. e.g.:


		['Mr.', 'and'. 'Mrs.', 'John', 'Doe'] ==>
		['Mr. and Mrs.', 'John', 'Doe']


		['The', 'Secretary', 'of', 'State', 'Hillary', 'Clinton'] ==>
		['The Secretary of State', 'Hillary', 'Clinton']


		When joining titles, saves newly formed piece to the instance's titles
		@@ -721,6 +721,6 @@ constant so they will be parsed correctly later. E.g. after parsing the
		be present in the titles constant set.


		:param list pieces: name pieces strings after split on spaces
		:param int additional_parts_count:
		:return: new list with piece next to conjunctions merged into one piece
		:param int additional_parts_count:
		:return: new list with piece next to conjunctions merged into one piece
		with spaces in it.
		@@ -786,3 +786,3 @@ :rtype: list

		if i is 0:
		if i == 0:
		new_piece = " ".join(pieces[i:i+2])
		@@ -860,6 +860,6 @@ if self.is_title(pieces[i+1]):
		return pieces




		### Capitalization Support


		def cap_word(self, word, attribute):
		@@ -892,3 +892,3 @@ if (self.is_prefix(word) and attribute in ('last','middle')) \
		pass the parameter `force=True`.


		:param bool force: Forces capitalization of mixed case strings. This
		@@ -899,5 +899,5 @@ parameter overrides rules set within
		Usage


		.. doctest:: capitalize


		>>> name = HumanName('bob v. de la macdole-eisenhower phd')
		@@ -910,8 +910,8 @@ >>> name.capitalize()
		>>> name.capitalize()
		>>> str(name)
		>>> str(name)
		'Shirley Maclaine'
		>>> name.capitalize(force=True)
		>>> str(name)
		>>> str(name)
		'Shirley MacLaine'


		"""
		@@ -918,0 +918,0 @@ name = u(self)

+1

-1

PKG-INFO

		Metadata-Version: 1.1
		Name: nameparser
		Version: 1.0.5
		Version: 1.0.6
		Summary: A simple Python module for parsing human names into their individual components.
		@@ -5,0 +5,0 @@ Home-page: https://github.com/derek73/python-nameparser

nameparser - pypi Package Compare versions

Worsened metrics