nameparser
Advanced tools
| Metadata-Version: 1.1 | ||
| Name: nameparser | ||
| Version: 1.0.5 | ||
| Version: 1.0.6 | ||
| Summary: A simple Python module for parsing human names into their individual components. | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/derek73/python-nameparser |
@@ -1,2 +0,2 @@ | ||
| VERSION = (1, 0, 5) | ||
| VERSION = (1, 0, 6) | ||
| __version__ = '.'.join(map(str, VERSION)) | ||
@@ -3,0 +3,0 @@ __author__ = "Derek Gulbranson" |
+132
-132
@@ -20,3 +20,3 @@ # -*- coding: utf-8 -*- | ||
| """ | ||
| return list of tuples containing first and last index | ||
| return list of tuples containing first and last index | ||
| position of contiguous numbers in a series | ||
@@ -34,9 +34,9 @@ """ | ||
| Parse a person's name into individual components. | ||
| Instantiation assigns to ``full_name``, and assignment to | ||
| :py:attr:`full_name` triggers :py:func:`parse_full_name`. After parsing the | ||
| name, these instance attributes are available. | ||
| **HumanName Instance Attributes** | ||
| * :py:attr:`title` | ||
@@ -51,17 +51,17 @@ * :py:attr:`first` | ||
| :param str full_name: The name string to be parsed. | ||
| :param constants constants: | ||
| a :py:class:`~nameparser.config.Constants` instance. Pass ``None`` for | ||
| `per-instance config <customize.html>`_. | ||
| :param constants constants: | ||
| a :py:class:`~nameparser.config.Constants` instance. Pass ``None`` for | ||
| `per-instance config <customize.html>`_. | ||
| :param str encoding: string representing the encoding of your input | ||
| :param str string_format: python string formatting | ||
| :param str string_format: python string formatting | ||
| """ | ||
| C = CONSTANTS | ||
| """ | ||
| A reference to the configuration for this instance, which may or may not be | ||
| a reference to the shared, module-wide instance at | ||
| :py:mod:`~nameparser.config.CONSTANTS`. See `Customizing the Parser | ||
| a reference to the shared, module-wide instance at | ||
| :py:mod:`~nameparser.config.CONSTANTS`. See `Customizing the Parser | ||
| <customize.html>`_. | ||
| """ | ||
| original = '' | ||
@@ -71,3 +71,3 @@ """ | ||
| """ | ||
| _count = 0 | ||
@@ -77,3 +77,3 @@ _members = ['title','first','middle','last','suffix','nickname'] | ||
| _full_name = '' | ||
| def __init__(self, full_name="", constants=CONSTANTS, encoding=DEFAULT_ENCODING, | ||
@@ -84,3 +84,3 @@ string_format=None): | ||
| self.C = Constants() | ||
| self.encoding = encoding | ||
@@ -90,6 +90,6 @@ self.string_format = string_format or self.C.string_format | ||
| self.full_name = full_name | ||
| def __iter__(self): | ||
| return self | ||
| def __len__(self): | ||
@@ -100,13 +100,13 @@ l = 0 | ||
| return l | ||
| def __eq__(self, other): | ||
| """ | ||
| HumanName instances are equal to other objects whose | ||
| HumanName instances are equal to other objects whose | ||
| lower case unicode representation is the same. | ||
| """ | ||
| return (u(self)).lower() == (u(other)).lower() | ||
| def __ne__(self, other): | ||
| return not (u(self)).lower() == (u(other)).lower() | ||
| def __getitem__(self, key): | ||
@@ -144,3 +144,3 @@ if isinstance(key, slice): | ||
| return " ".join(self) | ||
| def __str__(self): | ||
@@ -150,3 +150,3 @@ if sys.version_info[0] >= 3: | ||
| return self.__unicode__().encode(self.encoding) | ||
| def __repr__(self): | ||
@@ -168,12 +168,12 @@ if self.unparsable: | ||
| return _string.encode(self.encoding) | ||
| def as_dict(self, include_empty=True): | ||
| """ | ||
| Return the parsed name as a dictionary of its attributes. | ||
| :param bool include_empty: Include keys in the dictionary for empty name attributes. | ||
| :rtype: dict | ||
| .. doctest:: | ||
| >>> name = HumanName("Bob Dole") | ||
@@ -184,3 +184,3 @@ >>> name.as_dict() | ||
| {'last': 'Dole', 'first': 'Bob'} | ||
| """ | ||
@@ -196,18 +196,18 @@ d = {} | ||
| return d | ||
| @property | ||
| def has_own_config(self): | ||
| """ | ||
| True if this instance is not using the shared module-level | ||
| True if this instance is not using the shared module-level | ||
| configuration. | ||
| """ | ||
| return self.C is not CONSTANTS | ||
| ### attributes | ||
| @property | ||
| def title(self): | ||
| """ | ||
| The person's titles. Any string of consecutive pieces in | ||
| :py:mod:`~nameparser.config.titles` or | ||
| The person's titles. Any string of consecutive pieces in | ||
| :py:mod:`~nameparser.config.titles` or | ||
| :py:mod:`~nameparser.config.conjunctions` | ||
@@ -217,27 +217,27 @@ at the beginning of :py:attr:`full_name`. | ||
| return " ".join(self.title_list) or self.C.empty_attribute_default | ||
| @property | ||
| def first(self): | ||
| """ | ||
| The person's first name. The first name piece after any known | ||
| The person's first name. The first name piece after any known | ||
| :py:attr:`title` pieces parsed from :py:attr:`full_name`. | ||
| """ | ||
| return " ".join(self.first_list) or self.C.empty_attribute_default | ||
| @property | ||
| def middle(self): | ||
| """ | ||
| The person's middle names. All name pieces after the first name and | ||
| The person's middle names. All name pieces after the first name and | ||
| before the last name parsed from :py:attr:`full_name`. | ||
| """ | ||
| return " ".join(self.middle_list) or self.C.empty_attribute_default | ||
| @property | ||
| def last(self): | ||
| """ | ||
| The person's last name. The last name piece parsed from | ||
| The person's last name. The last name piece parsed from | ||
| :py:attr:`full_name`. | ||
| """ | ||
| return " ".join(self.last_list) or self.C.empty_attribute_default | ||
| @property | ||
@@ -248,12 +248,12 @@ def suffix(self): | ||
| :py:mod:`~nameparser.config.suffixes`, or pieces that are at the end | ||
| of comma separated formats, e.g. | ||
| "Lastname, Title Firstname Middle[,] Suffix [, Suffix]" parsed | ||
| of comma separated formats, e.g. | ||
| "Lastname, Title Firstname Middle[,] Suffix [, Suffix]" parsed | ||
| from :py:attr:`full_name`. | ||
| """ | ||
| return ", ".join(self.suffix_list) or self.C.empty_attribute_default | ||
| @property | ||
| def nickname(self): | ||
| """ | ||
| The person's nicknames. Any text found inside of quotes (``""``) or | ||
| The person's nicknames. Any text found inside of quotes (``""``) or | ||
| parentheses (``()``) | ||
@@ -278,3 +278,3 @@ """ | ||
| ### setter methods | ||
| def _set_list(self, attr, value): | ||
@@ -292,40 +292,40 @@ if isinstance(value, list): | ||
| setattr(self, attr+"_list", self.parse_pieces(val)) | ||
| @title.setter | ||
| def title(self, value): | ||
| self._set_list('title', value) | ||
| @first.setter | ||
| def first(self, value): | ||
| self._set_list('first', value) | ||
| @middle.setter | ||
| def middle(self, value): | ||
| self._set_list('middle', value) | ||
| @last.setter | ||
| def last(self, value): | ||
| self._set_list('last', value) | ||
| @suffix.setter | ||
| def suffix(self, value): | ||
| self._set_list('suffix', value) | ||
| @nickname.setter | ||
| def nickname(self, value): | ||
| self._set_list('nickname', value) | ||
| ### Parse helpers | ||
| def is_title(self, value): | ||
| """Is in the :py:data:`~nameparser.config.titles.TITLES` set.""" | ||
| return lc(value) in self.C.titles | ||
| def is_conjunction(self, piece): | ||
| """Is in the conjunctions set and not :py:func:`is_an_initial()`.""" | ||
| return piece.lower() in self.C.conjunctions and not self.is_an_initial(piece) | ||
| def is_prefix(self, piece): | ||
| """ | ||
| Lowercase and no periods version of piece is in the | ||
| Lowercase and no periods version of piece is in the | ||
| :py:data:`~nameparser.config.prefixes.PREFIXES` set. | ||
@@ -337,12 +337,12 @@ """ | ||
| """ | ||
| Matches the ``roman_numeral`` regular expression in | ||
| Matches the ``roman_numeral`` regular expression in | ||
| :py:data:`~nameparser.config.regexes.REGEXES`. | ||
| """ | ||
| return bool(self.C.regexes.roman_numeral.match(value)) | ||
| def is_suffix(self, piece): | ||
| """ | ||
| Is in the suffixes set and not :py:func:`is_an_initial()`. | ||
| Some suffixes may be acronyms (M.B.A) while some are not (Jr.), | ||
| Is in the suffixes set and not :py:func:`is_an_initial()`. | ||
| Some suffixes may be acronyms (M.B.A) while some are not (Jr.), | ||
| so we remove the periods from `piece` when testing against | ||
@@ -362,3 +362,3 @@ `C.suffix_acronyms`. | ||
| return True | ||
| def is_rootname(self, piece): | ||
@@ -369,9 +369,9 @@ """ | ||
| return lc(piece) not in self.C.suffixes_prefixes_titles \ | ||
| and not self.is_an_initial(piece) | ||
| and not self.is_an_initial(piece) | ||
| def is_an_initial(self, value): | ||
| """ | ||
| Words with a single period at the end, or a single uppercase letter. | ||
| Matches the ``initial`` regular expression in | ||
| Matches the ``initial`` regular expression in | ||
| :py:data:`~nameparser.config.regexes.REGEXES`. | ||
@@ -381,5 +381,5 @@ """ | ||
| ### full_name parser | ||
| @property | ||
@@ -389,3 +389,3 @@ def full_name(self): | ||
| return self.__str__() | ||
| @full_name.setter | ||
@@ -398,3 +398,3 @@ def full_name(self, value): | ||
| self.parse_full_name() | ||
| def collapse_whitespace(self, string): | ||
@@ -409,3 +409,3 @@ # collapse multiple spaces into single space | ||
| """ | ||
| This method happens at the beginning of the :py:func:`parse_full_name` | ||
@@ -415,3 +415,3 @@ before any other processing of the string aside from unicode | ||
| subclass. Runs :py:func:`parse_nicknames` and :py:func:`squash_emoji`. | ||
| """ | ||
@@ -440,13 +440,13 @@ self.fix_phd() | ||
| """ | ||
| The content of parentheses or quotes in the name will be added to the | ||
| The content of parentheses or quotes in the name will be added to the | ||
| nicknames list. This happens before any other processing of the name. | ||
| Single quotes cannot span white space characters and must border | ||
| white space to allow for quotes in names like O'Connor and Kawai'ae'a. | ||
| Double quotes and parentheses can span white space. | ||
| Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`; | ||
| Loops through 3 :py:data:`~nameparser.config.regexes.REGEXES`; | ||
| `quoted_word`, `double_quotes` and `parenthesis`. | ||
| """ | ||
| re_quoted_word = self.C.regexes.quoted_word | ||
@@ -474,3 +474,3 @@ re_double_quotes = self.C.regexes.double_quotes | ||
| like "Sir", then when it's followed by a single name that name is always | ||
| a first name. | ||
| a first name. | ||
| """ | ||
@@ -484,3 +484,3 @@ if self.title \ | ||
| """ | ||
| The main parse method for the parser. This method is run upon | ||
@@ -492,7 +492,7 @@ assignment to the :py:attr:`full_name` attribute or instantiation. | ||
| on the number of commas. | ||
| :py:func:`parse_pieces` then splits those parts on spaces and | ||
| :py:func:`join_on_conjunctions` joins any pieces next to conjunctions. | ||
| :py:func:`join_on_conjunctions` joins any pieces next to conjunctions. | ||
| """ | ||
| self.title_list = [] | ||
@@ -505,19 +505,19 @@ self.first_list = [] | ||
| self.unparsable = True | ||
| self.pre_process() | ||
| self._full_name = self.collapse_whitespace(self._full_name) | ||
| # break up full_name by commas | ||
| parts = [x.strip() for x in self._full_name.split(",")] | ||
| log.debug("full_name: %s", self._full_name) | ||
| log.debug("parts: %s", parts) | ||
| if len(parts) == 1: | ||
| # no commas, title first middle middle middle last suffix | ||
| # part[0] | ||
| pieces = self.parse_pieces(parts) | ||
@@ -530,3 +530,3 @@ p_len = len(pieces) | ||
| nxt = None | ||
| # title must have a next piece, unless it's just a title | ||
@@ -545,6 +545,6 @@ if self.is_title(piece) \ | ||
| if self.are_suffixes(pieces[i+1:]) or \ | ||
| ( | ||
| ( | ||
| # if the next piece is the last piece and a roman | ||
| # numeral but this piece is not an initial | ||
| self.is_roman_numeral(nxt) and i == p_len - 2 | ||
| self.is_roman_numeral(nxt) and i == p_len - 2 | ||
| and not self.is_an_initial(piece) | ||
@@ -558,3 +558,3 @@ ): | ||
| continue | ||
| self.middle_list.append(piece) | ||
@@ -571,8 +571,8 @@ else: | ||
| and len(parts[0].split(' ')) > 1: | ||
| # suffix comma: | ||
| # suffix comma: | ||
| # title first middle last [suffix], suffix [suffix] [, suffix] | ||
| # parts[0], parts[1:...] | ||
| self.suffix_list += parts[1:] | ||
@@ -604,9 +604,9 @@ pieces = self.parse_pieces(parts[0].split(' ')) | ||
| else: | ||
| # lastname comma: | ||
| # lastname comma: | ||
| # last [suffix], title first middles[,] suffix [,suffix] | ||
| # parts[0], parts[1], parts[2:...] | ||
| log.debug("post-comma pieces: %s", u(post_comma_pieces)) | ||
| # lastname part may have suffixes in it | ||
@@ -621,3 +621,3 @@ lastname_pieces = self.parse_pieces(parts[0].split(' '), 1) | ||
| self.last_list.append(piece) | ||
| for i, piece in enumerate(post_comma_pieces): | ||
@@ -628,3 +628,3 @@ try: | ||
| nxt = None | ||
| if self.is_title(piece) \ | ||
@@ -647,3 +647,3 @@ and (nxt or len(post_comma_pieces) == 1) \ | ||
| pass | ||
| if len(self) < 0: | ||
@@ -662,8 +662,8 @@ log.info("Unparsable: \"%s\" ", self.original) | ||
| add to the constant so they will be found. | ||
| :param list parts: name part strings from the comma split | ||
| :param int additional_parts_count: | ||
| if the comma format contains other parts, we need to know | ||
| how many there are to decide if things should be considered a | ||
| :param int additional_parts_count: | ||
| if the comma format contains other parts, we need to know | ||
| how many there are to decide if things should be considered a | ||
| conjunction. | ||
@@ -673,3 +673,3 @@ :return: pieces split on spaces and joined on conjunctions | ||
| """ | ||
| output = [] | ||
@@ -681,3 +681,3 @@ for part in parts: | ||
| output += [x.strip(' ,') for x in part.split(' ')] | ||
| # If part contains periods, check if it's multiple titles or suffixes | ||
@@ -694,3 +694,3 @@ # together without spaces if so, add the new part with periods to the | ||
| suffixes = list(filter(self.is_suffix, period_chunks)) | ||
| # add the part to the constant so it will be found | ||
@@ -703,15 +703,15 @@ if len(list(titles)): | ||
| continue | ||
| return self.join_on_conjunctions(output, additional_parts_count) | ||
| def join_on_conjunctions(self, pieces, additional_parts_count=0): | ||
| """ | ||
| Join conjunctions to surrounding pieces. Title- and prefix-aware. e.g.: | ||
| ['Mr.', 'and', 'Mrs.', 'John', 'Doe'] ==> | ||
| ['Mr. and Mrs.', 'John', 'Doe'] | ||
| ['The', 'Secretary', 'of', 'State', 'Hillary', 'Clinton'] ==> | ||
| ['The Secretary of State', 'Hillary', 'Clinton'] | ||
| When joining titles, saves newly formed piece to the instance's titles | ||
@@ -721,6 +721,6 @@ constant so they will be parsed correctly later. E.g. after parsing the | ||
| be present in the titles constant set. | ||
| :param list pieces: name pieces strings after split on spaces | ||
| :param int additional_parts_count: | ||
| :return: new list with piece next to conjunctions merged into one piece | ||
| :param int additional_parts_count: | ||
| :return: new list with piece next to conjunctions merged into one piece | ||
| with spaces in it. | ||
@@ -786,3 +786,3 @@ :rtype: list | ||
| if i is 0: | ||
| if i == 0: | ||
| new_piece = " ".join(pieces[i:i+2]) | ||
@@ -860,6 +860,6 @@ if self.is_title(pieces[i+1]): | ||
| return pieces | ||
| ### Capitalization Support | ||
| def cap_word(self, word, attribute): | ||
@@ -892,3 +892,3 @@ if (self.is_prefix(word) and attribute in ('last','middle')) \ | ||
| pass the parameter `force=True`. | ||
| :param bool force: Forces capitalization of mixed case strings. This | ||
@@ -899,5 +899,5 @@ parameter overrides rules set within | ||
| **Usage** | ||
| .. doctest:: capitalize | ||
| >>> name = HumanName('bob v. de la macdole-eisenhower phd') | ||
@@ -910,8 +910,8 @@ >>> name.capitalize() | ||
| >>> name.capitalize() | ||
| >>> str(name) | ||
| >>> str(name) | ||
| 'Shirley Maclaine' | ||
| >>> name.capitalize(force=True) | ||
| >>> str(name) | ||
| >>> str(name) | ||
| 'Shirley MacLaine' | ||
| """ | ||
@@ -918,0 +918,0 @@ name = u(self) |
+1
-1
| Metadata-Version: 1.1 | ||
| Name: nameparser | ||
| Version: 1.0.5 | ||
| Version: 1.0.6 | ||
| Summary: A simple Python module for parsing human names into their individual components. | ||
@@ -5,0 +5,0 @@ Home-page: https://github.com/derek73/python-nameparser |
Alert delta unavailable
Currently unable to show alert delta for PyPI packages.
171490
-0.42%