Latest Threat Research:SANDWORM_MODE: Shai-Hulud-Style npm Worm Hijacks CI Workflows and Poisons AI Toolchains.Details
Socket
Book a DemoInstallSign in
Socket

twill

Package Overview
Dependencies
Maintainers
2
Versions
33
Alerts
File Explorer

Advanced tools

Socket logo

Install Socket

Detect and block malicious and high-risk dependencies

Install

twill - npm Package Compare versions

Comparing version
3.1
to
3.2.1
+21
LICENSE.txt
The MIT License, https://opensource.org/licenses/MIT
Copyright 2005-2023 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
[project]
name = "twill"
version = "3.2.1"
description = "A web browsing and testing language"
keywords = ["web", "testing", "browsing", "automation"]
readme = "README.md"
license = {file = "LICENSE.txt"}
authors = [
{name = "C. Titus Brown"},
{name = "Ben R. Taylor"},
{name = "Christoph Zwerschke"}
]
maintainers = [
{name = "Christoph Zwerschke", email="cito@online.de"}
]
classifiers = [
'Development Status :: 6 - Mature',
'Environment :: Console',
'Intended Audience :: Developers',
'Intended Audience :: System Administrators',
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Other Scripting Engines',
'Topic :: Internet :: WWW/HTTP',
'Topic :: Software Development :: Testing'
]
requires-python = ">=3.8"
dependencies = [
"lxml >=4.9, <5",
"httpx >=0.25.0, <1",
"pyparsing >=3.0, <4",
]
[project.optional-dependencies]
docs = [
"sphinx >=5.2, <6",
"sphinx_rtd_theme >=1, <2"
]
tidy = [
"pytidylib >=0.3, <0.4"
]
tests = [
"tox >=4, <5",
"pytest >=7.4, <8",
"pytidylib >=0.3, <0.4",
"quixote >=3.6, <4",
]
[project.scripts]
twill = "twill.shell:main"
twill-fork = "twill.fork:main"
[project.urls]
Homepage = "https://github.com/twill-tools/twill"
Issues = "https://github.com/twill-tools/twill/issues"
Documentation = "https://twill-tools.github.io/twill/"
Source = "https://github.com/twill-tools/twill"
ChangeLog = "https://twill-tools.github.io/twill/changelog.html"
Download = "https://pypi.org/project/twill/"
[tool.setuptools.package-data]
"twill" = ["py.typed"]
[tool.pytest.ini_options]
minversion = "7.4"
testpaths = [ "tests" ]
[tool.mypy]
python_version = 3.11
check_untyped_defs = true
no_implicit_optional = true
strict_optional = false
warn_redundant_casts = true
warn_unused_ignores = true
disallow_untyped_defs = false
[tool.black]
line-length = 79
[tool.ruff]
select = [
"A", # flake8-builtins
"ANN", # flake8-annotations
"ARG", # flake8-unused-arguments
"B", # flake8-bugbear
"BLE", # flake8-blind-except
"C4", # flake8-comprehensions
"C90", # McCabe cyclomatic complexity
"COM", # flake8-commas
"D", # pydocstyle
"DTZ", # flake8-datetimez
"E", # pycodestyle
"EM", # flake8-errmsg
"ERA", # eradicate
"EXE", # flake8-executable
"F", # Pyflakes
"FBT", # flake8-boolean-trap
"G", # flake8-logging-format
"I", # isort
"ICN", # flake8-import-conventions
"INP", # flake8-no-pep420
"INT", # flake8-gettext
"ISC", # flake8-implicit-str-concat
"N", # pep8-naming
"PGH", # pygrep-hooks
"PIE", # flake8-pie
"PL", # Pylint
"PT", # flake8-pytest-style
"PTH", # flake8-use-pathlib
"PYI", # flake8-pyi
"Q", # flake8-quotes
"RET", # flake8-return
"RSE", # flake8-raise
"RUF", # Ruff-specific rules
"S", # flake8-bandit
"SLF", # flake8-self
"SIM", # flake8-simplify
"T10", # flake8-debugger
"T20", # flake8-print
"TCH", # flake8-type-checking
"TID", # flake8-tidy-imports
"TRY", # tryceratops
"UP", # pyupgrade
"W", # pycodestyle
"YTT", # flake8-2020
]
ignore = [
"ANN002", "ANN003", # no type annotations needed for args and kwargs
"ANN101", "ANN102", # no type annotation for self and cls needed
"ANN401", # allow explicit Any
"COM812", # allow trailing commas for auto-formatting
"D203", # no blank line before class docstring
"D213", # multi-line docstrings should not start at second line
"EM101", "EM102", # allows exceptions with literal and f-strings
"ISC001", # allow string literal concatenation for auto-formatting
"PTH123", # allow builtin-open
"TRY003", # allow specific messages outside the exception class
"TRY301", # allow raise
]
line-length = 79
target-version = "py38"
[tool.ruff.flake8-quotes]
inline-quotes = "double"
[tool.ruff.mccabe]
max-complexity = 25
[tool.ruff.pylint]
max-args = 12
max-branches = 25
max-returns = 7
max-statements = 75
[tool.ruff.per-file-ignores]
"src/twill/commands.py" = [
"A001", # may shadow builtins
"D400", "D401", "D415", # allow more flexible docstrings
"S102" # allow use of exec
]
"src/twill/fork.py" = [
"T201" # allow using print()
]
"src/twill/parse.py" = [
"PGH001", "S307" # allow evaluation of expressions
]
"src/twill/extensions/*" = [
"D400", "D401", "D415", # allow more flexible docstrings
]
"docs/*" = [
"A001", # may shadow builtins
"INP001", # allow stand-alone scripts
"ERA001", # allow commented-out code
]
"extras/*" = [
"ANN", # no annotations needed
"INP001", # allow stand-alone scripts
]
"tests/server.py" = [
"T201", # allow using print()
]
"tests/test_*" = [
"D", # do not require docstrings
"ANN201", # do not require return types
"S101", # allow assert statements
"PLR2004", # allow magic values
]
[build-system]
requires = ["setuptools >=68"]
build-backend = "setuptools.build_meta"
[console_scripts]
twill = twill.shell:main
twill-fork = twill.fork:main
Metadata-Version: 2.1
Name: twill
Version: 3.2.1
Summary: A web browsing and testing language
Author: C. Titus Brown, Ben R. Taylor, Christoph Zwerschke
Maintainer-email: Christoph Zwerschke <cito@online.de>
License: The MIT License, https://opensource.org/licenses/MIT
Copyright 2005-2023 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Project-URL: Homepage, https://github.com/twill-tools/twill
Project-URL: Issues, https://github.com/twill-tools/twill/issues
Project-URL: Documentation, https://twill-tools.github.io/twill/
Project-URL: Source, https://github.com/twill-tools/twill
Project-URL: ChangeLog, https://twill-tools.github.io/twill/changelog.html
Project-URL: Download, https://pypi.org/project/twill/
Keywords: web,testing,browsing,automation
Classifier: Development Status :: 6 - Mature
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: System Administrators
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Other Scripting Engines
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Software Development :: Testing
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE.txt
Requires-Dist: lxml<5,>=4.9
Requires-Dist: httpx<1,>=0.25.0
Requires-Dist: pyparsing<4,>=3.0
Provides-Extra: docs
Requires-Dist: sphinx<6,>=5.2; extra == "docs"
Requires-Dist: sphinx_rtd_theme<2,>=1; extra == "docs"
Provides-Extra: tidy
Requires-Dist: pytidylib<0.4,>=0.3; extra == "tidy"
Provides-Extra: tests
Requires-Dist: tox<5,>=4; extra == "tests"
Requires-Dist: pytest<8,>=7.4; extra == "tests"
Requires-Dist: pytidylib<0.4,>=0.3; extra == "tests"
Requires-Dist: quixote<4,>=3.6; extra == "tests"
twill: a simple scripting language for web browsing
===================================================
twill is a simple scripting language intended for programmatic or automated browsing of websites.
The current version 3.2 supports Python 3.8 to 3.12.
Take a look at the [changelog](https://twill-tools.github.io/twill/changelog.html) to find a list of all changes and improvements made since version 2. For a brief overview of twill's history starting from its early days, see the [acknowledgements and history](https://twill-tools.github.io/twill/overview.html#acknowledgements-and-history) section.
The full [documentation](https://twill-tools.github.io/twill/) is included in the distribution and provided online via [GitHub](https://twill-tools.github.io/twill/) and [ReadTheDocs](https://twill.readthedocs.io/en/latest/).
Copyright (c) 2005-2023 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
Newer versions have been created and are maintained by [Christoph Zwerschke](https://github.com/Cito).
twill is available for use, modification, and distribution under the MIT license.
lxml<5,>=4.9
httpx<1,>=0.25.0
pyparsing<4,>=3.0
[docs]
sphinx<6,>=5.2
sphinx_rtd_theme<2,>=1
[tests]
tox<5,>=4
pytest<8,>=7.4
pytidylib<0.4,>=0.3
quixote<4,>=3.6
[tidy]
pytidylib<0.4,>=0.3
LICENSE.txt
MANIFEST.in
README.md
pyproject.toml
tox.ini
docs/Makefile
docs/browsing.rst
docs/changelog.rst
docs/commands.rst
docs/conf.py
docs/developer.rst
docs/examples.rst
docs/extensions.rst
docs/index.rst
docs/install.rst
docs/make.bat
docs/other.rst
docs/overview.rst
docs/python-api.rst
docs/requirements.txt
docs/testing.rst
extras/examples/discard-sf-mailman-msgs.twill
extras/examples/extend-example.py
extras/examples/quixote-demo.twill
extras/examples/set-user-agent.twill
extras/maxq/README.txt
extras/maxq/TwillScriptGenerator.java
src/twill/__init__.py
src/twill/__main__.py
src/twill/agents.py
src/twill/browser.py
src/twill/commands.py
src/twill/errors.py
src/twill/fork.py
src/twill/namespaces.py
src/twill/parse.py
src/twill/py.typed
src/twill/shell.py
src/twill/unit.py
src/twill/utils.py
src/twill.egg-info/PKG-INFO
src/twill.egg-info/SOURCES.txt
src/twill.egg-info/dependency_links.txt
src/twill.egg-info/entry_points.txt
src/twill.egg-info/requires.txt
src/twill.egg-info/top_level.txt
src/twill/extensions/__init__.py
src/twill/extensions/argparse.py
src/twill/extensions/check_links.py
src/twill/extensions/csv_iterate.py
src/twill/extensions/dirstack.py
src/twill/extensions/dns_check.py
src/twill/extensions/formfill.py
src/twill/extensions/mailman_sf.py
src/twill/extensions/match_parse.py
src/twill/extensions/require.py
src/twill/extensions/shell_test.py
tests/README.txt
tests/__init__.py
tests/conftest.py
tests/mock_dns.py
tests/server.py
tests/test_back.py
tests/test_back.twill
tests/test_basic.py
tests/test_basic.twill
tests/test_broken_html.py
tests/test_buildbot.py
tests/test_check_links.py
tests/test_check_links.twill
tests/test_checkbox.py
tests/test_dns.py
tests/test_dns.twill
tests/test_equiv_refresh.py
tests/test_equiv_refresh.twill
tests/test_find.py
tests/test_find.twill
tests/test_find_links.py
tests/test_form.py
tests/test_form.twill
tests/test_formfill.py
tests/test_formfill.twill
tests/test_gather.py
tests/test_global_form.py
tests/test_global_form.twill
tests/test_go.py
tests/test_go.twill
tests/test_go_exit.twill
tests/test_go_fail.twill
tests/test_go_fail2.twill
tests/test_headers.py
tests/test_headers.twill
tests/test_http_auth.py
tests/test_http_auth.twill
tests/test_http_codes.py
tests/test_http_codes.twill
tests/test_info.py
tests/test_info.twill
tests/test_match_parse.py
tests/test_match_parse.twill
tests/test_misc.py
tests/test_multisub.py
tests/test_multisub.twill
tests/test_radiobutton.py
tests/test_shell.py
tests/test_shell.twill
tests/test_shell_fail.twill
tests/test_show.py
tests/test_show.twill
tests/test_tidy.py
tests/test_timeout.py
tests/test_two_forms.py
tests/test_unit_support.py
tests/test_unit_support.twill
tests/test_utils.py
tests/test_variables.py
tests/test_variables.twill
tests/test_wsgi.py
tests/test_xml.py
tests/test_xml.twill
tests/utils.py
tests/test_gather/00-testme/x-script.twill
tests/test_gather/00-testme/y-script.txt
tests/test_gather/01-test/a.notwill
tests/test_gather/01-test/b.twill
tests/test_gather/02-test2/c.bak
tests/test_gather/02-test2/c.twill
tests/test_gather/02-test2/02-subtest/d.twill
# This file is part of the twill source distribution.
#
# twill is an extensible scriptlet language for testing Web apps,
# available at https://github.com/twill-tools/twill.
#
# Copyright (c) 2005-2023
# by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
#
# This program and all associated source code files are released under the
# terms of the MIT license; please see the included LICENSE file for more
# information, or go to https://opensource.org/licenses/mit-license.php.
"""The twill web browsing and testing language and associated utilities.
A scripting system for automating web browsing. Useful for testing
web pages or grabbing data from password-protected sites automatically.
"""
import importlib.metadata
import logging
import sys
from pathlib import Path
from typing import Optional, TextIO, Union
# Package metadata is read from the installed distribution at import time.
metadata = importlib.metadata.metadata(__package__)
__version__: str = metadata["Version"]
# A Project-URL value has the form "<label>, <url>"; keep only the URL part.
__url__: str = metadata["Project-URL"].rsplit(maxsplit=1)[-1]

__all__ = [
    "browser",
    "execute_file",
    "execute_string",
    "log",
    "set_log_level",
    "set_output",
    "set_err_out",
    "twill_ext",
    "TwillCommandLoop",
    "__url__",
    "__version__",
]

this_dir = Path(__file__).parent

# The bundled extensions directory goes at the *end* of sys.path, so that
# extensions provided by the user take priority over the twill extensions.
extensions = this_dir.joinpath("extensions")
sys.path.append(str(extensions))

twill_ext = ".twill"  # file extension for twill scripts

# Maps upper-case level names to the corresponding logging constants.
log_levels = {
    name: getattr(logging, name)
    for name in ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET")
}

log = logging.getLogger(__name__)
handler = None  # the stream handler currently attached to the log, if any

# Remember the streams that were active when this module was imported.
stdout = sys.stdout
stderr = sys.stderr
def set_log_level(level: Optional[Union[int, str]] = None) -> None:
    """Set the level of the twill logger.

    The level can be passed as a number or as a level name, which is
    converted to upper case and looked up in log_levels.
    If no level is passed, INFO is used as logging level.
    """
    if level is None:
        numeric_level = logging.INFO
    elif isinstance(level, str):
        numeric_level = log_levels[level.upper()]
    else:
        numeric_level = level
    log.setLevel(numeric_level)
def set_output(stream: Optional[TextIO] = None) -> None:
    """Redirect the standard output and the twill log to the given stream.

    If no stream is passed, the standard output that was active
    when this module was imported is used.
    """
    global handler  # noqa: PLW0603
    target = stdout if stream is None else stream
    if handler:
        log.removeHandler(handler)
    # the log is written to the same stream as the normal output
    handler = logging.StreamHandler(target)
    log.addHandler(handler)
    sys.stdout = target
def set_err_out(stream: Optional[TextIO] = None) -> None:
    """Redirect the error output to the given stream.

    If no stream is passed, the standard error that was active
    when this module was imported is used.
    """
    sys.stderr = stderr if stream is None else stream
def shutdown() -> None:
    """Flush the output streams and shut down the logging system."""
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    logging.shutdown()
# Apply the defaults at import time: called without arguments, these use
# the INFO log level and the standard output saved above.
set_log_level()
set_output()
# The imports below are deliberately placed at the end of the module
# (hence the E402 suppressions): the browser module imports __version__
# and log from this package, so these must already be defined here.
# initialize global dict
from . import namespaces # noqa: E402
# a convenience function:
from .browser import browser # noqa: E402
# the two core components of twill:
from .parse import execute_file, execute_string # noqa: E402
from .shell import TwillCommandLoop # noqa: E402
# this call must come after the imports above
# NOTE(review): presumably it fills the global namespace dict - confirm
namespaces.init_global_dict()
"""Main module for the twill package."""
from . import shell
if __name__ == "__main__":
    # Running this module as a script hands over to the shell entry point.
    shell.main()
"""Map of various User-Agent string shortcuts that can be used for testing."""
from typing import Dict
# noinspection HttpUrlsUsage
# Maps a short agent name to the full User-Agent header value.
# Long values are assembled from adjacent string literals, so every
# continuation fragment must start with the blank that separates it
# from the preceding fragment.
agents: Dict[str, str] = {
    # Desktop
    "chrome_40": "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "chrome_107": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/107.0.0.0 Safari/537.36",
    "edge_12": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/42.0.2311.135"
    " Safari/537.36 Edge/12.246",
    "edge_107": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/107.0.0.0"
    " Safari/537.36 Edg/107.0.1418.26",
    "firefox_40": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0)"
    " Gecko/20100101 Firefox/40.1",
    "firefox_106": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0)"
    " Gecko/20100101 Firefox/106.0",
    "ie_3": "Mozilla/2.0 (compatible; MSIE 3.0; Windows 3.1)",
    "ie_4": "Mozilla/4.0 (compatible; MSIE 4.0; Windows NT 5.0)",
    "ie_5": "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 5.0)",
    "ie_6": "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
    "ie_7": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "ie_8": "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
    "ie_9": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)",
    "ie_10": "Mozilla/5.0"
    " (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "ie_11": "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
    "ie_mobile_9": "Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5;"
    " Trident/5.0; IEMobile/9.0)",
    "opera_7": "Opera/7.0 (Windows NT 5.1; U) [en]",
    "opera_8": "Opera/8.00 (Windows NT 5.1; U; en)",
    "opera_9": "Opera/9.00 (Windows NT 5.2; U; en)",
    "opera_10": "Opera/9.80 (Windows NT 6.1; U; en)"
    " Presto/2.2.15 Version/10.00",
    "opera_11": "Opera/9.80 (Windows NT 6.1; U; en)"
    " Presto/2.7.62 Version/11.00",
    "opera_12": "Opera/12.0 (Windows NT 5.1; U; en)"
    " Presto/22.9.168 Version/12.00",
    "opera_mini_7": "Opera/9.80 (Android; Opera Mini/7.0.29952/28.2075; en)"
    " Presto/2.8.119 Version/11.10",
    "opera_mini_9": "Opera/9.80 (J2ME/MIDP; Opera Mini/9"
    " (Compatible; MSIE:9.0; iPhone; BlackBerry9700;"
    " AppleWebKit/24.746; en) Presto/2.5.25 Version/10.54",
    "konqueror_3": "Mozilla/5.0 (compatible; Konqueror/3.0; Linux)",
    "konqueror_4": "Mozilla/5.0 (compatible; Konqueror/4.0; Linux)"
    " KHTML/4.0.3 (like Gecko)",
    "lynx_2_8": "Lynx/2.8.7rel.2 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.0.0a",
    "w3m_0_5": "w3m/0.5.2 (Linux i686; it; Debian-3.0.6-3)",
    "netscape_3": "Mozilla/3.0 (X11; I; AIX 2)",
    "netscape_4": "Mozilla/4.0 (compatible; Mozilla/5.0 ; Linux i686)",
    "netscape_4_5": "Mozilla/4.5 [en] (X11; I; SunOS 5.6 sun4u)",
    "netscape_7": "Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.0.1)"
    " Gecko/20020921 Netscape/7.0",
    "netscape_9": "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.8pre)"
    " Gecko/20071015 Firefox/2.0.0.7 Navigator/9.0",
    "palemoon_25": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:25.6)"
    " Gecko/20150723 Firefox/31.9 PaleMoon/25.6.0",
    "safari_1": "Mozilla/5.0 (Macintosh; PPC Mac OS X; en)"
    " AppleWebKit/85.7 (KHTML, like Gecko) Safari/85.6",
    "safari_2": "Mozilla/5.0 (Macintosh; PPC Mac OS; en)"
    " AppleWebKit/412 (KHTML, like Gecko) Safari/412",
    "safari_3": "Mozilla/5.0 (Macintosh; Intel Mac OS X; en)"
    " AppleWebKit/522.7 (KHTML, like Gecko)"
    " Version/3.0 Safari/522.7",
    # the last fragment needs its leading blank like all other entries,
    # otherwise the value contains "...like Gecko)Version/4.0..."
    "safari_4": "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_6; en)"
    " AppleWebKit/530.9+ (KHTML, like Gecko)"
    " Version/4.0 Safari/528.16",
    "safari_5": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en)"
    " AppleWebKit/534.1+ (KHTML, like Gecko)"
    " Version/5.0 Safari/533.16",
    "safari_6": "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X)"
    " AppleWebKit/536.26 (KHTML, like Gecko)"
    " Version/6.0 Mobile/10A5355d Safari/8536.25",
    "safari_7": "Mozilla/5.0 (iPad; CPU OS 7_1_2 like Mac OS X)"
    " AppleWebKit/537.51.2 (KHTML, like Gecko)"
    " Version/7.0 Mobile/11D257 Safari/9537.53",
    "safari_605": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0)"
    " AppleWebKit/605.1.15 (KHTML, like Gecko)"
    " Version/16.1 Safari/605.1.15",
    "vivaldi_5": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/107.0.0.0"
    " Safari/537.36 Vivaldi/5.4.2753.51",
    # Android phones
    "galaxy_s7": "Mozilla/5.0 (Linux; Android 7.0; SM-G930VC Build/NRD90M; wv)"
    " AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0"
    " Chrome/58.0.3029.83 Mobile Safari/537.36",
    "galaxy_s10": "Mozilla/5.0"
    " (Linux; Android 9; SM-G973U Build/PPR1.180610.011)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/69.0.3497.100 Mobile Safari/537.36",
    "galaxy_s20": "Mozilla/5.0 (Linux; Android 10;"
    " SM-G980F Build/QP1A.190711.020; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/78.0.3904.96"
    " Mobile Safari/537.36",
    "galaxy_s22": "Mozilla/5.0 (Linux; Android 12;"
    " SM-S906N Build/QP1A.190711.020; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/80.0.3987.119"
    " Mobile Safari/537.36",
    "google_pixel": "Mozilla/5.0 (Linux; Android 7.1.1; Google Pixel"
    " Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko)"
    " Version/4.0 Chrome/54.0.2840.85 Mobile Safari/537.36",
    "google_pixel4": "Mozilla/5.0 (Linux; Android 10; Google Pixel 4"
    " Build/QD1A.190821.014.C2; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/78.0.3904.108"
    " Mobile Safari/537.36",
    "google_pixel_6": "Mozilla/5.0 (Linux; Android 12; Pixel 6"
    " Build/SD1A.210817.023; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/94.0.4606.71"
    " Mobile Safari/537.36",
    "nexus_6p": "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6P Build/MMB29P)"
    " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83"
    " Mobile Safari/537.36",
    "sony_xperia_1": "Mozilla/5.0 (Linux; Android 9;"
    " J8110 Build/55.0.A.0.552; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/71.0.3578.99"
    " Mobile Safari/537.36",
    "htc_one_x10": "Mozilla/5.0 (Linux; Android 6.0; HTC One"
    " X10 Build/MRA58K; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/61.0.3163.98"
    " Mobile Safari/537.36",
    # iPhones
    "iphone_6": "Mozilla/5.0"
    " (Apple-iPhone7C2/1202.466; U; CPU like Mac OS X; en)"
    " AppleWebKit/420+ (KHTML, like Gecko) Version/3.0"
    " Mobile/1A543 Safari/419.3",
    "iphone_7": "Mozilla/5.0"
    " (iPhone9,3; U; CPU iPhone OS 10_0_1 like Mac OS X)"
    " AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0"
    " Mobile/14A403 Safari/602.1",
    "iphone_8": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X)"
    " AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0"
    " Mobile/15A5341f Safari/604.1",
    "iphone_x": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X)"
    " AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0"
    " Mobile/15A372 Safari/604.1",
    "iphone_11": "Mozilla/5.0"
    " (iPhone12,1; U; CPU iPhone OS 13_0 like Mac OS X)"
    " AppleWebKit/602.1.50 (KHTML, like Gecko)"
    " Version/10.0 Mobile/15E148 Safari/602.1",
    "iphone_12": "Mozilla/5.0"
    " (iPhone13,2; U; CPU iPhone OS 14_0 like Mac OS X)"
    " AppleWebKit/602.1.50 (KHTML, like Gecko)"
    " Version/10.0 Mobile/15E148 Safari/602.1",
    "iphone_13_pro_max": "Mozilla/5.0 (iPhone14,3; U; CPU iPhone OS 15_0"
    " like Mac OS X) AppleWebKit/602.1.50"
    " (KHTML, like Gecko) Version/10.0"
    " Mobile/19A346 Safari/602.1",
    "iphone_se_3": "Mozilla/5.0 (iPhone14,6; U; CPU iPhone OS 15_4"
    " like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko)"
    " Version/10.0 Mobile/19E241 Safari/602.1",
    # MS Windows phones
    "ms_lumia_650": "Mozilla/5.0"
    " (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254",
    "ms_lumia_950": "Mozilla/5.0 (Windows Phone 10.0;"
    " Android 4.2.1; Microsoft; Lumia 950)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/13.1058",
    # Tablets
    "galaxy_tab_s8": "Mozilla/5.0 (Linux; Android 12;"
    " SM-X906C Build/QP1A.190711.020; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0 Chrome/80.0.3987.119"
    " Mobile Safari/537.36",
    "lenovo_yoga_tab_11": "Mozilla/5.0 (Linux; Android 11; Lenovo YT-J706X)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/96.0.4664.45 Safari/537.36",
    "sony_xperia_tab_z4": "Mozilla/5.0 (Linux; Android 6.0.1;"
    " SGP771 Build/32.2.A.0.253; wv) AppleWebKit/537.36"
    " (KHTML, like Gecko) Version/4.0"
    " Chrome/52.0.2743.98 Safari/537.36",
    "galaxy_tab_s3": "Mozilla/5.0 (Linux; Android 7.0; SM-T827R4 Build/NRD90M)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/60.0.3112.116 Safari/537.36",
    "amazon_fire_hdx_7": "Mozilla/5.0"
    " (Linux; Android 4.4.3; KFTHWI Build/KTU84M)"
    " AppleWebKit/537.36 (KHTML, like Gecko) Silk/47.1.79"
    " like Chrome/47.0.2526.80 Safari/537.36",
    "lg_g_pad_7": "Mozilla/5.0 (Linux; Android 5.0.2; LG-V410/V41020c"
    " Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko)"
    " Version/4.0 Chrome/34.0.1847.118 Safari/537.36",
    # E-Readers
    "kindle_4": "Mozilla/5.0 (X11; U; Linux armv7l like Android; en-us)"
    " AppleWebKit/531.2+ (KHTML, like Gecko) Version/5.0"
    " Safari/533.2+ Kindle/3.0+",
    "kindle_3": "Mozilla/5.0 (Linux; U; en-US) AppleWebKit/528.5+"
    " (KHTML, like Gecko, Safari/528.5+) Version/4.0 Kindle/3.0"
    " (screen 600x800; rotate)",
    # Set tops
    "chromecast": "Mozilla/5.0 (CrKey armv7l 1.5.16041) AppleWebKit/537.36"
    " (KHTML, like Gecko) Chrome/31.0.1650.0 Safari/537.36",
    "amazon_4k_fire_tv": "Mozilla/5.0 (Linux; Android 5.1; AFTS Build/LMY47O)"
    " AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0"
    " Chrome/41.99900.2250.0242 Safari/537.36",
    "nexus_player": "Dalvik/2.1.0 (Linux; U; Android 6.0.1;"
    " Nexus Player Build/MMB29T)",
    "apple_tv_6": "AppleTV11,1/11.1",
    "apple_tv_5": "AppleTV6,2/11.1",
    "apple_tv_4": "AppleTV5,3/9.1.1",
    # Game consoles
    "playstation_5": "Mozilla/5.0 (PlayStation; PlayStation 5/2.26)"
    " AppleWebKit/605.1.15 (KHTML, like Gecko)"
    " Version/13.0 Safari/605.1.15",
    "playstation_4": "Mozilla/5.0 (PlayStation 4 3.11) AppleWebKit/537.73"
    " (KHTML, like Gecko)",
    "xbox_x": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox Series X)"
    " AppleWebKit/537.36 (KHTML, like Gecko)"
    " Chrome/48.0.2564.82 Safari/537.36 Edge/20.02",
    "xbox_one": "Mozilla/5.0"
    " (Windows Phone 10.0; Android 4.2.1; Xbox; Xbox One)"
    " AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0"
    " Mobile Safari/537.36 Edge/13.10586",
    "nintendo_switch": "Mozilla/5.0 (Nintendo Switch; WifiWebAuthApplet)"
    " AppleWebKit/601.6 (KHTML, like Gecko) NF/4.0.0.5.10"
    " NintendoBrowser/5.1.0.13343",
    # Bots
    "google_bot_2": "Mozilla/5.0 (compatible; Googlebot/2.1;"
    " +http://www.google.com/bot.html)",
    "bing_bot_2": "Mozilla/5.0 (compatible; bingbot/2.0;"
    " +http://www.bing.com/bingbot.htm)",
    "yahoo_bot": "Mozilla/5.0 (compatible; Yahoo! Slurp;"
    " http://help.yahoo.com/help/us/ysearch/slurp)",
}
"""Implementation of the TwillBrowser."""
import pickle
import re
from contextlib import suppress
from http import HTTPStatus
from typing import (
IO,
Any,
Callable,
Dict,
List,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from urllib.parse import urljoin
from httpx import (
BasicAuth,
Client,
ConnectError,
Cookies,
Headers,
InvalidURL,
Timeout,
)
from . import __version__, log
from .errors import TwillException
from .utils import (
CheckboxGroup,
FieldElement,
FormElement,
HtmlElement,
InputElement,
Link,
RadioGroup,
Response,
ResultWrapper,
UrlWithRealm,
get_equiv_refresh_interval,
html_to_tree,
print_form,
trunc,
unique_match,
)
__all__ = ["browser"]
def _set_http_connection_debuglevel(level: int) -> None:
"""Set the debug level for the connection pool."""
from http.client import HTTPConnection
HTTPConnection.debuglevel = level
class TwillBrowser:
"""A simple, stateful browser."""
user_agent = f"TwillBrowser/{__version__}"
def __init__(
    self,
    base_url: str = "",
    app: Optional[Callable[..., Any]] = None,
    follow_redirects: bool = True,  # noqa: FBT001, FBT002
    verify: Union[bool, str] = False,  # noqa: FBT002
    timeout: Union[None, float, Timeout] = 10,
) -> None:
    """Create a new twill browser.

    All arguments are passed on unchanged to the reset() method.

    In "base_url" you can specify a base URL for all requests, and in
    "app" a WSGI app that requests are sent to instead of over the network.

    With "follow_redirects" set to True (the default), the browser
    automatically follows all redirects.

    The "verify" argument specifies whether or how server certificates
    shall be verified; this can also be a CA bundle path.

    The "timeout" argument specifies the timeout in seconds.
    """
    self.reset(
        base_url=base_url,
        app=app,
        follow_redirects=follow_redirects,
        verify=verify,
        timeout=timeout,
    )
def _assert_result_for(self, what: str) -> ResultWrapper:
    """Return the current result, raising an error if there is none.

    The "what" argument names the thing that the caller wanted to get;
    it is only used in the message of the TwillException.
    """
    result = self.result
    if result:
        return result
    raise TwillException(f"Cannot get {what} since there is no page.")
@property
def debug_level(self) -> int:
    """Get the debug level that has been set for this browser."""
    return self._debug_level
@debug_level.setter
def debug_level(self, level: int) -> None:
    """Set the debug level for this browser and for the HTTP connections."""
    # the level is first applied to the HTTP connection class
    _set_http_connection_debuglevel(level)
    self._debug_level = level
def close(self) -> None:
    """Close the HTTP client and discard the state of the browser.

    This does nothing if the browser has no client, which is the case
    when it has not been reset yet or when it has already been closed.
    """
    try:
        client = self._client
    except AttributeError:
        return
    client.close()
    # remove everything that has been set up in the reset() method
    for name in (
        "result",
        "last_submit_button",
        "first_error",
        "_client",
        "_form",
        "_form_files",
        "_auth",
        "_post_load_hooks",
        "_history",
    ):
        delattr(self, name)
def reset(
    self,
    base_url: str = "",
    app: Optional[Callable[..., Any]] = None,
    follow_redirects: bool = True,  # noqa: FBT001, FBT002
    verify: Union[bool, str] = False,  # noqa: FBT002
    timeout: Union[None, float, Timeout] = 10,
) -> None:
    """Reset the browser.

    Closes the current client, if any, and sets up a fresh state
    with a new client, no page, no form and an empty history.

    Optionally, you can send requests to a WSGI app instead of over the
    network, and you can specify a base URL for all requests.

    The "follow_redirects" parameter has the default value True so that
    the browser by default automatically follows all redirects.

    The "verify" argument can be used to specify whether or how server
    certificates shall be verified; this can also be a CA bundle path.

    In the "timeout" argument you can specify the timeout in seconds.
    """
    # imported locally since it is only needed in this method
    from httpx import WSGITransport

    self.close()

    self.result: Optional[ResultWrapper] = None
    self.last_submit_button: Optional[InputElement] = None
    self.first_error: Optional[str] = None

    # whether meta refresh will be displayed
    self.show_refresh = False

    # debug level to be used for the connection pool
    self._debug_level = 0

    # The "app" shortcut of the httpx Client has been deprecated in
    # httpx 0.27 and removed in httpx 0.28, therefore the WSGI app
    # is passed using an explicit transport, which is equivalent.
    transport = None if app is None else WSGITransport(app=app)

    # Client stores cookies
    self._client = Client(
        transport=transport,
        base_url=base_url,
        follow_redirects=follow_redirects,
        verify=verify,
        timeout=timeout,
    )

    # A lxml FormElement, None until a form is selected
    # replaces self._browser.form from mechanize
    self._form: Optional[FormElement] = None
    self._form_files: Dict[str, IO] = {}

    # A dict of BasicAuth from httpx, keyed off URL
    self._auth: Dict[UrlWithRealm, BasicAuth] = {}

    # callables to be called after each page load.
    self._post_load_hooks: List[Callable] = []

    self._history: List[ResultWrapper] = []

    # set default headers
    self.reset_headers()
@property
def creds(self) -> Dict[UrlWithRealm, BasicAuth]:
    """Get the credentials for basic authentication.

    Returns the internal dictionary itself (not a copy), as filled
    by add_creds().
    """
    return self._auth
def add_creds(self, url: UrlWithRealm, user: str, password: str) -> None:
    """Set the credentials for basic authentication.

    The credentials are stored under the given key, replacing any
    credentials stored under that key before.
    """
    self._auth[url] = BasicAuth(user, password)
def go(self, url: str) -> None:
    """Visit given URL.

    A URL with a scheme is used as is. Otherwise the URL is first tried
    relative to the current page (if there is one), and then, unless it
    starts with ".", "/" or "?", with "http://" and "https://" prepended.

    Raises a TwillException if none of the candidates can be opened.
    """
    candidates: List[str] = []
    if "://" in url:
        candidates.append(url)
    else:  # URL does not have a schema
        # a relative URL is tacked onto the end of the current URL
        base_url = self.url
        if base_url:
            candidates.append(urljoin(base_url, url))
        # an absolute URL may be just missing the scheme at the
        # beginning, try fixing that (mimic browser behavior)
        if not url.startswith((".", "/", "?")):
            # noinspection HttpUrlsUsage
            candidates.extend((f"http://{url}", f"https://{url}"))

    unreachable = (OSError, ConnectError, InvalidURL, UnicodeError)
    for candidate in candidates:
        try:
            self._journey("open", candidate)
        except unreachable as error:
            log.info("cannot go to '%s': %s", candidate, error)
        else:
            break  # the first candidate that works wins
    else:
        raise TwillException(f"cannot go to '{url}'")

    log.info("==> at %s", self.url)
def reload(self) -> None:
    """Tell the browser to reload the current page."""
    # the journey re-requests the URL of the current page
    self._journey("reload")
    log.info("==> reloaded")
def back(self) -> None:
    """Return to previous page, if possible.

    When there is no previous page, only a warning is logged.
    """
    try:
        self._journey("back")
    except TwillException:
        log.warning("==> back at empty page")
    else:
        log.info("==> back to %s", self.url)
@property
def code(self) -> int:
    """Get the HTTP status code received for the current page."""
    # NOTE(review): _assert_result_for presumably raises when no page has
    # been loaded yet -- confirm against its definition
    return self._assert_result_for("status code").http_code
@property
def encoding(self) -> Optional[str]:
    """Get the encoding used by the server for the current page.

    Returns None when there is no current page.
    """
    page = self.result
    if page is None:
        return None
    return page.encoding
@property
def html(self) -> str:
    """Get the HTML for the current page.

    This is the text of the current result.
    """
    return self._assert_result_for("HTML").text
@property
def dump(self) -> bytes:
    """Get the binary content of the current page.

    This is the content of the current result.
    """
    return self._assert_result_for("content dump").content
@property
def title(self) -> Optional[str]:
    """Get the title of the current page, as provided by the result."""
    return self._assert_result_for("title").title
@property
def url(self) -> Optional[str]:
    """Get the URL of the current page.

    Returns None when there is no current page.
    """
    page = self.result
    if page:
        return page.url
    return None
def find_link(self, pattern: str) -> Optional[Link]:
    """Find the first link matching the given regular expression pattern.

    The pattern is searched in the URL and in the link text.

    The search itself is delegated to the current result.
    """
    return self._assert_result_for("links").find_link(pattern)
def find_links(self, pattern: str) -> Optional[List[Link]]:
    """Find all links matching the given regular expression pattern.

    The pattern is searched in the URL and in the link text.

    The search itself is delegated to the current result.
    """
    return self._assert_result_for("links").find_links(pattern)
def follow_link(self, link: Union[str, Link]) -> None:
    """Follow the given link.

    The link can be passed as a Link object or directly as a URL string.
    """
    self._journey("follow_link", link)
    log.info("==> at %s", self.url)
@property
def headers(self) -> Headers:
    """Return the request headers currently used by the browser.

    These are the headers of the underlying client, not a copy.
    """
    return self._client.headers
def reset_headers(self) -> None:
    """Reset the request headers currently used by the browser.

    All headers are removed, then only the "Accept" and "User-Agent"
    headers are set again.
    """
    self.headers.clear()
    self.headers.update(
        {"Accept": "text/html; */*", "User-Agent": self.user_agent}
    )
@property
def response_headers(self) -> Headers:
    """Get the headers returned with the current page."""
    # these are response headers, unlike the request headers in "headers"
    return self._assert_result_for("headers").headers
@property
def agent_string(self) -> Optional[str]:
    """Get the user agent string.

    A value stored as bytes is returned decoded; None is returned
    when no "User-Agent" header is set.
    """
    user_agent = self.headers.get("User-Agent")
    return user_agent.decode() if isinstance(user_agent, bytes) else user_agent

@agent_string.setter
def agent_string(self, agent: str) -> None:
    """Set the user agent string to the given value."""
    self.headers["User-Agent"] = agent
@property
def timeout(self) -> Union[None, float, Timeout]:
    """Get the request timeout in seconds.

    When the connect, read, write and pool timeouts of the client are
    all equal, that single value is returned (None if it is falsy).
    Otherwise the timeout object of the client is returned.
    """
    timeout = self._client.timeout
    if timeout.connect == timeout.read == timeout.write == timeout.pool:
        # "or None" maps a zero timeout to None
        return timeout.connect or None
    return timeout

@timeout.setter
def timeout(self, timeout: Union[None, float, Timeout]) -> None:
    """Set the request timeout in seconds."""
    self._client.timeout = timeout  # type: ignore[assignment]
def show_forms(self) -> None:
    """Pretty-print all forms on the page.

    Include the global form (form elements outside <form> pairs)
    as forms[0] if present.
    """
    # the forms are numbered starting with 1 in the output
    for n, form in enumerate(self.forms, 1):
        print_form(form, n)
def show_links(self) -> None:
    """Pretty-print all links on the page.

    Each link is logged with its number, its text truncated to
    40 characters, and its URL.
    """
    all_links = self.links
    if not all_links:
        log.info("\n** no links **\n")
        return
    log.info("\nLinks (%d links total):\n", len(all_links))
    for index, link in enumerate(all_links, 1):
        log.info("\t%d. %s ==> %s", index, trunc(link.text, 40), link.url)
    log.info("")
def show_history(self) -> None:
    """Pretty-print the history of links visited.

    Each page in the history is logged with its number and its URL.
    """
    pages = self._history
    if not pages:
        log.info("\n** no history **\n")
        return
    log.info("\nHistory (%d pages total):\n", len(pages))
    for index, page in enumerate(pages, 1):
        log.info("\t%d. %s", index, page.url)
    log.info("")
@property
def links(self) -> List[Link]:
    """Return a list of all links on the page.

    The list is taken from the current result.
    """
    return self._assert_result_for("links").links
@property
def history(self) -> List[ResultWrapper]:
    """Return a list of all pages visited by the browser.

    This is the internal history list itself, not a copy.
    """
    return self._history
@property
def forms(self) -> List[FormElement]:
    """Return a list of forms on the page.

    This includes the global form at index 0 if present.

    The list is taken from the current result.
    """
    return self._assert_result_for("forms").forms
def form(self, name: Union[str, int] = 1) -> Optional[FormElement]:
    """Return the first form that matches the given form name.

    The name can also be given as a number; the lookup itself is
    delegated to the current result. By default, 1 is passed.
    """
    return self._assert_result_for("form").form(name)
def form_field(
    self,
    form: Optional[FormElement] = None,
    name_or_num: Union[str, int] = 1,
) -> FieldElement:
    """Return the control that matches the given field name.

    Must be a *unique* regex/exact string match, but the returned
    control can also be a CheckboxGroup or RadioGroup list.

    If no form is passed, the currently selected form is used.

    The following matches are tried in this order:

    1. several controls with exactly that name which are all
       checkboxes or all radio buttons (returned as a group),
    2. exact match to the id of a control,
    3. exact match to the name of a control,
    4. the number of a control (counted from 1),
    5. regex match to the name of a control,
    6. exact match to the value of a control.

    Raises a TwillException if no such field or multiple fields are found.
    """
    if form is None:
        form = self._form
        if form is None:
            raise TwillException("Must specify a form for the field")

    inputs = form.inputs
    # remembers whether any step found more than one candidate,
    # so that the proper error message can be produced at the end
    found_multiple = False

    name = name_or_num if isinstance(name_or_num, str) else None
    if name:
        if name in form.fields:
            match_name = [c for c in inputs if c.name == name]
            if len(match_name) > 1:
                # several checkboxes or radio buttons sharing one name
                # are treated as one logical control
                if all(
                    getattr(c, "type", None) == "checkbox"
                    for c in match_name
                ):
                    return CheckboxGroup(
                        cast(List[InputElement], match_name)
                    )
                if all(
                    getattr(c, "type", None) == "radio" for c in match_name
                ):
                    return RadioGroup(cast(List[InputElement], match_name))
        else:
            match_name = None

        # test exact match to id
        match_id = [c for c in inputs if c.get("id") == name]
        if match_id:
            if unique_match(match_id):
                return match_id[0]
            found_multiple = True

        # test exact match to name
        if match_name:
            if unique_match(match_name):
                return match_name[0]
            found_multiple = True

    num = name_or_num if isinstance(name_or_num, int) else None
    # a name consisting of digits only is also tried as a field index
    if num is None and name and name.isdigit():
        with suppress(ValueError):
            num = int(name)

    # test field index
    if num is not None:
        # an index out of range is not an error, the search continues
        with suppress(IndexError):
            return list(inputs)[num - 1]

    if name:
        # test regex match
        regex = re.compile(name)
        match_name = [c for c in inputs if c.name and regex.search(c.name)]
        if match_name:
            if unique_match(match_name):
                return match_name[0]
            found_multiple = True

        # test field values
        match_value = [c for c in inputs if c.value == name]
        if match_value:
            if len(match_value) == 1:
                return match_value[0]
            found_multiple = True

    # error out
    if found_multiple:
        raise TwillException(f'multiple matches to "{name_or_num}"')
    raise TwillException(f'no field matches "{name_or_num}"')
def add_form_file(self, field_name: str, fp: IO) -> None:
    """Register an open file to be uploaded with the next form submit.

    The file is stored under the given field name, replacing any file
    registered for that field name before.
    """
    self._form_files[field_name] = fp
def clicked(self, form: FormElement, control: FieldElement) -> None:
    """Record a 'click' in a specific form.

    The form becomes the currently selected form. If the control is
    a submit or image button, it is remembered as the last submit
    button clicked.
    """
    if self._form != form:
        # a different form has been clicked: select it and forget the
        # submit button remembered for the previously selected form
        self._form = form
        self.last_submit_button = None
    # record the last submit button clicked.
    if getattr(control, "type", None) in ("submit", "image"):
        self.last_submit_button = cast(InputElement, control)
def submit(
    self,
    field_name: Optional[Union[str, int]] = None,
    form_name: Optional[Union[str, int]] = None,
) -> None:
    """Submit the last or specified form using the given field.

    If no form name is given, the currently selected form is used, or
    the only form on the page if no form has been selected.

    If no field name is given, the last submit button clicked in that
    form is used, or else the first submit button of the form, if any.

    After the submit, the form selection and pending file uploads are
    cleared, and the page shown before is added to the history.

    Raises a TwillException if the page has no forms, or if it has
    several forms and none of them was specified or selected.
    """
    forms = self.forms
    if not forms:
        raise TwillException("There are no forms on this page.")

    ctl: Optional[InputElement] = None

    form = self._form if form_name is None else self.form(form_name)
    if form is None:
        if len(forms) > 1:
            raise TwillException(
                "There is more than one form on this page;"
                " therefore you must specify a form explicitly"
                " or select one (use 'fv') before submitting."
            )
        form = forms[0]

    # make the form action absolute, based on the current URL
    action = form.action or ""
    if "://" not in action:
        form.action = urljoin(self.url, action)

    # no field name?  see if we can use the last submit button clicked...
    if field_name is None:
        if form is not self._form or self.last_submit_button is None:
            # get first submit button in form.
            submits = [
                c
                for c in form.inputs
                if getattr(c, "type", None) in ("submit", "image")
            ]
            if submits:
                ctl = cast(InputElement, submits[0])
        else:
            ctl = self.last_submit_button
    else:
        # field name given; find it
        ctl = cast(InputElement, self.form_field(form, field_name))

    # now set up the submission by building the request object that
    # will be sent in the form submission.
    if ctl is None:
        log.debug("Note: submit without using a submit button")
    else:
        log.info(
            "Note: submit is using submit button:"
            " name='%s', value='%s'",
            ctl.get("name"),
            ctl.value,
        )

    # Add referer information.  This may require upgrading the
    # request object to have an 'add_unredirected_header' function.
    # @BRT: For now, the referrer is always the current page
    # @CTB: this seems like an issue for further work.
    # Note: We do not set Content-Type from form.attrib.get('enctype'),
    # since httpx does a much better job at setting the proper one.
    headers = {"Referer": self.url}

    payload = form.form_values()
    if ctl is not None:
        # add the name and value of the submit button to the payload
        name = ctl.get("name")
        if name:
            payload.append((name, ctl.value or ""))
    payload_dict = self._make_payload_dict(payload)

    # now actually GO
    if form.method == "POST":
        if self._form_files:
            log.debug("Submitting files: %r", self._form_files)
            result = self._client.post(
                form.action,
                data=payload_dict,
                headers=headers,
                files=self._form_files,
            )
        else:
            result = self._client.post(
                form.action, data=payload_dict, headers=headers
            )
    else:
        # any other method: the payload is sent as query parameters
        result = self._client.get(
            form.action, params=payload_dict, headers=headers
        )

    # reset the form state, it belongs to the page that has been left
    self._form = None
    self._form_files.clear()
    self.last_submit_button = None
    if self.result is not None:
        self._history.append(self.result)
    self.result = ResultWrapper(result)
def cookies(self) -> Cookies:
    """Get all cookies from the current client session.

    These are the cookies of the underlying client, not a copy.
    """
    return self._client.cookies
def save_cookies(self, filename: str) -> None:
    """Save cookies into the given file.

    The cookies of the client are converted to a dictionary
    and written to the file using pickle.
    """
    with open(filename, "wb") as f:
        pickle.dump(dict(self._client.cookies), f)
def load_cookies(self, filename: str) -> None:
    """Load cookies from the given file.

    The file is read using pickle and its content replaces
    the cookies of the client.
    """
    # NOTE(review): unpickling can execute arbitrary code; only files
    # from a trusted source (e.g. written by save_cookies) must be loaded
    with open(filename, "rb") as f:
        self._client.cookies = pickle.load(f)  # noqa: S301
def clear_cookies(self) -> None:
    """Delete all the cookies."""
    # clears the cookies of the client in place
    self._client.cookies.clear()
def show_cookies(self) -> None:
    """Pretty-print all the cookies.

    Logs the number of cookies held by the client, followed by one
    numbered line for each item yielded when iterating over them.
    """
    cookies = self._client.cookies
    count = len(cookies)
    if not count:
        # no arguments must be passed here, since the message contains
        # no placeholders (otherwise logging fails to format the record)
        log.info("\nThere are no cookies in the cookie jar.\n")
        return
    log.info("\nThere are %d cookie(s) in the cookie jar.\n", count)
    for index, cookie in enumerate(cookies, 1):
        log.info("\t%d. %s", index, cookie)
    log.info("")
def decode(self, value: Union[bytes, str]) -> str:
    """Decode a value using the current encoding.

    Strings are returned unchanged. Bytes are decoded with the encoding
    of the current page, or with UTF-8 if that encoding is not known.
    """
    if not isinstance(value, bytes):
        return value
    return value.decode(self.encoding or "utf-8")
def xpath(self, path: str) -> List[HtmlElement]:
    """Evaluate an xpath expression.

    The expression is evaluated by the current result.
    """
    return self._assert_result_for("xpath").xpath(path)
def _make_payload_dict(
    self,
    payload: Sequence[Tuple[str, Union[str, bytes]]],
) -> Dict[str, Union[str, List[Union[str]]]]:
    """Prepare a payload by decoding bytes and converting to a dict.

    Bytes are decoded with the encoding of the current page (UTF-8 if
    not known). A key occurring only once is mapped to its value, while
    a key occurring several times is mapped to the list of its values.
    """
    charset = self.encoding or "utf-8"
    result: Dict[str, Union[str, List[Union[str]]]] = {}
    for field_name, raw_value in payload:
        if isinstance(raw_value, str):
            text = raw_value
        else:
            text = raw_value.decode(charset)
        if field_name not in result:
            result[field_name] = text
            continue
        # the key has been seen already: collect its values in a list
        current = result[field_name]
        if isinstance(current, list):
            current.append(text)
        else:
            result[field_name] = [current, text]
    return result
@staticmethod
def _get_meta_refresh(
    response: Response,
) -> Tuple[Optional[int], Optional[str]]:
    """Get meta refresh interval and url from a response.

    The content attribute of the first meta refresh element is expected
    to have the form "<interval>; url=<url>", where the URL may be
    quoted. A relative URL is joined with the URL of the response.

    Returns the tuple (None, None) if there is no such element or
    if its content does not have the expected form.
    """
    try:
        tree = html_to_tree(response.text)
    except ValueError:
        # may happen when there is an XML encoding declaration
        tree = html_to_tree(response.content)
    try:
        content = tree.xpath(  # "refresh" is case-insensitive
            "//meta[translate(@http-equiv,'REFSH','refsh')="
            "'refresh'][1]/@content"
        )[0]
        # the split raises a ValueError if there is no semicolon
        interval, url = content.split(";", 1)
        interval = int(interval)
        if interval < 0:
            raise ValueError
        # strip whitespace and quotes around the whole "url=..." part
        url = url.strip().strip('"').strip().strip("'").strip()
        url = url.split("=", 1)
        if url[0].strip().lower() != "url":
            raise IndexError
        # strip whitespace and quotes around the URL itself; this raises
        # an IndexError if there was no equals sign
        url = url[1].strip().strip('"').strip().strip("'").strip()
    except (IndexError, ValueError):
        interval = url = None
    else:
        if "://" not in url:  # relative URL, adapt
            url = str(response.url.join(url))
    return interval, url
# Pattern for extracting the realm from a WWW-Authenticate header with
# the Basic scheme. The realm is captured up to its closing quote only,
# so that further parameters following it (like charset="UTF-8") or
# additional challenges do not become part of the realm.
_re_basic_auth = re.compile(r'Basic realm="([^"]*)"', re.I)
def _journey(self, func_name: str, *args, **_kwargs) -> None:
    """Execute the function with the given name and arguments.

    The name should be one of 'open', 'reload', 'back', or 'follow_link'.

    This method then runs that function with the given arguments and turns
    the results into a nice friendly standard ResultWrapper object, which
    is stored as self.result.

    For 'open' and 'follow_link', the URL or link is passed as the first
    positional argument. If the server answers with status code 401 and
    asks for basic authentication, the request is repeated with matching
    credentials, if available. Meta refresh redirects are followed as
    long as their interval is below the configured refresh interval.

    Raises a TwillException if the function name is unknown or if there
    is no page to go back to.

    (Idea stolen from Python Browsing Probe (PBP).)
    """
    # the form state belongs to the page that is left now
    self._form = None
    self._form_files.clear()
    self.last_submit_button = None

    if func_name == "open":
        url = args[0]

    elif func_name == "follow_link":
        url = args[0]
        with suppress(AttributeError):  # may be already a url
            url = url.url
        if "://" not in url and self.url:
            url = urljoin(self.url, url)

    elif func_name == "reload":
        url = self.url

    elif func_name == "back":
        # going back does not request anything, the last result
        # stored in the history simply becomes the current one
        try:
            self.result = self._history.pop()
        except IndexError as error:
            raise TwillException("Cannot go further back") from error
        return

    else:
        raise TwillException(f"Unknown function {func_name!r}")

    result = self._client.get(url)
    if result.status_code == HTTPStatus.UNAUTHORIZED:
        # the header may be missing in the response; in this case
        # an empty string is matched, since None cannot be matched
        header = result.headers.get("WWW-Authenticate") or ""
        match_realm = self._re_basic_auth.match(header)
        if match_realm:
            realm = match_realm.group(1)
            # credentials for the realm take precedence
            # over credentials stored for the URL only
            auth = self._auth.get((url, realm)) or self._auth.get(url)
            if auth:
                result = self._client.get(url, auth=auth)

    # handle redirection via meta refresh (not handled in requests)
    refresh_interval = get_equiv_refresh_interval()
    if refresh_interval:
        visited = set()  # break circular refresh chains
        while True:
            interval, url = self._get_meta_refresh(result)
            if not url:
                break
            if interval >= refresh_interval:
                (log.info if self.show_refresh else log.debug)(
                    "Meta refresh interval too long: %d", interval
                )
                break
            if url in visited:
                log.warning("Circular meta refresh detected!")
                break
            (log.info if self.show_refresh else log.debug)(
                "Meta refresh to new URL: %s", url
            )
            result = self._client.get(url)
            visited.add(url)

    if func_name in ("follow_link", "open") and (
        # if we're really reloading and just didn't say so, don't store
        self.result is not None and self.result.url != result.url
    ):
        self._history.append(self.result)

    self.result = ResultWrapper(result)
# created when this module is imported
browser = TwillBrowser()  # the global twill browser instance
"""The twill shell commands.
Implementation of all the individual twill commands that are available
through the twill command line script.
"""
import getpass
import re
import sys
import time
from os.path import sep
from typing import Any, Dict, Optional
from . import log, set_err_out, set_output, utils
from .agents import agents
from .browser import browser
from .errors import TwillAssertionError, TwillException
from .namespaces import get_twill_glocals
# The names exported by this module. The "show" command in this module
# also looks up "show_<what>" in this list.
# Note: the "timeout" command had been missing from this list.
# noinspection SpellCheckingInspection
__all__ = [
    "add_auth",
    "add_cleanup",
    "add_extra_header",
    "agent",
    "back",
    "browser",
    "clear_cookies",
    "clear_extra_headers",
    "code",
    "config",
    "debug",
    "echo",
    "exit",
    "extend_with",
    "find",
    "follow",
    "form_action",
    "formaction",
    "fa",
    "form_clear",
    "formclear",
    "form_file",
    "formfile",
    "form_value",
    "formvalue",
    "fv",
    "get_input",
    "getinput",
    "get_password",
    "getpassword",
    "go",
    "info",
    "load_cookies",
    "not_find",
    "notfind",
    "options",
    "redirect_error",
    "redirect_output",
    "reload",
    "reset_browser",
    "reset_error",
    "reset_output",
    "run",
    "run_file",
    "runfile",
    "rf",
    "save_cookies",
    "save_html",
    "setglobal",
    "set_global",
    "setlocal",
    "set_local",
    "show",
    "showcookies",
    "show_cookies",
    "show_extra_headers",
    "showforms",
    "show_forms",
    "showhistory",
    "show_history",
    "showhtml",
    "show_html",
    "showlinks",
    "show_links",
    "sleep",
    "submit",
    "tidy_ok",
    "timeout",
    "title",
    "url",
]
def reset_browser(base_url: str = "") -> None:
    """>> reset_browser [base_url]

    Reset the browser completely.

    The options are reset to their default values as well.
    """
    browser.reset(base_url=base_url)
    # restore the default options in place
    options.clear()
    options.update(default_options)
# noinspection PyShadowingBuiltins
def exit(code: str = "0") -> None:
    """>> exit [<code>]

    Exit twill with given exit code (default 0, "no error").
    """
    # an empty code counts as "no error"
    exit_code = int(code) if code else 0
    raise SystemExit(exit_code)
def go(url: str) -> None:
    """>> go <url>

    Visit the URL given.
    """
    # delegated to the global browser instance
    browser.go(url)
def reload() -> None:
    """>> reload

    Reload the current URL.
    """
    # delegated to the global browser instance
    browser.reload()
def timeout(timeout: float = 10) -> None:
    """>> timeout [timeout]

    Set the request timeout (0 for no timeout).
    """
    seconds = timeout
    if isinstance(seconds, str):
        # an empty string counts as zero, i.e. no timeout
        seconds = float(seconds) if seconds else 0
    browser.timeout = seconds if seconds else None
def code(should_be: int) -> None:
    """>> code <int>

    Check to make sure the response code for the last page is as given.
    """
    # commands may be called with strings, so convert if necessary
    expected = should_be if isinstance(should_be, int) else int(should_be)
    actual = browser.code
    if actual != expected:
        raise TwillAssertionError(f"code is {actual} != {expected}")
def tidy_ok() -> None:
    """>> tidy_ok

    Assert that 'tidy' does not produce any warnings or errors when run on
    the current page.

    If 'tidy' cannot be run, will fail silently (unless 'require_tidy' option
    is true; see 'config' command).
    """
    page = browser.html
    if page is None:
        raise TwillAssertionError("not viewing HTML!")

    clean_page, errors = utils.run_tidy(page)
    if clean_page is None:  # tidy doesn't exist...
        if options.get("require_tidy"):
            raise TwillAssertionError("cannot run 'tidy'")
    elif errors:
        # tidy did run, but complained about the page
        raise TwillAssertionError(f"tidy errors:\n====\n{errors}\n====\n")
def url(should_be: str) -> str:
    """>> url <pattern>

    Check to make sure that the current URL matches the regex pattern.
    The local variable __match__ is set to the matching part of the URL.
    """
    pattern = re.compile(should_be)
    current_url = browser.url
    if current_url is None:
        # without a current page there is nothing that could match
        current_url, found = "", None
    else:
        found = pattern.search(current_url)
    if not found:
        raise TwillAssertionError(
            f"current url is '{current_url}';\n"
            f"does not match '{should_be}'\n"
        )
    # the first group is used if the pattern has groups
    matched = found.group(1 if found.groups() else 0)
    get_twill_glocals()[1]["__match__"] = matched
    return matched
def follow(what: str) -> str:
    """>> follow <pattern>

    Find the first link on the page matching the given regex pattern and
    then visit it.
    """
    link = browser.find_link(what)
    if not link:
        raise TwillAssertionError(f"no links match to '{what}'")
    browser.follow_link(link)
    current_url = browser.url
    if not current_url:
        raise TwillAssertionError(f"Cannot follow link '{link}'")
    return current_url
# the flag characters of the 'find' command that map to regex flags
_find_flags = {"i": re.IGNORECASE, "m": re.MULTILINE, "s": re.DOTALL}


def _parse_find_flags(flags: str) -> int:
    """Helper function to parse the find flags.

    Returns the combined regex flags for the given flag characters.

    Raises a TwillAssertionError if one of the characters is unknown.
    """
    re_flags = 0
    for char in flags:
        try:
            re_flags |= _find_flags[char]
        except KeyError as error:  # a dict lookup raises a KeyError
            raise TwillAssertionError(
                f"unknown 'find' flag {char!r}"
            ) from error
    return re_flags
def find(what: str, flags: str = "") -> str:
    """>> find <pattern> [<flags>]

    Succeed if the regular expression pattern can be found on the page.
    Sets the local variable __match__ to the matching text.

    Flags is a string consisting of the following characters:

    * i: ignore case
    * m: multi-line
    * s: dot matches all
    * x: use XPath expressions instead of regular expression

    For explanations of regular expressions, please see the Python re module
    documentation.
    """
    page = browser.html
    local_dict = get_twill_glocals()[1]
    if "x" not in flags:
        found = re.search(what, page, flags=_parse_find_flags(flags))
        if not found:
            raise TwillAssertionError(f"no match to '{what}'")
        # the first group is used if the pattern has groups
        match_str = found.group(1 if found.groups() else 0)
    else:
        elements = browser.xpath(what)
        if not elements:
            raise TwillAssertionError(f"no element to path '{what}'")
        # the text of the first element found counts as the match
        match_str = elements[0].text or ""
    local_dict["__match__"] = match_str
    return match_str
def not_find(what: str, flags: str = "") -> None:
    """>> not_find <pattern> [<flags>]

    Fail if the regular expression pattern can be found on the page.
    """
    try:
        find(what, flags)
    except TwillAssertionError:
        return  # nothing found, as expected
    raise TwillAssertionError(f"match to '{what}'")


# noinspection SpellCheckingInspection
notfind = not_find  # backward compatibility and convenience
def back() -> None:
    """>> back

    Return to the previous page.
    """
    # delegated to the global browser instance
    browser.back()
def show(what: Optional[str] = None) -> None:
    """>> show [<objects>]

    Show the specified objects (html, cookies, forms, links, history).
    """
    target = what or "html"
    command_name = f"show_{target}"
    # only exported show commands with purely alphabetic names are eligible
    if target.isalpha() and command_name in __all__:
        command = globals().get(command_name)
    else:
        command = None
    if not command:
        raise TwillException(f'Cannot show "{target}".')
    command()
def show_html() -> None:
    """>> show_html

    Show the HTML for the current page.

    Note: Use browser.html to get the HTML programmatically.
    """
    # leading and trailing whitespace is not shown
    html = browser.html.strip()
    log.info("")
    log.info(html)
    log.info("")


# noinspection SpellCheckingInspection
showhtml = show_html  # backward compatibility and consistency
def echo(*strs: str) -> None:
    """>> echo <list> <of> <strings>

    Echo the arguments to the screen.
    """
    message = " ".join(str(item) for item in strs)
    log.info(message)
def save_html(filename: Optional[str] = None) -> None:
    """>> save_html [<filename>]

    Save the HTML for the current page into <filename>.
    If no filename given, construct the filename from the URL.
    """
    html = browser.html
    if html is None:
        log.warning("No page to save.")
        return

    if filename is None:
        url = browser.url
        if url:
            # use the last part of the path, without the query string
            url = url.split("?", 1)[0]
            filename = url.rsplit("/", 1)[-1]
        if not filename:
            filename = "index.html"
        log.info("Using filename '%s'.", filename)

    encoding = browser.encoding or "utf-8"
    try:
        with open(filename, "w", encoding=encoding) as f:
            f.write(html)
    except UnicodeEncodeError:
        # fall back to UTF-8 if the page cannot be written
        # using the encoding reported for the page
        if encoding == "utf-8":
            raise
        with open(filename, "w", encoding="utf-8") as f:
            f.write(html)
def sleep(interval: str = "1") -> None:
    """>> sleep [<interval>]

    Sleep for the specified amount of time.
    If no interval is given, sleep for 1 second.
    """
    # the interval is converted to a float, so fractions are possible
    time.sleep(float(interval))
def agent(what: str) -> None:
    """>> agent <agent>

    Set the agent string (identifying the browser brand).

    Some convenient shortcuts:
    chrome_107, firefox_106, safari_605, edge_107, ie_11.

    See twill.agents for a list of all available shortcuts.
    """
    name = what.strip()
    # anything that is not a known shortcut is used as the agent string
    browser.agent_string = agents.get(name, name)
def submit(
    submit_button: Optional[str] = None, form_name: Optional[str] = None
) -> None:
    """>> submit [<submit_button> [<form_name>]]

    Submit the current form (the one last clicked on) by clicking on the
    given submission button. If no 'submit_button' is given, submit the
    current form by using the last clicked submit button.

    The form to submit is the last form clicked on with a 'form_value' command
    unless explicitly specified using 'form_name'.

    The button used to submit is chosen based on 'submit_button'.
    If 'submit_button' is given, it's matched against buttons using
    the same rules that 'form_value' uses. If 'submit_button' is not given,
    this function uses the last submit button clicked on by 'form_value'.
    If none can be found, it submits the form with no submit button clicked.
    """
    browser.submit(submit_button, form_name)
def show_forms() -> None:
    """>> show_forms

    Show all the forms on the current page.

    Note: Use browser.forms to get the forms programmatically.
    """
    # delegated to the global browser instance
    browser.show_forms()


# noinspection SpellCheckingInspection
showforms = show_forms  # backward compatibility and convenience
def show_links() -> None:
    """>> show_links

    Show all the links on the current page.

    Note: Use browser.links to get the links programmatically.
    """
    # delegated to the global browser instance
    browser.show_links()


# noinspection SpellCheckingInspection
showlinks = show_links  # backward compatibility and convenience
def show_history() -> None:
    """>> show_history

    Show the browser history (what URLs were visited).

    Note: Use browser.history to get the history programmatically.
    """
    # delegated to the global browser instance
    browser.show_history()


# noinspection SpellCheckingInspection
showhistory = show_history  # backward compatibility and convenience
def form_clear(form_name: str) -> None:
    """>> form_clear <form_name>

    Run 'clear' on all the controls in this form.

    Controls that are read-only or disabled, and submit, image and hidden
    controls are left untouched.
    """
    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")
    for control in form.inputs:
        if "readonly" in control.attrib or "disabled" in control.attrib:
            continue
        if getattr(control, "type", None) in ("submit", "image", "hidden"):
            continue
        del control.value
    browser.last_submit_button = None


# noinspection SpellCheckingInspection
formclear = form_clear  # backward compatibility and convenience
def form_value(form_name: str, field_name: str, value: str) -> None:
    """>> form_value <form_name> <field_name> <value>

    Set value of a form field.

    There are some ambiguities in the way 'form_value' deals with lists:
    'form_value' will *add* the given value to a list of multiple selection,
    for lists that allow it.

    Forms are matched against 'form_name' as follows:
      1. regex match to actual form name;
      2. if 'form_name' is an integer, it's tried as an index.

    Form controls are matched against 'field_name' as follows:
      1. unique exact match to control name;
      2. unique regex match to control name;
      3. if field_name is an integer, it's tried as an index;
      4. unique & exact match to submit-button values.

    'form_value' ignores read-only fields completely; if they're readonly,
    nothing is done, unless the config options ('config' command) are
    changed.

    'form_value' is available as 'fv' as well.
    """
    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")

    control = browser.form_field(form, field_name)

    # the click is recorded even if the value will not be set below
    browser.clicked(form, control)

    attrib = getattr(control, "attrib", {})
    if "readonly" in attrib:
        if options["readonly_controls_writeable"]:
            log.info("Forcing read-only form field to writeable.")
            del attrib["readonly"]
        else:
            log.info("Form field is read-only or ignorable; nothing done.")
            return

    if getattr(control, "type", None) == "file":
        raise TwillException(
            'form field is for file upload; use "form_file" instead'
        )

    value = browser.decode(value)
    utils.set_form_control_value(control, value)


# noinspection SpellCheckingInspection
fv = formvalue = form_value  # backward compatibility and convenience
def form_action(form_name: str, action_url: str) -> None:
    """>> form_action <form_name> <action_url>

    Sets action parameter on form to action_url.

    'form_action' is available as 'fa' as well.
    """
    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")
    log.info("Setting action for form %s to %s.", form, action_url)
    # the action URL is set as given
    form.action = action_url


# noinspection SpellCheckingInspection
fa = formaction = form_action  # backward compatibility and convenience
def form_file(
    form_name: str,
    field_name: str,
    filename: str,
    content_type: Optional[str] = None,
) -> None:
    """>> form_file <form_name> <field_name> <filename> [<content_type>]

    Upload a file via an "upload file" form field.

    The file is opened here and registered with the browser under the
    given field name.
    """
    # slashes in the filename are replaced with the local path separator
    filename = filename.replace("/", sep)

    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")
    control = browser.form_field(form, field_name)

    if getattr(control, "type", None) != "file":
        raise TwillException("ERROR: field is not a file upload field!")

    browser.clicked(form, control)
    # NOTE(review): the content type is only used for choosing the file
    # mode, and the prefixes checked here do not match the usual names
    # like "text/plain" or "text/html" -- confirm whether this is intended
    plain = content_type and content_type.startswith(("plain/", "html/"))
    # NOTE(review): the file is not closed in this function; presumably
    # it must stay open until the form is submitted -- confirm
    fp = open(filename, "r" if plain else "rb")  # noqa: SIM115
    browser.add_form_file(field_name, fp)

    log.info(
        'Added file "%s" to file upload field "%s".', filename, field_name
    )


# noinspection SpellCheckingInspection
formfile = form_file  # backward compatibility and convenience
def extend_with(module_name: str) -> None:
    """>> extend_with <module_name>

    Import contents of given module.

    All names exported by the module are registered as commands.
    """
    global_dict, local_dict = get_twill_glocals()

    # NOTE(review): the module name is interpolated into code that is
    # executed; it must only come from a trusted twill script
    exec(f"from {module_name} import *", global_dict)

    # now add the commands into the commands available for the shell,
    # and print out some nice stuff about what the extension module does.

    mod = sys.modules[module_name]

    # imported locally, presumably to avoid a circular import -- confirm
    from . import parse, shell

    fn_list = getattr(mod, "__all__", None)
    if fn_list is None:
        # without __all__, every callable found in the module is used
        fn_list = [fn for fn in dir(mod) if callable(getattr(mod, fn))]

    for command in fn_list:
        fn = getattr(mod, command)
        shell.add_command(command, fn.__doc__)
        parse.command_list.append(command)

    info, debug = log.info, log.debug

    info("Imported extension module '%s'.", module_name)
    debug("(at %s)", mod.__file__)

    if shell.interactive:
        # show the module docstring or else the list of new commands
        if mod.__doc__:
            info("\nDescription:\n\n%s\n", mod.__doc__.strip())
        elif fn_list:
            info("New commands:\n")
            for name in fn_list:
                info("\t%s", name)
            info("")
def get_input(prompt: str) -> str:
    """>> get_input <prompt>

    Get input, store it in '__input__'.
    """
    local_dict = get_twill_glocals()[1]
    inp = input(prompt)
    # the input is stored as a local variable and returned as well
    local_dict["__input__"] = inp
    return inp


# noinspection SpellCheckingInspection
getinput = get_input  # backward compatibility and convenience
def get_password(prompt: str) -> str:
    """>> get_password <prompt>

    Get a password ("invisible input"), store it in '__password__'.
    """
    local_dict = get_twill_glocals()[1]
    # we use sys.stdin here in order to get the same behaviour on Unix
    # as on other platforms and for better testability of this function
    inp = getpass.getpass(prompt, sys.stdin)
    # the password is stored as a local variable and returned as well
    local_dict["__password__"] = inp
    return inp


# noinspection SpellCheckingInspection
getpassword = get_password  # backward compatibility and convenience
def save_cookies(filename: str) -> None:
    """>> save_cookies <filename>

    Save all the current cookies to the given file.
    """
    # delegated to the global browser instance
    browser.save_cookies(filename)
def load_cookies(filename: str) -> None:
    """>> load_cookies <filename>

    Clear the cookie jar and load cookies from the given file.
    """
    # delegated to the global browser instance
    browser.load_cookies(filename)
def clear_cookies() -> None:
    """>> clear_cookies

    Clear the cookie jar.
    """
    # delegated to the global browser instance
    browser.clear_cookies()
def show_cookies() -> None:
    """>> show_cookies

    Show all the cookies in the cookie jar.

    Note: Use browser.cookies to get the cookies programmatically.
    """
    # delegated to the global browser instance
    browser.show_cookies()


# noinspection SpellCheckingInspection
showcookies = show_cookies  # backward compatibility and convenience
def add_auth(realm: str, uri: str, user: str, passwd: str) -> None:
    """>> add_auth <realm> <uri> <user> <passwd>

    Add HTTP Basic Authentication information for the given realm/uri.

    If the realm is None or the option 'with_default_realm' is set, the
    credentials are also stored for the URI alone.
    """
    if realm is not None:
        # credentials for the combination of URI and realm
        browser.add_creds((uri, realm), user, passwd)
        log.info(
            "Added auth info: realm '%s' / URI '%s' / user '%s'.",
            realm,
            uri,
            user,
        )
    if realm is None or options["with_default_realm"]:
        # credentials for the URI, regardless of the realm
        browser.add_creds(uri, user, passwd)
        if realm is None:
            log.info("Added auth info: URI '%s' / user '%s'.", uri, user)
def debug(what: str, level: str) -> None:
    """>> debug <what> <level>

    <what> can be:
       * http (any level >= 1), to display the HTTP transactions.
       * commands (any level >= 1), to display the commands being executed.
       * equiv-refresh (any level >= 1) to display HTTP-EQUIV refresh handling.

    The level can be given as a number or as a boolean value.
    """
    from . import parse

    try:
        num_level = int(level)
    except ValueError:
        # not a number, so interpret the level as a boolean value
        num_level = 1 if utils.make_boolean(level) else 0

    # note that this is logged before the debugging type has been checked
    log.info("DEBUG: Setting %s debugging to level %d.", what, num_level)

    if what == "http":
        browser.debug_level = num_level
    elif what == "equiv-refresh":
        browser.show_refresh = num_level > 0
    elif what == "commands":
        parse.log_commands(num_level > 0)
    else:
        raise TwillException(f'Unknown debugging type: "{what}"')
def run(cmd: str) -> None:
    """>> run <command>

    <command> can be any valid Python command; 'exec' is used to run it.

    Before execution, the local variables '__cmd__' (the command string)
    and '__url__' (the current browser URL) are set in the twill local
    namespace. The command runs with the twill global and local
    dictionaries as its namespaces.

    SECURITY NOTE: the command is passed unmodified to exec(); only run
    twill scripts that come from a trusted source.
    """
    # @CTB: use pyparsing to grok the command?  make sure that quoting works...

    # execute command.
    global_dict, local_dict = get_twill_glocals()

    # set __url__
    local_dict["__cmd__"] = cmd
    local_dict["__url__"] = browser.url

    exec(cmd, global_dict, local_dict)
def run_file(*args: str) -> None:
    """>> run_file <file1> [<file2> ...]

    Execute the given twill scripts or directories of twill scripts.

    'run_file' is available as 'rf' as well.
    """
    # imported here rather than at module level
    # (presumably to avoid a circular import -- TODO confirm)
    from . import parse

    # utils.gather_filenames() turns the arguments into the script file names
    filenames = utils.gather_filenames(args)
    for filename in filenames:
        # no_reset=True is passed so that the nested script does not reset
        # state -- see parse.execute_file() for the exact semantics
        parse.execute_file(filename, no_reset=True)


# noinspection SpellCheckingInspection
rf = runfile = run_file  # backward compatibility and convenience
def add_cleanup(*args: str) -> None:
    """>> add_cleanup <file1> [<file2> ...]

    Execute the given twill scripts after the current twill script.

    The script names are appended, in reverse order, to the list stored
    in the local variable '__cleanups__'.
    """
    _, local_vars = get_twill_glocals()
    pending = local_vars.setdefault("__cleanups__", [])
    scripts = utils.gather_filenames(args)
    log.debug("Adding cleanup scripts: %s", ", ".join(scripts))
    pending.extend(reversed(scripts))
def set_global(name: str, value: str) -> None:
    """>> set_global <name> <value>

    Sets the variable <name> to the value <value> in the global namespace.
    """
    global_vars = get_twill_glocals()[0]
    global_vars[name] = value


# noinspection SpellCheckingInspection
setglobal = set_global  # backward compatibility and convenience
def set_local(name: str, value: str) -> None:
    """>> set_local <name> <value>

    Sets the variable <name> to the value <value> in the local namespace.
    """
    local_vars = get_twill_glocals()[1]
    local_vars[name] = value


# noinspection SpellCheckingInspection
setlocal = set_local  # backward compatibility and convenience
def title(what: str) -> str:
    """>> title <pattern>

    Succeed if the regular expression pattern is in the page title.

    The matched text (the first group if the pattern has groups, otherwise
    the whole match) is stored in the local variable '__match__' and
    returned. A TwillAssertionError is raised if there is no match.
    """
    pattern = re.compile(what)
    page_title = browser.title
    if page_title is None:
        log.info("The page has no title.")
    else:
        log.info("The title is '%s'.", page_title)

    # a missing or empty title can never match
    found = pattern.search(page_title) if page_title else None
    if found is None:
        raise TwillAssertionError(f"The title does not contain '{what}'.")

    matched = found.group(1) if found.groups() else found.group(0)
    get_twill_glocals()[1]["__match__"] = matched
    return matched
def redirect_output(filename: str) -> None:
    """>> redirect_output <filename>

    Append all twill output to the given file.
    """
    # The file is opened in append mode and handed over to set_output();
    # it is deliberately not closed here (hence no 'with' statement).
    fp = open(filename, "a", encoding="utf-8")  # noqa: SIM115
    set_output(fp)
def reset_output() -> None:
    """>> reset_output

    Reset twill output to go to the screen.
    """
    # passing None to set_output() selects the default output
    set_output(None)
def redirect_error(filename: str) -> None:
    """>> redirect_error <filename>

    Append all twill error output to the given file.
    """
    # The file is opened in append mode and handed over to set_err_out();
    # it is deliberately not closed here (hence no 'with' statement).
    fp = open(filename, "a", encoding="utf-8")  # noqa: SIM115
    set_err_out(fp)
def reset_error() -> None:
    """>> reset_error

    Reset twill error output to go to the screen.
    """
    # passing None to set_err_out() selects the default error output
    set_err_out(None)
def add_extra_header(header_key: str, header_value: str) -> None:
    """>> add_extra_header <name> <value>

    Add an HTTP header to each HTTP request.  See 'show_extra_headers' and
    'clear_extra_headers'.
    """
    # an existing header with the same name is overwritten
    browser.headers[header_key] = header_value
def show_extra_headers() -> None:
    """>> show_extra_headers

    Show any extra headers being added to each HTTP request.
    """
    extra_headers = browser.headers
    if not extra_headers:
        log.info("** no extra HTTP headers **")
        return

    log.info("\nThe following HTTP headers are added to each request:\n")
    for name, content in extra_headers.items():
        log.info('\t"%s" = "%s"', name, content)
    log.info("")
def clear_extra_headers() -> None:
    """>> clear_extra_headers

    Remove all user-defined HTTP headers.  See 'add_extra_header' and
    'show_extra_headers'.
    """
    # delegates to the browser, which resets its header collection
    browser.reset_headers()
# Default values of the built-in twill options; the meaning of each key is
# described in the docstring of the 'config' command.
default_options: Dict[str, Any] = {
    "equiv_refresh_interval": 2,
    "readonly_controls_writeable": False,
    "require_tidy": False,
    "with_default_realm": False,
}

# A (shallow) copy is used, so that changing an option does not alter the
# defaults above.
options = default_options.copy()  # the global options dictionary
def config(key: Optional[str] = None, value: Any = None) -> None:
    """>> config [<key> [<int value>]]

    Configure/report various options.  If no <value> is given, report
    the current key value; if no <key> given, report current settings.

    Options starting with "tidy_" will be used to configure HTML tidy.

    So far:

     * 'equiv_refresh_interval', default 2 -- time limit for HTTP-EQUIV=REFRESH
     * 'readonly_controls_writeable', default False -- all controls writeable
     * 'require_tidy', default False -- *require* that tidy be installed
     * 'with_default_realm', default False -- use a default realm for HTTP AUTH
    """
    if key is None:
        # no key: report all current settings in sorted order
        names = sorted(options)
        log.info("\nCurrent configuration:\n")
        for name in names:
            log.info("\t%s : %s", name, options[name])
        log.info("")
        return

    current = options.get(key)
    if current is None and not key.startswith("tidy_"):
        # unknown keys are rejected, except for the free-form "tidy_" options
        log.error("no such configuration key '%s'", key)
        log.info("valid keys are: %s", sorted(options))
        raise TwillException(f"no such configuration key: {key!r}")

    if value is None:
        # key without value: report the current value
        log.info("\nkey %s: value %s\n", key, current)
        return

    # convert the new value to the type of the current one; bool must be
    # tested first, since bool is a subclass of int
    if isinstance(current, bool):
        value = utils.make_boolean(value)
    elif isinstance(current, int):
        value = utils.make_int(value)
    options[key] = value
def info() -> None:
    """>> info

    Report information on current page.

    Logs the URL, the HTTP code and the content type; for HTML pages,
    the page title and the number of forms (if any) are logged as well.
    If no page has been loaded yet, only a warning is logged.
    """
    current_url = browser.url
    if current_url is None:
        log.warning("We're not on a page!")
        return

    content_type = browser.response_headers["content-type"]
    # the header value may come as bytes; normalize it to str
    if isinstance(content_type, bytes):
        content_type = content_type.decode()
    # compare only the media type, ignoring parameters such as "; charset=..."
    is_html = content_type and content_type.split(";", 1)[0] == "text/html"
    code = browser.code

    # note: this local name shadows the enclosing function 'info'
    info = log.info
    info("\tURL: %s", current_url)
    info("\tHTTP code: %s", code)
    info("\tContent type: %s%s", content_type, " (HTML)" if is_html else "")
    if is_html:
        title = browser.title
        info("\tPage title: %s", title)
        forms = browser.forms
        if len(forms):
            info("\tThis page contains %d form(s)", len(forms))
    info("")
"""The twill exceptions."""
# Exception hierarchy: TwillException is the base class of the two more
# specific errors defined below.
class TwillException(Exception):  # noqa: N818
    """General twill exception."""


class TwillAssertionError(TwillException):
    """AssertionError to raise upon failure of some twill command."""


class TwillNameError(TwillException):
    """Error to raise when an unknown command is called."""
"""The twill extensions."""
"""Extension functions for parsing sys.argv.
Commands:
get_args -- load all command-line arguments after the last --
into $arg1...$argN.
"""
from twill import log, namespaces, shell
__all__ = ["get_args"]
def get_args(require: int = 0) -> None:
    """>> get_args [<require>]

    Load the command line arguments after the last '--' into $arg1...$argN,
    optionally requiring at least 'require' such arguments.

    The arguments are stored in the global namespace. A TwillAssertionError
    is raised if fewer than 'require' arguments are available.
    """
    global_vars, _ = namespaces.get_twill_glocals()

    required = int(require)
    given = len(shell.twill_args)
    if given < required:
        from twill.errors import TwillAssertionError

        raise TwillAssertionError(
            f"too few arguments; {given} rather than {required}"
        )

    if not shell.twill_args:
        log.info("no arguments to parse!")
        return

    global_vars.update(
        (f"arg{pos}", arg) for pos, arg in enumerate(shell.twill_args, 1)
    )
    count = len(shell.twill_args)
    log.info("get_args: loaded %d args as $arg1..$arg%d.", count, count)
"""Extension functions to check all of the links on a page.
Usage:
check_links [ <pattern> ]
Make sure that all the HTTP links on the current page can be visited
successfully. If 'pattern' is given, check only URLs that match that
regular expression.
If option 'check_links.only_collect_bad_links' is on, then all bad
links are silently collected across all calls to check_links. The
function 'report_bad_links' can then be used to report all the links,
together with their referring pages.
"""
import re
from http import HTTPStatus
from typing import Dict, List, Set
from twill import browser, commands, log, utils
from twill.errors import TwillAssertionError
__all__ = ["check_links", "report_bad_links", "good_urls", "bad_urls"]
# first, set up config options & persistent 'bad links' memory...

# Register the option with its default value only if it is not set yet, so
# that a value configured earlier survives when this module is loaded again.
# (The lookup formerly used the misspelled key
# 'check_links.only_collection_bad_links', which never exists, so that the
# option was unconditionally reset to False.)
if commands.options.get("check_links.only_collect_bad_links") is None:
    commands.options["check_links.only_collect_bad_links"] = False

good_urls: Set[str] = set()  # URLs that have been followed successfully
bad_urls: Dict[str, Set[str]] = {}  # broken URL -> URLs of referring pages
def check_links(pattern: str = "") -> None:
    r""">> check_links [<pattern>]

    Make sure that all the HTTP links on the current page can be visited
    with an HTTP response 200 (success).  If 'pattern' is given, interpret
    it as a regular expression that link URLs must contain in order to be
    tested, e.g.

        check_links https://.*\.google\.com

    would check only links to google URLs.  Note that because 'follow' is
    used to visit the pages, the referrer URL is properly set on the visit.

    If the option 'check_links.only_collect_bad_links' is set, broken links
    are recorded in 'bad_urls' together with the referring page instead of
    raising a TwillAssertionError.
    """
    debug, info = log.debug, log.info

    debug("in check_links")

    # compile the regex
    regex = re.compile(pattern) if pattern else None

    # iterate over all links, collecting those that match
    #
    # note that in the case of duplicate URLs, only one of the
    # links is actually followed!

    collected_urls: Set[str] = set()

    links = browser.links
    if not links:
        debug("no links to check!?")
        return

    for link in links:
        url = link.url
        url = url.split("#", 1)[0]  # get rid of subpage pointers

        # noinspection HttpUrlsUsage
        if not url.startswith(("http://", "https://")):
            debug("url '%s' is not an HTTP link; ignoring", url)
            continue

        if regex:
            if regex.search(url):
                collected_urls.add(url)
                debug("Gathered URL %s -- matched pattern", url)
            else:
                debug("URL %s doesn't match pattern", url)
        else:
            collected_urls.add(url)
            debug("Gathered URL %s.", url)

    # now, for each unique and unchecked URL, follow the link

    failed: List[str] = []
    for url in sorted(collected_urls):
        debug("Checking %s", url)
        if url in good_urls:
            debug("... already known as good")
        elif url in bad_urls:
            debug("... already collected as broken")
        else:
            try:
                browser.follow_link(url)
            except Exception:  # noqa: BLE001
                # count as failure
                code = int(HTTPStatus.NOT_FOUND)
            else:
                code = browser.code
                # return to the page whose links are being checked
                browser.back()

            if code == int(HTTPStatus.OK):
                debug("...success!")
                good_urls.add(url)
            else:
                debug("...failure!")
                failed.append(url)

    if commands.options["check_links.only_collect_bad_links"]:
        # collect mode: remember each failed URL with the current page as
        # its referrer, without raising an error
        for url in failed:
            referrers = bad_urls.setdefault(url, set())
            referrer = browser.url
            if referrer:
                info("*** %s", referrer)
                referrers.add(referrer)
    elif failed:
        info("\nCould not follow %d links:\n", len(failed))
        for url in failed:
            info("* %s", url)
        raise TwillAssertionError("broken links on page")
    else:
        info("\nNo broken links were detected.\n")
def report_bad_links(
    fail_if_exist: str = "true", flush_bad_links: str = "true"
) -> None:
    """>> report_bad_links [<fail-if-exist> [<flush-bad-links>]]

    Report all the links collected across check_links runs (collected
    if and only if the config option check_links.only_collect_bad_links
    is set).

    If <fail-if-exist> is true (the default) then the command will
    fail after reporting any bad links.

    If <flush-bad-links> is false (true by default) then the list of
    bad links will be retained across the function call.
    """
    info = log.info
    if not bad_urls:
        info("\nNo bad links to report.\n")
        return

    info("\nCould not follow %d links", len(bad_urls))
    for url in sorted(bad_urls):
        referrers = sorted(bad_urls[url])
        info("\tlink '%s' (occurs on: %s)", url, ",".join(referrers))

    # the list is flushed before failing, so that a failure does not
    # prevent the flush
    if utils.make_boolean(flush_bad_links):
        bad_urls.clear()

    if utils.make_boolean(fail_if_exist):
        raise TwillAssertionError("broken links encountered")
"""An extension function to iterate over a list of comma-separated values.
Function 'csv_iterate' reads a file containing one or more rows of
comma-separated columns, assigns them to col1...colN, and, for each row,
executes the given twill script.
"""
import csv
from twill import execute_file, log, namespaces
__all__ = ["csv_iterate"]
def csv_iterate(file_name: str, script_name: str) -> None:
    """>> csv_iterate <csv_file> <script>

    For each line in <csv_file>, read in a list of comma-separated values,
    put them in $col1...$colN, and execute <script>.

    The columns are stored in the global namespace. Note that columns set
    by a longer earlier row are not removed when a later row is shorter.
    """
    global_dict, _ = namespaces.get_twill_glocals()

    # The csv module requires files to be opened with newline="", otherwise
    # newlines embedded in quoted fields are not interpreted correctly.
    with open(file_name, encoding="utf-8", newline="") as csv_file:
        reader = csv.reader(csv_file)
        for i, row in enumerate(reader, 1):
            log.debug("csv_iterate: on row %d of %s", i, file_name)
            for j, col in enumerate(row, 1):
                global_dict[f"col{j}"] = col
            # run the script once per row, keeping the current state
            execute_file(script_name, no_reset=True)
"""Extension functions for manipulating the current working directory (cwd).
Commands:
chdir -- push the cwd onto the directory stack & change to the new location.
popd -- change to the last directory on the directory stack.
"""
import os
from twill import commands, log
__all__ = ["chdir", "popd"]
# stack of the directories left via 'chdir', most recent last
_dir_stack = []


def chdir(where: str) -> None:
    """>> chdir <where>

    Change to the new location, after saving the current directory onto
    the directory stack. The global variable __dir__ is set to the cwd.
    """
    cwd = os.getcwd()  # noqa: PTH109
    _dir_stack.append(cwd)
    log.debug('current directory: "%s"', cwd)

    os.chdir(where)
    log.info('changed directory to "%s"', where)

    # NOTE(review): __dir__ receives <where> exactly as given (possibly a
    # relative path), while 'popd' stores the absolute path saved above.
    commands.setglobal("__dir__", where)
def popd() -> None:
    """>> popd

    Change back to the last directory on the directory stack.  The global
    variable __dir__ is set to the cwd.
    """
    # raises IndexError if the directory stack is empty
    where = _dir_stack.pop()
    os.chdir(where)
    log.info('popped back to directory "%s"', where)

    commands.setglobal("__dir__", where)
"""Extension functions to help query/assert name service information.
Functions:
* dns_resolves -- assert that a host resolves to a specific IP address.
* dns_a -- assert that a host directly resolves to a specific IP address
* dns_cname -- assert that a host is an alias for another hostname.
* dns_mx -- assert that a given host is a mail exchanger for the given name.
* dns_ns -- assert that a given hostname is a name server for the given name.
"""
from typing import Optional
from twill.errors import TwillAssertionError
try:
from dns.ipv4 import inet_aton
from dns.name import from_text
from dns.rdatatype import RdataType
from dns.resolver import Answer, Resolver
except ImportError as error:
msg = str(error)
msg += "\nMust have dnspython installed to use the DNS extension module."
raise ImportError(msg) from error
def dns_a(host: str, ipaddress: str, server: Optional[str] = None) -> None:
    """>> dns_a <name> <ipaddress> [<name server>]

    Assert that <name> resolves to <ipaddress> (and is an A record).
    Optionally use the given name server.

    Raises a ValueError if <ipaddress> is not an IP address, and a
    TwillAssertionError if no A record carries that address.
    """
    if not is_ip_addr(ipaddress):
        raise ValueError(
            "<ipaddress> parameter must be an IP address, not a hostname"
        )

    records = _resolve(host, RdataType.A, server)
    if not any(record.address == ipaddress for record in records):
        raise TwillAssertionError
def dns_cname(host: str, cname: str, server: Optional[str] = None) -> None:
    """>> dns_cname <name> <alias_for> [<name server>]

    Assert that <name> is a CNAME alias for <alias_for>.
    Optionally use the given <name server>.

    Raises a ValueError if <alias_for> is an IP address, and a
    TwillAssertionError if no CNAME record points to <alias_for>.
    """
    if is_ip_addr(cname):
        raise ValueError(
            "<alias_for> parameter must be a hostname, not an IP address"
        )

    expected = from_text(cname)
    records = _resolve(host, RdataType.CNAME, server)
    if not any(record.target == expected for record in records):
        raise TwillAssertionError
def dns_resolves(
    host: str, ipaddress: str, server: Optional[str] = None
) -> None:
    """>> dns_resolves <name> <name2/ipaddress> [<name server>]

    Assert that <name> ultimately resolves to the given IP address (or
    the same IP address that 'name2' resolves to).
    Optionally use the given name server.

    Raises a TwillAssertionError if none of the A records matches.
    """
    # a host name as second argument is resolved to its IP address first
    expected = (
        ipaddress
        if is_ip_addr(ipaddress)
        else _resolve_name(ipaddress, server)
    )

    records = _resolve(host, RdataType.A, server)
    if not any(record.address == expected for record in records):
        raise TwillAssertionError
def dns_mx(host: str, mailserver: str, server: Optional[str] = None) -> None:
    """>> dns_mx <name> <mailserver> [<name server>]

    Assert that <mailserver> is a mailserver for <name>.
    Optionally use the given name server.

    Raises a TwillAssertionError if no MX record names <mailserver>.
    """
    expected = from_text(mailserver)
    records = _resolve(host, RdataType.MX, server)
    if not any(record.exchange == expected for record in records):
        raise TwillAssertionError
def dns_ns(host: str, query_ns: str, server: Optional[str] = None) -> None:
    """>> dns_ns <domain> <nameserver> [<name server to use>]

    Assert that <nameserver> is a name server for <domain>.
    Optionally use the given name server for the query.

    Raises a TwillAssertionError if no NS record names <nameserver>.
    """
    query_ns_name = from_text(query_ns)

    for answer in _resolve(host, RdataType.NS, server):
        if answer.target == query_ns_name:
            return

    # no NS record matched
    raise TwillAssertionError
def is_ip_addr(text: str) -> bool:
    """Check the given text to see if it's just an IP address.

    Returns True if 'text' can be parsed by dns.ipv4.inet_aton(), and
    False otherwise.
    """
    # dnspython reports a malformed address with dns.exception.SyntaxError,
    # which is not an OSError; without catching it, a host name would raise
    # instead of yielding False. OSError is still caught for compatibility
    # with the socket-based inet_aton().
    from dns.exception import SyntaxError as DNSSyntaxError

    try:
        inet_aton(text)
    except (OSError, DNSSyntaxError):
        return False
    return True
def _resolve_name(name: str, server: Optional[str] = None) -> str:
    """Resolve the given name to an IP address.

    An IP address is returned unchanged. Otherwise the name is resolved,
    via the given name server if one is specified, and the first answer
    is returned as a string.
    """
    if is_ip_addr(name):
        return name
    resolver = Resolver()
    if server:
        # the name server itself may be given by name; it is then resolved
        # with the default resolver configuration
        resolver.nameservers = [_resolve_name(server, None)]
    answers = resolver.resolve(name)

    return str(answers[0])
def _resolve(
    query: str, query_type: RdataType, server: Optional[str] = None
) -> Answer:
    """Resolve, perhaps via the given name server.

    Returns the answer of the resolver for the given query and record type.
    """
    resolver = Resolver()
    if server:
        # the name server may be given by name or by IP address
        resolver.nameservers = [_resolve_name(server, None)]
    return resolver.resolve(query, query_type)
"""Extension functions for easier form filling.
(This module is a dumping ground for features that may ultimately get
added into the main twill command set.)
Commands:
* fv_match -- fill in *all* fields that match a regex (unlike 'form_value'
which will complain about multiple matches). Useful for forms
with lots of repeated field names -- 'field-1', 'field-2', etc.
* fv_multi -- fill in multiple form fields at once, e.g.
fv_multi <form_name> field1=value1 field2=value2 field3=value3
* fv_multi_sub -- same as 'fv_multi', followed by a 'submit'.
"""
import re
from twill import browser, commands, log, utils
__all__ = ["fv_match", "fv_multi_match", "fv_multi", "fv_multi_sub"]
def fv_match(form_name: str, field_pattern: str, value: str) -> None:
    """>> fv_match <form_name> <field_pattern> <value>

    Set value of *all* form fields with a name that matches the given
    regular expression pattern.

    (Unlike 'form_value' or 'fv', this will not complain about multiple
    matches!)

    Read-only fields are skipped.
    """
    form = browser.form(form_name)
    if form is None:
        log.error("no such form '%s'", form_name)
        return

    pattern = re.compile(field_pattern)
    matching = [
        field for field in form.inputs if pattern.search(str(field.get("name")))
    ]
    if not matching:
        return

    log.info("-- matches %d", len(matching))
    changed = 0
    for field in matching:
        browser.clicked(form, field)
        if "readonly" not in field.attrib:
            changed += 1
            utils.set_form_control_value(field, value)
    log.info("set %d values total", changed)
def fv_multi_match(form_name: str, field_pattern: str, *values: str) -> None:
    """>> fv_multi_match <form_name> <field_pattern> <value>...

    Set value of each consecutive form field matching the given pattern with
    the next specified value.  If there are no more values, use the last for
    all remaining form fields.

    Read-only fields are skipped (but still use up their value). If no
    values are given at all, an IndexError is raised when a field
    should be set.
    """
    form = browser.form(form_name)
    if form is None:
        log.error("no such form '%s'", form_name)
        return

    regex = re.compile(field_pattern)

    matches = [
        ctl for ctl in form.inputs if regex.search(str(ctl.get("name")))
    ]

    if matches:
        log.info("-- matches %d, values %d", len(matches), len(values))

        # Count the fields that have actually been set; the loop index must
        # not be reported, as it is off by one and includes read-only fields.
        n_set = 0
        for index, control in enumerate(matches):
            browser.clicked(form, control)
            if "readonly" in control.attrib:
                continue
            # use the value at the position of the field, or the last value
            # when the values have run out
            value = values[index] if index < len(values) else values[-1]
            utils.set_form_control_value(control, value)
            n_set += 1

        log.info("set %d values total", n_set)
def fv_multi(form_name: str, *pairs: str) -> None:
    """>> fv_multi <form_name> <pair>...

    Set multiple form fields; each pair should be of the form

        field_name=value

    The pair will be split around the first '=', and
    'fv <form_name> field_name value' will be executed in the order the
    pairs are given.
    """
    for pair in pairs:
        # a pair without '=' makes the unpacking fail with a ValueError
        field_name, value = pair.split("=", 1)
        commands.fv(form_name, field_name, value)
def fv_multi_sub(form_name: str, *pairs: str) -> None:
    """>> fv_multi_sub <form_name> <pair>...

    Set multiple form fields (as with 'fv_multi') and then submit().
    """
    # filling in the fields is exactly what 'fv_multi' does
    fv_multi(form_name, *pairs)
    commands.submit()
"""Extension functions for managing a SourceForge-based mailman queue.
The extension can discard all moderated messages in a mailman queue.
This is useful since currently there is no way to do this without manually
selecting 'discard' for each and every message.
"""
import re
from twill import browser, log, utils
__all__ = ["discard_all_messages", "exit_if_empty"]
def exit_if_empty() -> None:
    """>> exit_if_empty

    Exit the script currently running, if there are no deferred messages
    on the current page.
    """
    # a page without a form is taken as "no messages"
    form = browser.form()
    if not form:
        log.error("No messages; exiting.")
        raise SystemExit
def discard_all_messages() -> None:
    """>> discard_all_messages

    Set all buttons to "discard".
    """
    # In form "1", set every field with a purely numeric name to "3"
    # (presumably the value of the 'discard' choice -- TODO confirm).
    _form_value_by_regex_setall("1", "^\\d+$", "3")
def _form_value_by_regex_setall(
    form_name: str, field_name: str, value: str
) -> None:
    """Set all fields of a form whose name matches a regex to a value.

    Read-only fields are skipped. If the form does not exist, an error
    is logged and nothing else happens.
    """
    form = browser.form(form_name)
    if not form:
        log.error("no such form '%s'", form_name)
        return

    pattern = re.compile(field_name)
    matching = [
        field for field in form.inputs if pattern.search(str(field.name))
    ]
    if not matching:
        return

    log.info("-- matches %d", len(matching))
    changed = 0
    for field in matching:
        browser.clicked(form, field)
        if "readonly" in field.attrib:
            continue
        utils.set_form_control_value(field, value)
        changed += 1
    log.info("set %d values total", changed)
"""Suresh's extension for slicing and dicing using regular expressions."""
import re
from typing import Any
from twill import browser, log
from twill.namespaces import get_twill_glocals
def showvar(which: str) -> None:
    """>> showvar var

    Shows the value of the variable 'var'.

    Local variables take precedence over global variables of the same name.
    """
    global_vars, local_vars = get_twill_glocals()
    # merge both namespaces, with the local values overriding the global ones
    merged = {**global_vars, **local_vars}
    log.info(merged.get(str(which)))
def split(what: str) -> None:
    """>> split <regex>

    Sets __matchlist__ to re.split(regex, page).
    """
    # the page is the HTML of the current browser page
    page = browser.html

    m = re.split(what, page)

    # the result is stored in the local namespace
    global_dict, local_dict = get_twill_glocals()
    local_dict["__matchlist__"] = m
def findall(what: str) -> None:
    """>> findall <regex>

    Sets __matchlist__ to re.findall(regex, page).
    """
    page = browser.html

    # with re.DOTALL, '.' matches newlines too, so matches can span lines
    regex = re.compile(what, re.DOTALL)
    m = regex.findall(page)

    # the result is stored in the local namespace
    global_dict, local_dict = get_twill_glocals()
    local_dict["__matchlist__"] = m
def getmatch(where: str, what: str) -> None:
    """>> getmatch into_var expression

    Evaluates an expression against __match__ and puts it into 'into_var'.

    In the expression, the value of __match__ is available as 'm'.
    """
    global_dict, local_dict = get_twill_glocals()
    # raises KeyError if no '__match__' has been set yet
    match = local_dict["__match__"]
    local_dict[where] = _eval(match, what)
def setmatch(what: str) -> None:
    """>> setmatch expression

    Sets each element __matchlist__ to eval(expression); 'm' is set
    to each element of __matchlist__ prior to processing.
    """
    _, local_vars = get_twill_glocals()

    items = local_vars["__matchlist__"]
    if isinstance(items, str):
        # a single string is treated as a list with one element
        items = [items]

    local_vars["__matchlist__"] = [_eval(item, what) for item in items]
def _eval(match: str, exp: str) -> Any:
"""Evaluate an expression."""
return eval(exp, globals(), {"m": match}) # noqa: PGH001, S307
def popmatch(which: str) -> None:
    """>> popmatch index

    Pops __matchlist__[i] into __match__.

    The element is removed from __matchlist__ in the process.
    """
    global_dict, local_dict = get_twill_glocals()

    matchlist = local_dict["__matchlist__"]
    # the index arrives as a string from the twill script
    match = matchlist.pop(int(which))
    local_dict["__match__"] = match
"""A simple set of extensions to manage post-load requirements for pages.
Commands:
require -- turn on post-load requirements; either 'success' or
'links_ok'.
no_require -- turn off requirements.
skip_require -- for the next page visit, skip requirements processing.
flush_visited -- flush the list of already visited pages
(for links checking)
"""
from twill import browser, commands, log
__all__ = ["require", "skip_require", "flush_visited", "no_require"]
_requirements = []  # what requirements to satisfy


class Ignore:
    """Flags for suppressing the requirements check in the post-load hook."""

    once: bool = False  # reset after each hook call
    always: bool = False  # never reset
def skip_require() -> None:
    """>> skip_require

    Skip the post-page-load requirements.

    This only affects the next call of the post-load hook.
    """
    # the flag is reset by the post-load hook after it has been honored
    Ignore.once = True
def require(what: str) -> None:
    """>> require <what>

    After each page is loaded, require that 'what' be satisfied.  'what'
    can be:
      * 'success' -- HTTP return code is 200
      * 'links_ok' -- all the links on the page load OK (see 'check_links'
                      extension module)
    """
    # install the post-load hook function.
    # (only once, even if several requirements are added)
    # noinspection PyProtectedMember
    hooks = browser._post_load_hooks  # noqa: SLF001
    if _require_post_load_hook not in hooks:
        log.debug("INSTALLING POST-LOAD HOOK")
        hooks.append(_require_post_load_hook)

    # add the requirement.
    # (a requirement that is already registered is not added again)
    if what not in _requirements:
        log.debug("Adding requirement: %s", what)
        _requirements.append(what)
def no_require() -> None:
    """>> no_require

    Remove all post-load requirements.

    The post-load hook of this module is uninstalled as well.
    """
    # noinspection PyProtectedMember
    remaining = [
        hook
        for hook in browser._post_load_hooks  # noqa: SLF001
        if hook != _require_post_load_hook
    ]
    browser._post_load_hooks = remaining  # noqa: SLF001

    _requirements.clear()
def flush_visited() -> None:
    """>> flush_visited

    Flush the list of pages successfully visited already.
    """
    # the list is the 'good_urls' set kept by the check_links extension
    from .check_links import good_urls

    good_urls.clear()
def _require_post_load_hook(action: str, *_args, **_kwargs) -> None:
    """Post load hook function to be called after each page is loaded.

    See TwillBrowser._journey() for more information.

    Checks all registered requirements, unless the action is 'back' or
    the check is currently suppressed via the Ignore flags.
    """
    if action == "back":  # do nothing on a 'back'
        return

    if Ignore.once or Ignore.always:
        # a one-time skip is used up here; 'always' is left untouched
        Ignore.once = False
        return

    for what in _requirements:
        if what == "success":
            log.debug("REQUIRING success")
            commands.code(200)

        elif what == "links_ok":
            from .check_links import check_links, good_urls

            # suppress this hook while check_links() is running, since it
            # loads pages itself
            Ignore.always = True
            log.debug("REQUIRING functioning links")
            log.debug("(already visited:)")
            log.debug("\n\t".join(sorted(good_urls)))

            try:
                check_links()
            finally:
                # re-enable the hook even if check_links() failed
                Ignore.always = False
"""Used in test_shell, to test default command execution and extensions."""
from twill.errors import TwillAssertionError
__all__ = ["set_flag", "unset_flag", "assert_flag_set", "assert_flag_unset"]
class Flag:
    """Global flag value."""

    value: bool = False  # shared state, toggled by set_flag/unset_flag


def set_flag() -> None:
    """Set the flag."""
    Flag.value = True


def unset_flag() -> None:
    """Unset the flag."""
    Flag.value = False


def assert_flag_set() -> None:
    """Assert that the flag has been set.

    Raises a TwillAssertionError otherwise.
    """
    if not Flag.value:
        raise TwillAssertionError("The flag has not been set")


def assert_flag_unset() -> None:
    """Assert that the flag has not been set.

    Raises a TwillAssertionError otherwise.
    """
    if Flag.value:
        raise TwillAssertionError("The flag has been set")
"""The twill multiprocess execution system."""
import os
import sys
import time
from optparse import OptionParser
from pickle import dump, load
from . import execute_file, set_log_level
def main() -> None:
    """Run twill scripts in parallel.

    The parent process forks the requested number of child processes,
    waits for them and prints summary statistics. Each child executes
    the given scripts its share of times and passes its timing to the
    parent in a pickled file named '.status.<pid>'.
    """
    # forking is only supported on Unix
    try:
        if sys.platform == "win32":
            raise AttributeError
        fork = os.fork
    except AttributeError:
        sys.exit("Error: Must use Unix to be able to fork processes.")

    parser = OptionParser()
    add = parser.add_option

    add(
        "-u",
        "--url",
        nargs=1,
        action="store",
        dest="url",
        help="start at the given URL before each script",
    )

    add(
        "-n",
        "--number",
        nargs=1,
        action="store",
        dest="number",
        default=1,
        type="int",
        help="number of times to run the given script(s)",
    )

    add(
        "-p",
        "--processes",
        nargs=1,
        action="store",
        dest="processes",
        default=1,
        type="int",
        help="number of processes to execute in parallel",
    )

    options, args = parser.parse_args()

    if not args:
        sys.exit("Error: Must specify one or more scripts to execute.")

    # make sure that the current working directory is in the path
    if "" not in sys.path:
        sys.path.append("")

    # Distribute the runs over the processes: every child gets the average,
    # and the first child additionally takes the remainder.
    # NOTE(review): '--processes 0' raises a ZeroDivisionError here.
    average_number = options.number // options.processes
    last_number = average_number + options.number % options.processes
    child_pids = []
    is_parent = True
    repeat = 0

    # start a bunch of child processes and record their pids in the parent
    for i in range(options.processes):
        pid = fork()
        if pid:
            child_pids.append(pid)
        else:
            # in the child: fork() has returned 0; stop forking
            repeat = average_number if i else last_number
            is_parent = False
            break

    # set the children up to run and record their stats
    failed = False

    if is_parent:
        time.sleep(1)

        total_time = total_exec = 0

        # iterate over all the child pids, wait until they finish,
        # and then sum statistics
        for child_pid in child_pids[:]:
            pid, status = os.waitpid(child_pid, 0)
            if status or pid != child_pid:  # failure
                print(
                    f"[twill-fork parent: process {child_pid} FAILED:"
                    f" exit status {status}]"
                )
                print(
                    "[twill-fork parent:"
                    " (not counting stats for this process)]"
                )
                failed = True
            else:  # record statistics, otherwise
                filename = ".status.%d" % (child_pid,)
                # the status file is the one written by the child below
                with open(filename, "rb") as fp:
                    this_time, n_executed = load(fp)  # noqa: S301
                os.unlink(filename)  # noqa: PTH108
                total_time += this_time
                total_exec += n_executed

        # summarize
        print("\n----\n")
        print(f"number of processes: {options.processes}")
        print(f"total executed: {total_exec}")
        print(f"total time to execute: {total_time:.2f} s")
        if total_exec:
            avg_time = 1000 * total_time / total_exec
            print(f"average time: {avg_time:.2f} ms")
        else:
            print("(nothing completed, no average!)")
        print()

    else:
        pid = os.getpid()
        print(f"[twill-fork: pid {pid} : executing {repeat} times]")

        start_time = time.time()

        set_log_level("warning")
        for _i in range(repeat):
            for filename in args:
                execute_file(filename, initial_url=options.url)

        end_time = time.time()
        this_time = end_time - start_time

        # write statistics
        filename = f".status.{pid}"
        with open(filename, "wb") as fp:
            info = (this_time, repeat)
            dump(info, fp)

    # 'failed' can only be set in the parent; a child that gets here
    # exits with status 0
    sys.exit(-1 if failed else 0)


if __name__ == "__main__":
    main()
"""Global and local dictionaries, and initialization/utility functions."""
from typing import Any, Dict, Tuple
from . import errors
global_dict: Dict[str, Any] = {}  # the global dictionary


def init_global_dict() -> None:
    """Initialize the global dictionary with twill commands.

    This must be done after all the other modules are loaded, so that all
    the commands are already defined.

    The command names are registered in parse.command_list as well.
    """
    # noinspection PyCompatibility
    from . import commands, parse

    exported = commands.__all__
    for name in exported:
        global_dict[name] = getattr(commands, name)
    parse.command_list.extend(exported)
# The last element of this stack is the current local dictionary.
_local_dict_stack = []  # local dictionaries


def new_local_dict() -> Dict[str, Any]:
    """Initialize a new local dictionary and push it onto the stack.

    Returns the new (empty) dictionary.
    """
    local_dict: Dict[str, Any] = {}
    _local_dict_stack.append(local_dict)
    return local_dict
def pop_local_dict() -> Dict[str, Any]:
    """Get rid of the current local dictionary.

    Returns the removed dictionary; raises IndexError if the stack is empty.
    """
    return _local_dict_stack.pop()
def get_twill_glocals() -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Return both global and current local dictionary.

    If there is no local dictionary yet, a new one is created and pushed
    onto the stack first.
    """
    # NOTE(review): global_dict is initialized to {} in this module, so this
    # guard can only fire if the name has been rebound to None -- confirm.
    if global_dict is None:
        raise errors.TwillException("Must initialize global namespace first!")

    if not _local_dict_stack:
        new_local_dict()

    return global_dict, _local_dict_stack[-1]
"""Code parsing and evaluation for the twill mini-language."""
import re
import sys
from contextlib import nullcontext
from io import StringIO
from typing import Any, Callable, Dict, List, Optional, Sequence, TextIO, Tuple
from pyparsing import (
CharsNotIn,
Combine,
Group,
Literal,
Opt,
ParseException,
ParseResults,
Word,
ZeroOrMore,
pyparsing_unicode,
remove_quotes,
rest_of_line,
)
# noinspection PyCompatibility
from . import commands, log, namespaces
from .browser import browser
from .errors import TwillNameError
# pyparsing grammar for a single line of the twill mini-language:
# a line is either a comment or a command followed by its arguments.

# allow characters in full 8-bit range
char_range = pyparsing_unicode.Latin1
alphas, alphanums = char_range.alphas, char_range.alphanums
printables = char_range.printables

# the command name: basically, a valid Python identifier
command_word = Word(alphas + "_", alphanums + "_")
command = command_word.set_results_name("command")
command.set_name("command")

# arguments to it.

# we need to reimplement all this junk from pyparsing because pcre's
# idea of escapable characters contains a lot more than the C-like
# thing pyparsing implements
_bslash = "\\"
_sgl_quote = Literal("'")
_dbl_quote = Literal('"')
_escapables = printables
# a backslash followed by exactly one printable character
_escaped_char = Word(_bslash, _escapables, exact=2)
# inside a quoted string, the quote character can be escaped with a
# backslash or written twice in a row
dbl_quoted_string = (
    Combine(
        _dbl_quote
        + ZeroOrMore(CharsNotIn('\\"\n\r') | _escaped_char | '""')
        + _dbl_quote
    )
    .streamline()
    .set_name("string enclosed in double quotes")
)
sgl_quoted_string = (
    Combine(
        _sgl_quote
        + ZeroOrMore(CharsNotIn("\\'\n\r") | _escaped_char | "''")
        + _sgl_quote
    )
    .streamline()
    .set_name("string enclosed in single quotes")
)
# a quoted argument is passed on without the enclosing quotes
quoted_arg = dbl_quoted_string | sgl_quoted_string
quoted_arg.set_parse_action(remove_quotes)
quoted_arg.set_name("quoted_arg")

# an unquoted argument may not contain quotes or the comment character
plain_arg_chars = printables.replace("#", "").replace('"', "").replace("'", "")
plain_arg = Word(plain_arg_chars)
plain_arg.set_name("plain_arg")

# all arguments of a command are collected in one group named "arguments"
arguments_group = Group(ZeroOrMore(quoted_arg | plain_arg))
arguments = arguments_group.set_results_name("arguments")
arguments.set_name("arguments")

# comment line.
comment = Literal("#") + rest_of_line
comment = comment.suppress()  # comments do not show up in the results
comment.set_name("comment")

# a full line: a comment only, or a command with an optional comment
full_command = comment | (command + arguments + Opt(comment))
full_command.set_name("full_command")

command_list: List[str] = []  # filled in by namespaces.init_global_dict().
def process_args(
    args: Sequence[str],
    globals_dict: Dict[str, Any],
    locals_dict: Dict[str, Any],
) -> List[str]:
    """Evaluate the special variables in parsed string arguments.

    Three kinds of arguments are distinguished:

    - arguments starting with '__' are evaluated as a whole; a string
      result becomes one argument, any other result is spliced in
    - arguments starting with '$' (but not '${') are evaluated without
      the dollar sign and converted to a string
    - in all other arguments, '${...}' expressions are substituted

    Names that cannot be resolved leave the argument as it is.
    A new list is returned in which the two-character sequence
    backslash-n has been turned into a real newline.
    """
    processed: List[str] = []
    for raw in args:
        if raw.startswith("__"):
            # __variable substitution
            try:
                value = eval(raw, globals_dict, locals_dict)
            except NameError:  # not in dictionary; don't interpret
                value = raw
            log.info("VAL IS %s FOR %s", value, raw)
            if isinstance(value, str):
                processed.append(value)
            else:
                processed.extend(value)
            continue
        if raw.startswith("$") and not raw.startswith("${"):
            # $variable substitution
            try:
                value = str(eval(raw[1:], globals_dict, locals_dict))
            except NameError:  # not in dictionary; don't interpret
                value = raw
            processed.append(value)
            continue
        processed.append(variable_substitution(raw, globals_dict, locals_dict))
    return [item.replace("\\n", "\n") for item in processed]
def execute_command(
    cmd: str,
    args: Sequence[str],
    globals_dict: Dict[str, Any],
    locals_dict: Dict[str, Any],
    cmd_info: str,
) -> Any:
    """Actually execute the command.

    The command is looked up by name in the given dictionaries and called
    with the given arguments; 'cmd_info' is used as the file name of the
    compiled call so that it shows up in error tracebacks.

    Returns whatever the command returns.

    Raises TwillNameError if 'cmd' is not a known twill command.

    Side effects:
    - __args__ is set to the arguments
    - __cmd__ is set to the command, and
    - __url__ is set to the browser URL.
    """
    # these are set even when the command turns out to be unknown
    locals_dict["__cmd__"] = cmd
    locals_dict["__args__"] = args

    if cmd not in command_list:
        raise TwillNameError(f"unknown twill command: '{cmd}'")

    eval_str = f"{cmd}(*__args__)"

    # compile the code object so that we can get 'cmd_info' into the
    # error tracebacks
    code_obj = compile(eval_str, cmd_info, "eval")

    # evaluate the code object in the appropriate dictionary
    result = eval(code_obj, globals_dict, locals_dict)

    # set __url__ (only reached if the command did not raise)
    locals_dict["__url__"] = browser.url

    return result
_log_commands: Callable = log.debug # type: ignore[has-type]
def parse_command(
    line: str,
    globals_dict: Dict[str, Any],
    locals_dict: Dict[str, Any],
) -> Tuple[Optional[str], Optional[List[str]]]:
    """Parse one line of twill code.

    Returns a tuple with the command and its processed arguments, or
    (None, None) if the line could not be parsed (which is logged as an
    error) or did not yield any results (e.g. a comment).
    """
    try:
        results = full_command.parse_string(line)
    except ParseException as e:
        log.error("PARSE ERROR: %s", e)
        return None, None
    if not results:
        return None, None  # e.g. a comment
    _log_commands("twill: executing cmd '%s'", line.strip())
    raw_args = results.arguments.as_list()
    args = process_args(raw_args, globals_dict, locals_dict)
    return results.command, args
def execute_string(buf: str, **kw) -> None:
    """Execute commands from a string buffer.

    The keyword arguments are passed on to the script runner.  The
    'source' keyword is always overwritten, and unless 'no_reset' is
    passed explicitly, the browser is not reset before running.
    """
    fp = StringIO(buf)
    # the source must be a plain string (like the file name passed by
    # execute_file), since it is interpolated into error messages and
    # the file name used in tracebacks
    kw["source"] = "<string buffer>"
    kw.setdefault("no_reset", True)
    _execute_script(fp, **kw)
def execute_file(filename: str, **kw) -> None:
    """Execute the twill commands in the file with the given name.

    The file name "-" stands for standard input, which is left open
    afterwards; regular files are read as UTF-8 and closed when done.
    The keyword arguments are passed on to the script runner, with the
    'source' keyword set to the file name.
    """
    if filename == "-":
        source = nullcontext(sys.stdin)  # type: ignore[attr-defined]
    else:
        source = open(filename, encoding="utf-8")
    with source as inp:
        log.info("\n>> Running twill file %s", filename)
        kw["source"] = filename
        _execute_script(inp, **kw)
def _execute_script(inp: TextIO, **kw) -> None:
    """Execute lines taken from a file-like iterator.

    Recognized keyword arguments:

    - no_reset: if true, the browser is not reset before running
    - initial_url: a URL to go to before running the first command
    - never_fail: if true, errors are logged and execution continues
      with the next line instead of re-raising the error
    - source: a name for the input, used in messages and tracebacks

    The script runs with a fresh local dictionary which is removed again
    when the script is done.  If the script left a list of file names
    under '__cleanups__' in its local dictionary, these files are
    executed in reverse order afterwards, even if the script failed.
    """
    # initialize new local dictionary and get global and current local
    namespaces.new_local_dict()
    globals_dict, locals_dict = namespaces.get_twill_glocals()

    locals_dict["__url__"] = browser.url

    # reset browser
    if not kw.get("no_reset"):
        commands.reset_browser()

    # go to a specific URL?
    init_url = kw.get("initial_url")
    if init_url:
        commands.go(init_url)
        locals_dict["__url__"] = browser.url

    # should we catch exceptions on failure?
    catch_errors = kw.get("never_fail")

    # source_info stuff
    source_info = kw.get("source", "<input>")

    try:
        for line_no, line_raw in enumerate(inp, 1):
            line = line_raw.strip()
            if not line:  # skip empty lines
                continue

            cmd_info = f"{source_info}:{line_no}"
            log.info("AT LINE: %s", cmd_info)

            cmd, args = parse_command(line, globals_dict, locals_dict)
            if cmd is None:
                continue

            try:
                execute_command(cmd, args, globals_dict, locals_dict, cmd_info)
            except SystemExit:
                # abort script execution if a SystemExit is raised
                return
            except Exception as error:  # noqa: BLE001
                error_type = error.__class__.__name__ or "Error"
                # the line is never empty here, so it is always shown
                error_context = (
                    f"{error_type} raised on line {line_no}"
                    f" of '{source_info}'"
                    f" while executing\n>> {line}"
                )
                # remember only the first error of a run
                if not browser.first_error:
                    browser.first_error = error_context
                log.error("\nOops! %s", error_context)
                error_msg = str(error).strip()
                log.error("\nError: %s", error_msg)
                if not catch_errors:
                    raise
    finally:
        cleanups = locals_dict.get("__cleanups__")
        if cleanups:
            # the cleanup scripts must not change the outcome of the
            # main script, so save it and restore it afterwards
            first_error, result = browser.first_error, browser.result
            for filename in reversed(cleanups):
                log.info("\n>> Running twill cleanup file %s", filename)
                try:
                    with open(filename, encoding="utf-8") as cleanup_inp:
                        _execute_script(
                            cleanup_inp, source=filename, no_reset=True
                        )
                except Exception as error:  # noqa: BLE001
                    log.error(
                        ">> Cannot run cleanup file %s: %s", filename, error
                    )
            browser.reset()
            browser.first_error, browser.result = first_error, result
        namespaces.pop_local_dict()
def log_commands(flag: bool) -> bool:  # noqa: FBT001
    """Turn printing of commands as they are executed on or off.

    When turned on, executed commands are logged with log.info,
    otherwise with log.debug.  Returns the previous setting.
    """
    global _log_commands  # noqa: PLW0603
    # compare with == instead of 'is': if log.info is a bound method,
    # every attribute access creates a new method object, so an identity
    # check would always be false; for plain functions both are the same
    old_flag = _log_commands == log.info
    _log_commands = log.info if flag else log.debug
    return old_flag
_re_variable = re.compile(r"\${(.*?)}")


def variable_substitution(
    raw_str: str, globals_dict: Dict[str, Any], locals_dict: Dict[str, Any]
) -> str:
    """Replace all '${...}' expressions in the given string.

    Each expression is evaluated in the given dictionaries and replaced
    with the string form of its value.  Expressions raising a NameError
    are left in the string unchanged.
    """

    def expand(match: "re.Match[str]") -> str:
        try:
            value = eval(match.group(1), globals_dict, locals_dict)
            return str(value)
        except NameError:
            return match.group()

    return _re_variable.sub(expand, raw_str)
# Marker file for PEP 561. The twill package uses inline types.
"""A command-line interpreter for twill.
This is an implementation of a command-line interpreter based on the
'Cmd' class in the 'cmd' package of the default Python distribution.
"""
import os
import sys
import traceback
from cmd import Cmd
from contextlib import suppress
from io import TextIOWrapper
from optparse import OptionParser
from pathlib import Path
from typing import Any, Callable, List, Optional
# noinspection PyCompatibility
from . import (
__url__,
__version__,
commands,
execute_file,
log,
log_levels,
namespaces,
parse,
set_log_level,
set_output,
shutdown,
)
from .browser import browser
from .utils import Singleton, gather_filenames
readline: Any
try:
import readline
except ImportError:
# may not exist on some operating systems
readline = None
__all__ = ["main"]
# only the version number (the first whitespace-separated part of sys.version)
python_version = sys.version.split(None, 1)[0]
# the text shown by the --version option and the 'version' shell command
version_info = f"""
twill version: {__version__}
Python Version: {python_version}
See {__url__} for more info.
"""
def make_cmd_fn(cmd: str) -> Callable[[str], None]:
    """Make a command function.

    Dynamically define a twill shell command function based on an imported
    function name. (This is where the twill.commands functions actually
    get executed.)

    The returned function takes the rest of the input line, parses and
    processes the arguments and executes the command.  Errors are logged
    instead of being raised, except for SystemExit which is passed on.
    """

    def do_cmd(rest_of_line: str, cmd: str = cmd) -> None:
        global_dict, local_dict = namespaces.get_twill_glocals()

        args = []
        if rest_of_line.strip():
            try:
                # the first (and only) result is the group of arguments
                args = parse.arguments.parse_string(rest_of_line)[0]
                args = parse.process_args(args, global_dict, local_dict)
            except Exception as error:  # noqa: BLE001
                log.error("\nINPUT ERROR: %s\n", error)
                return

        try:
            parse.execute_command(
                cmd, args, global_dict, local_dict, "<shell>"
            )
        except SystemExit:
            raise
        except Exception as error:  # noqa: BLE001
            log.error("\nERROR: %s\n", error)
            log.debug(traceback.format_exc())

    return do_cmd
def make_help_cmd(cmd: str, docstring: str) -> Callable[[str], None]:
    """Make a help command function.

    Dynamically define a twill shell help function for the given
    command/docstring.  The returned function logs the docstring between
    two separator lines that are as wide as the longest line shown.
    """

    def help_cmd(message: str = docstring, cmd: str = cmd) -> None:
        message = message.strip()
        # collect the widths in a list, so that max() also works when
        # the message is empty after stripping (no lines at all)
        widths = [7 + len(cmd)]
        widths.extend(len(line.rstrip()) for line in message.splitlines())
        indent = "=" * max(widths)
        info = log.info
        info("\n%s", indent)
        info("\nHelp for command %s:\n", cmd)
        info(message)
        info("\n%s\n", indent)

    return help_cmd
def add_command(cmd: str, docstring: str) -> None:
    """Register a command with the given docstring in the current shell.

    Nothing happens if there is no command shell.
    """
    shell = get_command_shell()
    if not shell:
        return
    shell.add_command(cmd, docstring)
class TwillCommandLoop(Singleton, Cmd):
    """The command-line interpreter for twill commands.

    This is a Singleton object: you can't create more than one shell
    at a time.

    Note: most of the do_ and help_ functions are created dynamically
    by add_command() when the shell is initialized.
    """

    def __init__(
        self,
        stdin: Optional[TextIOWrapper] = None,
        initial_url: Optional[str] = None,
        *,
        fail_on_unknown: bool = False,
    ) -> None:
        """Initialize the shell.

        If 'stdin' is passed, input is read from that stream instead of
        using raw input.  If 'initial_url' is passed, the browser goes to
        that URL first.  If 'fail_on_unknown' is set, errors raised when
        running lines via default() are re-raised after being logged.
        """
        Cmd.__init__(self, stdin=stdin)

        self.use_rawinput = stdin is None

        # initialize a new local namespace.
        namespaces.new_local_dict()

        # import readline history, if available/possible.
        if readline is not None:
            with suppress(OSError):
                readline.read_history_file(".twill-history")

        # fail on unknown commands? for test-shell, primarily.
        self.fail_on_unknown = fail_on_unknown

        # handle initial URL argument
        if initial_url:
            commands.go(initial_url)

        self._set_prompt()

        self.names: List[str] = []

        global_dict, local_dict = namespaces.get_twill_glocals()

        # add all of the commands from twill
        for command in parse.command_list:
            fn = global_dict.get(command)
            self.add_command(command, fn.__doc__)

    def add_command(self, command: str, docstring: str) -> None:
        """Add the given command into the lexicon of all commands.

        A help function is only added if the docstring is not empty.
        """
        do_name = f"do_{command}"
        do_cmd = make_cmd_fn(command)
        setattr(self, do_name, do_cmd)

        if docstring:
            help_cmd = make_help_cmd(command, docstring)
            help_name = f"help_{command}"
            setattr(self, help_name, help_cmd)

        self.names.append(do_name)

    def get_names(self) -> List[str]:
        """Return the list of commands."""
        return self.names

    def complete_form_value(
        self, text: str, line: str, _begin: int, _end: int
    ) -> List[str]:
        """Command arg completion for the form_value command.

        The twill command has the following syntax:

            form_value <form_name> <field_name> <value>

        Form names are offered for the first and field names for the
        second argument; nothing is offered for other arguments.
        """
        # NOTE(review): the appended '.' presumably makes sure that the
        # argument being typed is counted even if it is still empty
        _cmd, args = parse.parse_command(line + ".", {}, {})
        if args is None:
            # the line could not be parsed as a command
            return []
        place = len(args)
        if place == 1:
            return self.provide_form_name(text)
        if place == 2:  # noqa: PLR2004
            form_name = args[0]
            return self.provide_field_name(form_name, text)
        return []

    complete_fv = complete_form_value  # alias

    @staticmethod
    def provide_form_name(prefix: str) -> List[str]:
        """Provide the list of form names on the given page.

        For each form, the ID is used if it starts with the given prefix,
        otherwise the name if that starts with the prefix.
        """
        names = []
        forms = browser.forms
        for form in forms:
            form_id = form.attrib.get("id")
            if form_id and form_id.startswith(prefix):
                names.append(form_id)
                continue
            name = form.attrib.get("name")
            if name and name.startswith(prefix):
                names.append(name)
        return names

    @staticmethod
    def provide_field_name(form_name: str, prefix: str) -> List[str]:
        """Provide the list of fields for the given form_name or number.

        For each field, the ID is used if it starts with the given prefix,
        otherwise the name if that starts with the prefix.
        """
        names = []
        form = browser.form(form_name)
        if form is not None:
            for field in form.inputs:
                field_id = field.attrib.get("id")
                if field_id and field_id.startswith(prefix):
                    names.append(field_id)
                    continue
                name = field.name
                if name and name.startswith(prefix):
                    names.append(name)
        return names

    def _set_prompt(self) -> None:
        """Set the prompt to the current page."""
        url = browser.url
        if url is None:
            url = " *empty page* "
        self.prompt = f"current page: {url}\n>> "

    def precmd(self, line: str) -> str:
        """Run before each command; the line is passed on unchanged."""
        return line

    def postcmd(self, stop: bool, line: str) -> bool:  # noqa: ARG002, FBT001
        """Run after each command; set prompt."""
        self._set_prompt()
        return stop

    def default(self, line: str) -> None:
        """Run when an unknown command is executed.

        The line is parsed and executed as a twill command.  Errors are
        logged, and only re-raised if 'fail_on_unknown' has been set.
        """
        # empty lines ==> emptyline(); here we just want to remove
        # leading whitespace.
        line = line.strip()

        # look for command
        global_dict, local_dict = namespaces.get_twill_glocals()
        cmd, args = parse.parse_command(line, global_dict, local_dict)

        # ignore comments & empty stuff
        if cmd is None:
            return

        try:
            parse.execute_command(
                cmd, args, global_dict, local_dict, "<shell>"
            )
        except SystemExit:
            raise
        except Exception as error:  # noqa: BLE001
            log.error("\nERROR: %s\n", error)
            if self.fail_on_unknown:
                raise

    def emptyline(self) -> Any:
        """Handle empty lines (by ignoring them)."""

    @staticmethod
    def do_EOF(*_args: str) -> None:  # noqa: N802
        """Exit on CTRL-D, saving the readline history if possible."""
        if readline is not None:
            with suppress(OSError):
                readline.write_history_file(".twill-history")

        raise SystemExit

    @staticmethod
    def help_help() -> None:
        """Show help for the help command."""
        log.info("\nWhat do YOU think the command 'help' does?!?\n")

    @staticmethod
    def do_version(*_args: str) -> None:
        """Show the version number of twill."""
        log.info(version_info)

    @staticmethod
    def help_version() -> None:
        """Show help for the version command."""
        log.info("\nPrint version information.\n")

    def do_exit(self, *_args: str) -> None:
        """Exit the twill shell."""
        raise SystemExit

    @staticmethod
    def help_exit() -> None:
        """Show help for the exit command."""
        log.info("\nExit twill.\n")

    do_quit = do_exit
    help_quit = help_exit
def get_command_shell() -> Optional[TwillCommandLoop]:
    """Return the command shell instance, or None if there is none."""
    try:
        return TwillCommandLoop.__it__
    except AttributeError:
        return None
twill_args: List[str] = [] # contains sys.argv *after* last '--'
interactive = False # 'True' if interacting with user
def main() -> None:  # noqa: C901, PLR0912, PLR0915
    """Run as shell script.

    Parses the command line, runs all twill scripts passed as arguments
    and, if requested or if no scripts have been passed, drops into an
    interactive shell.  Everything after the last '--' argument is not
    parsed, but stored in 'twill_args'.

    Always ends by raising SystemExit; the exit code is 1 if any of the
    scripts failed and 0 otherwise.
    """
    global interactive  # noqa: PLW0603

    # show the shorthand name for usage
    argv = sys.argv
    if argv[0].endswith("-script.py"):
        argv[0] = argv[0].rsplit("-", 1)[0]

    # make sure that the current working directory is in the path
    if "" not in sys.path:
        sys.path.append("")

    parser = OptionParser()
    add = parser.add_option

    add(
        "-d",
        "--dump-html",
        action="store",
        dest="dumpfile",
        help="dump HTML to this file on error",
    )
    add(
        "-f",
        "--fail",
        action="store_true",
        dest="fail",
        help="fail exit on first file to fail",
    )
    add(
        "-i",
        "--interactive",
        action="store_true",
        dest="interactive",
        help="drop into an interactive shell (after running files)",
    )
    add(
        "-l",
        "--loglevel",
        nargs=1,
        action="store",
        dest="loglevel",
        help="set the logging level",
    )
    add(
        "-n",
        "--never-fail",
        action="store_true",
        dest="never_fail",
        help="continue executing scripts past errors",
    )
    add(
        "-o",
        "--output",
        nargs=1,
        action="store",
        dest="outfile",
        help="print log to output file",
    )
    add(
        "-q",
        "--quiet",
        action="store_true",
        dest="quiet",
        help="do not show normal output",
    )
    add(
        "-u",
        "--url",
        nargs=1,
        action="store",
        dest="url",
        help="start at the given URL before each script",
    )
    add(
        "-v",
        "--version",
        action="store_true",
        dest="show_version",
        help="show version information and exit",
    )
    add(
        "-w",
        "--show-error-in-browser",
        action="store_true",
        dest="show_browser",
        help="show dumped HTML in a web browser ",
    )

    # parse arguments
    args = argv[1:]
    if "--" in args:
        # split the arguments at the last '--'
        for last in range(len(args) - 1, -1, -1):
            if args[last] == "--":
                twill_args[:] = args[last + 1 :]
                args = args[:last]
                break

    options, args = parser.parse_args(args)

    if options.show_version:
        log.info(version_info)
        sys.exit(0)

    quiet = options.quiet
    show_browser = options.show_browser
    dump_file = options.dumpfile
    out_file = options.outfile
    log_level = options.loglevel
    interactive = options.interactive or not args

    if out_file:
        # allow the form '-o=file'; '-' means no output file
        out_file = out_file.lstrip("=").lstrip() or None
        if out_file == "-":
            out_file = None

    if interactive and (quiet or out_file or dump_file or show_browser):
        sys.exit("Interactive mode is incompatible with -q, -o, -d and -w")

    if options.show_browser and (not dump_file or dump_file == "-"):
        sys.exit("Please also specify a dump file with -d")

    if log_level:
        # allow the form '-l=level'
        log_level = log_level.lstrip("=").lstrip() or None
    # check again, since nothing may be left after stripping
    if log_level:
        if log_level.upper() not in log_levels:
            log_level_names = ", ".join(sorted(log_levels))
            sys.exit(f"Valid log levels are: {log_level_names}")
        set_log_level(log_level)

    # the output must also be defined when neither -q nor -o is used
    # NOTE(review): assumes set_output(None) selects the default output
    # -- confirm against the definition of set_output
    output = None
    if quiet:
        output = open(os.devnull, "w")  # noqa: SIM115
    elif out_file:
        try:
            output = open(out_file, "w")  # noqa: SIM115
        except OSError as error:
            sys.exit(f"Invalid output file '{out_file}': {error}")
    set_output(output)

    # first find and run any scripts put on the command line

    failed = False
    if args:
        success = []
        failure = []

        filenames = gather_filenames(args)
        dump = None

        for filename in filenames:
            try:
                interactive = False
                execute_file(
                    filename,
                    initial_url=options.url,
                    never_fail=options.never_fail,
                )
                success.append(filename)
            except Exception as error:  # noqa: BLE001
                if dump_file:
                    dump = browser.dump
                if options.fail:
                    raise
                if browser.first_error:
                    log.error("\nFirst error: %s", browser.first_error)
                log.error("\n*** ERROR: %s", error)
                log.debug(traceback.format_exc())
                failure.append(filename)

        log.info("--")
        if dump:
            if dump_file == "-":
                log.info(
                    "HTML when error was encountered:\n\n%s\n--", dump.strip()
                )
            else:
                try:
                    with open(dump_file, "wb") as f:
                        f.write(dump)
                except OSError as e:
                    log.error("Could not dump to %s: %s\n", dump_file, e)
                else:
                    log.info("HTML has been dumped to %s\n", dump_file)

        log.info(
            "%d of %d files SUCCEEDED.",
            len(success),
            len(success) + len(failure),
        )
        if failure:
            log.error("Failed:\n\t%s", "\n\t".join(failure))
            failed = True

        if dump and show_browser:
            import webbrowser

            url = Path(dump_file).absolute().as_uri()
            log.debug("Running web browser on %s", url)
            webbrowser.open(url)

    # if no scripts to run or -i is set, drop into an interactive shell

    if interactive:
        welcome_msg = "" if args else "\n -= Welcome to twill =-\n"

        shell = TwillCommandLoop(initial_url=options.url)

        while True:
            try:
                shell.cmdloop(welcome_msg)
            except KeyboardInterrupt:
                sys.stdout.write("\n")
                break
            except SystemExit:
                raise

            # show the welcome message only once
            welcome_msg = ""

    shutdown()

    if failed:
        sys.exit(1)
    sys.exit(0)
if __name__ == "__main__":
main()
"""Support functionality for using twill in unit tests."""
import sys
import time
from io import StringIO
from multiprocessing import Process
from typing import Callable, Optional, TextIO
from .parse import execute_file
HOST = "127.0.0.1"  # interface to run the server on
PORT = 8080  # default port to run the server on
SLEEP = 0  # time to wait for the server to start


class TestInfo:
    """Container for everything needed to run one test.

    Holds the script to run, the function that runs the server, and the
    port the server runs on. Note that information about server port
    *must* be decided by the end of the __init__ function.

    The optional sleep argument specifies how many seconds to wait for the
    server to set itself up. Default is 0.
    """

    def __init__(
        self,
        script: str,
        server_fn: Callable[[], None],
        port: int = PORT,
        sleep: float = SLEEP,
    ) -> None:
        """Store the test parameters; no output has been captured yet."""
        self.script, self.server_fn = script, server_fn
        self.port, self.sleep = port, sleep
        self.stdout: Optional[TextIO] = None
        self.stderr: Optional[TextIO] = None

    def start_server(self) -> None:
        """Run the server function with its output captured.

        While the server function runs, everything written to stdout and
        stderr is collected in the 'stdout' and 'stderr' attributes.
        """
        original_streams = (sys.stdout, sys.stderr)
        self.stdout = StringIO()
        self.stderr = StringIO()
        sys.stdout, sys.stderr = self.stdout, self.stderr
        try:
            self.server_fn()
        finally:
            # put back the original streams
            sys.stdout, sys.stderr = original_streams

    def run_script(self) -> None:
        """Wait for the configured time, then run the script on the server."""
        time.sleep(self.sleep)
        execute_file(self.script, initial_url=self.url)

    @property
    def url(self) -> str:
        """Get the test server URL."""
        # noinspection HttpUrlsUsage
        return "http://" + HOST + ":" + str(self.port) + "/"
def run_test(test_info: TestInfo) -> None:
    """Run a test script against a server started in a sub process.

    The server process is always terminated when the script is done,
    even if the script raised an error.
    """
    # run server
    server = Process(target=test_info.start_server)
    server.start()

    # wait for server process to spin up
    timeout = max(1, test_info.sleep)
    step = min(0.125, 0.125 * timeout)
    elapsed: float = 0
    while True:
        if server.is_alive() or elapsed >= timeout:
            break
        time.sleep(step)
        elapsed += step

    # run twill test script
    try:
        test_info.run_script()
    finally:
        server.terminate()
"""Various ugly utility functions for twill.
Apart from various simple utility functions, twill's robust parsing
code is implemented in the ConfigurableParsingFactory class.
"""
import os
import re
from contextlib import suppress
from pathlib import Path
from typing import Any, List, NamedTuple, Optional, Sequence, Tuple, Union
from httpx import Headers, Response
from lxml.html import (
CheckboxGroup,
FormElement,
HtmlElement,
InputElement,
MultipleSelectOptions,
RadioGroup,
SelectElement,
TextareaElement,
)
from lxml.html import fromstring as html_to_tree
from lxml.html import tostring as tree_to_html
try:
import tidylib # type: ignore[import-untyped]
except (ImportError, OSError):
# ImportError can be raised when PyTidyLib package is not installed and
# OSError can be raised when the HTML Tidy shared library is not installed
tidylib = None
from . import log, twill_ext
from .errors import TwillException
__all__ = [
"gather_filenames",
"get_equiv_refresh_interval",
"html_to_tree",
"is_hidden_filename",
"is_twill_filename",
"print_form",
"make_boolean",
"make_int",
"make_twill_filename",
"run_tidy",
"tree_to_html",
"trunc",
"unique_match",
"CheckboxGroup",
"FieldElement",
"FormElement",
"HtmlElement",
"InputElement",
"Link",
"RadioGroup",
"ResultWrapper",
"SelectElement",
"Singleton",
"TextareaElement",
"UrlWithRealm",
"Response",
]
FieldElement = Union[
CheckboxGroup, InputElement, RadioGroup, SelectElement, TextareaElement
]
class Link(NamedTuple):
    """A link with some text and a URL."""

    # the text content of the link element
    text: str
    # the value of the href attribute of the link element
    url: str
# Depending on the configuration, realms can be ignored
UrlWithRealm = Union[str, Tuple[str, str]]
class Singleton:
    """Mixin that lets a class hand out one shared instance only."""

    def __new__(cls, *_args, **_kw) -> "Singleton":
        """Return the instance of this class, creating it if needed.

        The instance is looked up in the class itself, not in its base
        classes, so every subclass gets an instance of its own.
        """
        instance = cls.__dict__.get("__it__")
        if instance is None:
            instance = object.__new__(cls)
            cls.__it__ = instance
        return instance

    @classmethod
    def reset(cls) -> None:
        """Forget the instance, so that the next call creates a new one."""
        cls.__it__ = None
class ResultWrapper:
    """Wrap a response and present its content in a unified form.

    These objects are returned by browser._journey()-wrapped functions.
    """

    def __init__(self, response: Response) -> None:
        """Parse the content of the response and prepare its forms."""
        self.response = response
        self.encoding = response.encoding
        try:
            tree = html_to_tree(self.text)
        except ValueError:
            # may happen when there is an XML encoding declaration
            tree = html_to_tree(self.content)
        self.tree = tree
        self.xpath = tree.xpath
        self._fix_forms()

    @property
    def url(self) -> str:
        """Get the URL of the result page."""
        return str(self.response.url)

    @property
    def http_code(self) -> int:
        """Get the HTTP status code of the result page."""
        return self.response.status_code

    @property
    def text(self) -> str:
        """Get the text of the result page."""
        return self.response.text

    @property
    def content(self) -> bytes:
        """Get the binary content of the result page."""
        return self.response.content

    @property
    def headers(self) -> Headers:
        """Get the headers of the result page."""
        return self.response.headers

    @property
    def title(self) -> Optional[str]:
        """Get the title of the result page, or None if it has no title."""
        titles = self.xpath("//title[1]/text()")
        try:
            return titles[0]
        except IndexError:
            return None

    @property
    def links(self) -> List[Link]:
        """Get all links (anchors with a href) in the result page."""
        found = []
        for anchor in self.xpath("//a[@href]"):
            found.append(Link(anchor.text_content(), anchor.get("href")))
        return found

    def find_link(self, pattern: str) -> Optional[Link]:
        """Find the first link whose text or URL matches the pattern.

        Returns None if there is no such link on the result page.
        """
        search = re.compile(pattern).search
        candidates = (
            link for link in self.links if search(link.text) or search(link.url)
        )
        return next(candidates, None)

    def find_links(self, pattern: str) -> List[Link]:
        """Find all links whose text or URL matches the pattern."""
        search = re.compile(pattern).search
        matching = []
        for link in self.links:
            if search(link.text) or search(link.url):
                matching.append(link)
        return matching

    def form(self, name_or_num: Union[str, int] = 1) -> Optional[FormElement]:
        """Get the form with the given name or number on the result page.

        A string is first compared with the form IDs, then searched as
        regular expression in the form names, and at last it is taken
        as the number of the form, counting from one.

        Returns None if no such form can be found on the result page.
        """
        forms = self.forms

        if isinstance(name_or_num, str):
            # first, try ID
            by_id = (
                form
                for form in forms
                if form.get("id") and form.get("id") == name_or_num
            )
            form = next(by_id, None)
            if form is not None:
                return form

            # next, try regex with name
            search = re.compile(name_or_num).search
            for form in forms:
                form_name = form.get("name")
                if form_name and search(form_name):
                    return form

        # last, try number
        try:
            index = int(name_or_num) - 1
        except ValueError:
            return None
        if 0 <= index < len(forms):
            return forms[index]
        return None

    def _fix_forms(self) -> None:
        """Fix forms on the page for use with twill."""
        # put all stray fields into a form
        orphans = self.xpath("//input[not(ancestor::form)]")
        if orphans:
            # the form with the stray fields comes before all other forms
            markup = (
                b"<form>"
                + b"".join(tree_to_html(orphan) for orphan in orphans)
                + b"</form>"
            )
            forms = html_to_tree(markup).forms
            forms.extend(self.tree.forms)
            self.forms = forms
        else:
            self.forms = self.tree.forms

        # convert all submit button elements to input elements, since
        # otherwise lxml will not recognize them as form input fields
        for form in self.forms:
            buttons = form.xpath("//button[@type='submit']")
            for button in buttons:
                button.tag = "input"
def trunc(s: Optional[str], length: int) -> str:
    """Truncate a string to a given length.

    Strings longer than the given length are cut off after the first
    (length-4) characters and ' ...' is appended, so that the result has
    exactly the given length.  If the length is too small to hold the
    ' ...' marker, the string is simply cut off at the given length.

    Shorter strings are returned unchanged, None as an empty string.
    """
    if not s or len(s) <= length:
        return s or ""
    if length < 4:  # noqa: PLR2004
        # no room for the marker; a negative slice index would count from
        # the end of the string and make the result too long
        return s[: max(length, 0)]
    return s[: length - 4] + " ..."
def print_form(form: FormElement, n: int) -> None:
    """Log a table with all fields of the given form.

    The headline shows the assigned number of the form, and its name if
    it has one.  Each field is shown with its number, name, type, ID and
    value; for fields with options, the options are listed as well.
    """
    info = log.info
    form_name = form.get("name")
    if form_name:
        info("\nForm name=%s (#%d)", form_name, n)
    else:
        info("\nForm #%d", n)
    if form.inputs is not None:
        info(
            "## __Name__________________"
            " __Type___ __ID________ __Value__________________"
        )
        for index, field in enumerate(form.inputs, 1):
            value = field.value
            value_options = getattr(field, "value_options", None)
            if not value_options:
                value_displayed = f"{value}"
            else:
                quoted = [
                    f"'{getattr(opt, 'name', opt)}'" for opt in value_options
                ]
                items = ", ".join(quoted)
                value_displayed = f"{value} of {items}"
            # fields without a type attribute are shown as 'select'
            field_type = getattr(field, "type", "select")
            columns = [
                f"{index:2}",
                f"{trunc(field.name, 24):24}",
                f"{trunc(field_type, 9):9}",
                f"{trunc(field.get('id'), 12):12}",
                trunc(value_displayed, 40),
            ]
            info(" ".join(columns))
    info("")
def make_boolean(value: Any) -> bool:
    """Convert the input value into a boolean.

    The string form of the value is examined, ignoring case and
    surrounding whitespace.  Accepted are the words true/false and
    on/off, the signs +/-, and integers (zero is false, all other
    numbers are true).  Anything else raises a TwillException.
    """
    text = str(value).lower().strip()

    # true/false
    if text == "true":
        return True
    if text == "false":
        return False

    # 0/nonzero
    try:
        return bool(int(text))
    except ValueError:
        pass

    # +/- and on/off
    if text in ("+", "on"):
        return True
    if text in ("-", "off"):
        return False

    raise TwillException(f"unable to convert '{text}' into true/false")
def make_int(value: Any) -> int:
    """Convert the input value into an int.

    Raises a TwillException if the value cannot be converted.
    """
    try:
        return int(value)
    except Exception as error:  # noqa: BLE001
        message = f"unable to convert '{value}' into an int"
        raise TwillException(message) from error
def set_form_control_value(control: FieldElement, value: str) -> None:
    """Set the given form control to the given value.

    How the value is interpreted depends on the kind of control:

    - checkable inputs are checked or unchecked if the value can be
      converted to a boolean, and left alone otherwise
    - other inputs (except submit and image inputs), text areas and
      radio groups simply get the value
    - in checkbox groups and select elements, a value starting with '-'
      is deselected, all other values are selected (a leading '+' is
      ignored); options of select elements can be given by value or text

    Raises a TwillException for other controls, or if a select element
    has no matching option.
    """
    if isinstance(control, InputElement):
        if control.checkable:
            try:
                checked = make_boolean(value)
            except TwillException:
                # if there's more than one checkbox,
                # it should be a CheckboxGroup, see below.
                return
            control.checked = checked
        elif control.type not in ("submit", "image"):
            control.value = value
        return

    if isinstance(control, (TextareaElement, RadioGroup)):
        control.value = value
        return

    if isinstance(control, CheckboxGroup):
        if value.startswith("-"):
            # removing a value that is not set is not an error
            with suppress(KeyError):
                control.value.remove(value[1:])
        else:
            control.value.add(value[1:] if value.startswith("+") else value)
        return

    if not isinstance(control, SelectElement):
        raise TwillException("Attempt to set value on invalid control")

    # for ListControls we need to find the right *value*,
    # and figure out if we want to *select* or *deselect*
    add = not value.startswith("-")
    if value.startswith(("-", "+")):
        value = value[1:]

    # now, select the value.
    option_values = [val.strip() for val in control.value_options]
    option_names = [(c.text or "").strip() for c in control.getchildren()]
    for name, opt in zip(option_names, option_values):
        if value in (name, opt):
            if isinstance(control.value, MultipleSelectOptions):
                if add:
                    control.value.add(opt)
                elif opt in control.value:
                    control.value.remove(opt)
            else:
                control.value = opt if add else ""
            return
    raise TwillException("Attempt to set an invalid value")
def _all_the_same_submit(matches: Sequence[FieldElement]) -> bool:
    """Check if a list of controls all belong to the same control.

    For use with hidden inputs and submit buttons: all controls must be
    inputs of type submit or hidden with the same name and value.
    """
    name = value = None
    for control in matches:
        if not (
            isinstance(control, InputElement)
            and control.type in ("submit", "hidden")
        ):
            return False
        if name is None:
            # nothing to compare with so far
            name, value = control.name, control.value
            continue
        if control.name != name or control.value != value:
            return False
    return True
def _all_the_same_checkbox(matches: Sequence[FieldElement]) -> bool:
    """Check if a list of controls all belong to the same checkbox.

    All controls must be inputs of type checkbox or hidden with the
    same name.

    Hidden controls can combine with checkboxes, to allow form
    processors to ensure a False value is returned even if user
    does not check the checkbox. Without the hidden control, no
    value would be returned.
    """
    name = None
    for control in matches:
        if not (
            isinstance(control, InputElement)
            and control.type in ("checkbox", "hidden")
        ):
            return False
        if name is None:
            # nothing to compare with so far
            name = control.name
            continue
        if control.name != name:
            return False
    return True
def unique_match(matches: Sequence[FieldElement]) -> bool:
    """Check whether the given matches stand for one control only.

    This is the case if there is just one match, or if all matches
    belong to the same checkbox or the same submit control.
    """
    if len(matches) == 1:
        return True
    if _all_the_same_checkbox(matches):
        return True
    return _all_the_same_submit(matches)
def run_tidy(html: str) -> Tuple[Optional[str], Optional[str]]:
    """Clean up the given HTML string with HTML Tidy.

    Return a 2-tuple (output, errors). (None, None) will be returned if
    PyTidyLib (or the required shared library for tidy) isn't installed,
    unless the option 'require_tidy' is set, which raises a
    TwillException in that case.

    All twill options starting with 'tidy_' are passed to tidy, without
    that prefix and with underscores replaced by dashes.
    """
    from .commands import options

    require_tidy = options.get("require_tidy")

    if not tidylib:
        if not require_tidy:
            return None, None
        raise TwillException(
            "Option require_tidy is set, but PyTidyLib is not installed"
        )

    prefix = "tidy_"
    tidy_options = {}
    for key, value in options.items():
        if key.startswith(prefix):
            tidy_options[key[5:].replace("_", "-")] = value

    return tidylib.tidy_document(html, tidy_options)
def get_equiv_refresh_interval() -> Optional[int]:
    """Get the value of the twill option "equiv_refresh_interval".

    This is the smallest interval for which the browser should follow
    redirects; redirection happens if the given interval is smaller.
    """
    from .commands import options

    interval = options.get("equiv_refresh_interval")
    return interval
def is_hidden_filename(filename: str) -> bool:
    """Check if this is a hidden file (its name starts with a dot).

    Only the final path component is examined. The special directory
    references "." and ".." are never considered hidden, no matter
    whether they are passed as such or as the last component of a
    longer path (like "../" or "some/dir/..").
    """
    # Path(".").name is empty, so only ".." needs to be excluded explicitly
    base_name = Path(filename).name
    return base_name.startswith(".") and base_name != ".."
def is_twill_filename(filename: str) -> bool:
    """Check if the given filename denotes a (non-hidden) twill script.

    The name must end with the twill file extension and must not be
    the name of a hidden file.
    """
    if not filename.endswith(twill_ext):
        return False
    return not is_hidden_filename(filename)
def make_twill_filename(name: str) -> str:
    """Add the twill extension to the name of a script if necessary.

    If the given name has no file extension and a file with the twill
    extension appended exists at the same location, the name of that
    file is returned. In all other cases (including the special names
    "." and "..") the name is returned unchanged.
    """
    if name in (".", ".."):
        return name
    path = Path(name)
    if path.suffix:
        return name
    # Append the extension to the last component only and keep the
    # directory part; using just the stem would look for the script
    # in the current directory instead of the given one.
    twill_path = path.parent / (path.name + twill_ext)
    if twill_path.exists():
        return str(twill_path)
    return name
def gather_filenames(args: Sequence[str]) -> List[str]:
    """Collect script files, looking for them also inside directories.

    Each argument is first passed through make_twill_filename().
    If the result is a directory, it is walked recursively and all
    twill scripts found in it are added (hidden sub-directories are
    skipped); otherwise the resulting name itself is added.
    """
    scripts: List[str] = []
    for arg in args:
        target = make_twill_filename(arg)
        if not os.path.isdir(target):
            scripts.append(target)
            continue
        for folder, sub_folders, file_names in os.walk(target):
            # prune in place, so that os.walk does not descend
            # into hidden directories
            sub_folders[:] = [
                sub_folder
                for sub_folder in sub_folders
                if not is_hidden_filename(sub_folder)
            ]
            prefix = folder + os.sep
            for file_name in file_names:
                if is_twill_filename(file_name):
                    scripts.append(prefix + file_name)
    return scripts
import twill
from twill import commands
from twill.utils import Link
def test(url: str):
    """Test the find_link() and find_links() methods of the browser."""
    commands.reset_browser()
    browser = twill.browser
    commands.go(url)

    logout = ("log out", "logout")

    # find_link() returns the first matching link or None
    for pattern in ("logout", "log out", "log ?out"):
        link = browser.find_link(pattern)
        assert isinstance(link, Link)
        assert link == logout
    for pattern in ("log off", "Logout"):
        assert browser.find_link(pattern) is None
    link = browser.find_link("test.*")
    assert isinstance(link, Link)
    assert link == ("test spaces", "test spaces")

    # find_links() returns a list of all matching links
    for pattern in ("logout", "log out", "log? out"):
        links = browser.find_links(pattern)
        assert all(isinstance(link, Link) for link in links)
        assert links == [logout]
    for pattern in ("log off", "Logout"):
        assert browser.find_links(pattern) == []
    links = browser.find_links("test.*")
    assert all(isinstance(link, Link) for link in links)
    assert links == [
        ("test spaces", "test spaces"),
        ("test spaces2", "test_spaces"),
    ]
    links = browser.find_links(".*")
    assert all(isinstance(link, Link) for link in links)
    assert len(links) == 5
from time import sleep
import pytest
from httpx import ReadTimeout
from twill import commands
def test(url: str):
    """Test setting the browser timeout with the timeout command."""
    commands.reset_browser()
    commands.go(url)

    # with the initial timeout, the slow page can be fetched
    commands.go("/sleep")
    commands.find("sorry for the delay")

    # with a very short timeout, fetching the same page must fail
    commands.timeout(0.25)  # do not wait for server
    with pytest.raises(ReadTimeout):
        commands.go("/sleep")
    sleep(0.25)  # now wait for server to catch up

    # the command accepts various values and can be called without one
    for seconds in (900, 10, 0):
        commands.timeout(seconds)
    commands.timeout()

    commands.reset_browser(url)
    commands.go(url)
"""Test the WSGI support."""
from typing import Callable, Dict, Generator, List, Sequence, Tuple
from twill import browser, commands
"""Intercept HTTP connections that use
`requests <http://docs.python-requests.org/en/latest/>`_.
"""
# Names of the applications that have been called; each of the
# WSGI apps defined below adds its own name when it is run.
app_was_hit = set()

# Type aliases used in the signatures of the WSGI apps below:
# the environ passed to an app, the legacy write callable, the list
# of response headers, and the start_response callable.
Environ = Dict[str, str]
WriteCallable = Callable[[bytes], None]
Headers = List[Tuple[str, str]]
StartResponse = Callable[[str, Headers], WriteCallable]
def simple_app(
    _environ: Environ,
    start_response: StartResponse,
) -> Sequence[bytes]:
    """Simplest possible application object.

    Starts a plain text "200 OK" response, records that this app
    has been called and returns the complete body as a single chunk.
    """
    start_response("200 OK", [("Content-type", "text/plain")])
    app_was_hit.add("simple_app")
    body = (b"Hello, World!",)
    return body
def write_app(
    _environ: Environ,
    start_response: StartResponse,
) -> Sequence[bytes]:
    """Simple application using a legacy write callable.

    The first part of the body is sent with the write callable that
    is returned by start_response, the rest is returned as usual.

    See https://peps.python.org/pep-3333/#the-write-callable.
    """
    legacy_write = start_response("200 OK", [("Content-type", "text/plain")])
    app_was_hit.add("write_app")
    legacy_write(b"Hello, ")
    remainder = (b"World!",)
    return remainder
class IteratorApp:
    """Simple application using a custom iterator.

    The class itself serves as the WSGI application: "calling" it
    creates an instance, and the response is started and produced
    only when that instance is iterated over.
    """

    # the chunks of the response body
    content = (b"Hello, world!",)

    def __init__(
        self,
        environ: Environ,
        start_response: StartResponse,
    ) -> None:
        """Create this application by storing the WSGI arguments."""
        self.environ = environ
        self.start_response = start_response

    def __iter__(self) -> Generator[bytes, None, None]:
        """Start the response, record the call and yield the content."""
        self.start_response("200 OK", [("Content-type", "text/plain")])
        app_was_hit.add(self.__class__.__name__)
        for chunk in self.content:
            yield chunk
def test_simple_app():
    """Test that requests are served by the simple WSGI app."""
    app_was_hit.clear()
    browser.reset(app=simple_app)
    try:
        # nothing has been requested so far
        assert not app_was_hit
        commands.go("http://localhost:8080/")
        commands.show()
        commands.find("Hello, World!")
        # the request must have been handled by our app
        assert "simple_app" in app_was_hit
    finally:
        # detach the app again, whatever happened above
        browser.reset()
def test_write_app():
    """Test that requests are served by the app using a write callable."""
    app_was_hit.clear()
    browser.reset(app=write_app)
    try:
        # nothing has been requested so far
        assert not app_was_hit
        commands.go("http://localhost:8080/")
        commands.show()
        # next line may be added again once this has been merged:
        # https://github.com/encode/httpx/pull/2920
        # commands.find("Hello, World!")  # noqa: ERA001
        # the request must have been handled by our app
        assert "write_app" in app_was_hit
    finally:
        # detach the app again, whatever happened above
        browser.reset()
def test_iterator_app():
    """Test that requests are served by the app using a custom iterator."""
    # Start with a clean record like the other WSGI tests do, so that
    # the final assertion cannot be satisfied by a stale entry.
    app_was_hit.clear()
    browser.reset(app=IteratorApp)
    try:
        # nothing has been requested so far
        assert not app_was_hit
        commands.go("http://localhost:80/")
        commands.show()
        commands.find("Hello, world!")
        commands.notfind("!Hello")
        # the request must have been handled by our app
        assert "IteratorApp" in app_was_hit
    finally:
        # detach the app again, whatever happened above
        browser.reset()
+16
-0

@@ -7,2 +7,17 @@ .. _changelog:

3.2.1 (released 2023-11-23)
---------------------------
* Increased the default request timeout of the twill browser to 10 seconds
(from 5 seconds in 3.2) and added a command to change the timeout (#18).
3.2 (released 2023-11-02)
-------------------------
* The supported Python versions are now 3.8 to 3.12.
* A new method 'find_links' was added to the twill browser (#17).
* Twill now uses httpx_ instead of requests_.
* WSGI apps are now supported via httpx, wsgi_intercept is not needed anymore.
* We now use 'pyproject.toml' instead of 'setup.py'.
* Type hints and code style have been improved and are checked with ruff.
* Internal code was reformatted using ruff format (compatible with black).
3.1 (released 2022-10-30)

@@ -134,2 +149,3 @@ -------------------------

.. _requests: https://requests.readthedocs.io/
.. _httpx: https://www.python-httpx.org/
.. _mechanize: https://mechanize.readthedocs.io/

@@ -136,0 +152,0 @@ .. _cssselect: https://github.com/scrapy/cssselect

+3
-0

@@ -217,2 +217,5 @@ .. _commands:

**timeout** *[<seconds>]* -- set browser timeout to given number of seconds.
Defaults to 10 seconds. Set to 0 for no timeout.
**sleep** *[<seconds>]* -- sleep the given number of seconds.

@@ -219,0 +222,0 @@ Defaults to 1 second.

+33
-18

@@ -1,28 +0,43 @@

# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
"""Configuration file for the Sphinx documentation builder.
This file only contains a selection of the most common options.
For a full list see the documentation:
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
import os
import sys
sys.path.append(
os.path.join(
os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "src"
)
)
# -- Project information -----------------------------------------------------
project = 'twill'
copyright = '2022, C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al'
author = 'C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al'
def project_version():
    """Fetch the project version from the pyproject.toml file.

    The file is read from the parent of the current working directory,
    so this also works when the package is not installed. The value of
    the first line starting with "version =" is returned without its
    surrounding double quotes; an exception is raised if there is no
    such line.
    """
    with open("../pyproject.toml") as toml_file:
        version_line = next(
            (line for line in toml_file if line.startswith("version =")),
            None,
        )
    if version_line is None:
        raise Exception("Cannot determine project version")
    raw_value = version_line.split("=")[1]
    return raw_value.strip().strip('"')
project = "twill"
author = "C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al"
copyright = "2023, " + author
# The full version, including alpha/beta/rc tags
version = release = '3.1'
version = release = project_version()
language = "en"
# -- General configuration ---------------------------------------------------

@@ -41,3 +56,3 @@

# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

@@ -50,3 +65,3 @@

#
html_theme = 'alabaster'
html_theme = "alabaster"

@@ -53,0 +68,0 @@ # Add any paths that contain custom static files (such as style sheets) here,

@@ -7,3 +7,3 @@ .. _developer:

twill is entirely written in Python. You will need Python 3.6
twill is entirely written in Python. You will need Python 3.8
or newer to use and develop it.

@@ -14,9 +14,8 @@

twill comes with several unit tests. They depend on `pytest`_,
`wsgi_intercept`_ and `Quixote`_. To run them, type 'pytest'
in the top level directory. To run an individual test, you can use
the command 'pytest tests/test_something.py'.
twill comes with several unit tests. They depend on `pytest`_ and
`Quixote`_. To run them, type 'pytest' in the top level directory.
To run an individual test, you can use the command
``pytest tests/test_something.py``.
.. _pytest: https://pytest.org/
.. _wsgi_intercept: https://pypi.org/project/wsgi-intercept/
.. _Quixote: http://quixote.ca/

@@ -35,4 +34,4 @@

Newer versions 1.x, 2.x and 3.x are also Copyright (C) 2007-2022
Ben R. Taylor , Adam V. Brandizzi, Christoph Zwerschke et al.
Newer versions 1.x, 2.x and 3.x are also Copyright (C) 2007-2023
Ben R. Taylor, Adam V. Brandizzi, Christoph Zwerschke et al.

@@ -39,0 +38,0 @@ The newer versions are released under the same `MIT license`_.

@@ -26,2 +26,5 @@ .. _other:

Note that since twill 3.2, WSGI apps are supported directly via httpx,
so this package is not needed any more for twill itself.
.. _wsgi_intercept: https://pypi.python.org/pypi/wsgi_intercept

@@ -39,3 +42,3 @@

unfortunately development of that project also stalled.
The current version is based on requests_ and lxml_ instead.
The current version is based on httpx_ and lxml_ instead.

@@ -79,3 +82,3 @@ funkload_ is a nifty looking tool that does functional load testing.

.. _Quixote: http://www.mems-exchange.org/software/quixote/
.. _requests: http://docs.python-requests.org/
.. _httpx: https://www.python-httpx.org/
.. _Selenium: http://www.seleniumhq.org/

@@ -82,0 +85,0 @@ .. _twill in Javascript: http://blog.ianbicking.org/twill-in-javascript.html

@@ -19,3 +19,3 @@ .. _overview:

The latest release of twill is twill version 3.0.
The latest release of twill is twill version 3.2.
It is available for `download`_ from the Python Package Index,

@@ -25,3 +25,3 @@ and you can use Python's `pip`_ tool to install or upgrade twill.

twill 3.0 works with Python 3.6 and newer.
twill 3.2 works with Python 3.8 and newer.

@@ -81,4 +81,4 @@ To start using twill, install it and then type ``twill``.

Acknowledgements
----------------
Acknowledgements and History
----------------------------

@@ -117,4 +117,9 @@ In May 2004, Cory Dodt had a great idea with "Python Browser Poseur"

and the release of version 2.0 happened in April 2020 with the approval
of its original author C. Titus Brown.
of its original author C. Titus Brown, and Christoph Zwerschke acting
as new maintainer of the project.
In February 2021, twill 3.0 was released, followed by twill 3.1 in
October 2022, and twill 3.2 in October 2023. These versions do not
support Python 2 any more, and add type hints and other modernizations.
Thanks, all!

@@ -121,0 +126,0 @@

@@ -8,3 +8,3 @@ .. _python-api:

twill is essentially a web browsing and testing tool based on the
`requests`_ and `lxml`_ packages. All twill commands are implemented in
`httpx`_ and `lxml`_ packages. All twill commands are implemented in
the ``commands.py`` file, and pyparsing_ does the work of parsing the

@@ -15,4 +15,4 @@ input and converting it into Python commands (see ``parse.py``).

.. _httpx: https://www.python-httpx.org/
.. _lxml: https://lxml.de/
.. _requests: https://requests.readthedocs.io/
.. _pyparsing: https://github.com/pyparsing/pyparsing

@@ -19,0 +19,0 @@ .. _cmd: https://docs.python.org/3/library/cmd.html

@@ -1,2 +0,1 @@

sphinx>=2.4,<3
sphinx_rtd_theme>=0.4,<1
sphinx>=7,<8

@@ -123,27 +123,30 @@ .. _testing:

You can use `wsgi_intercept`_ for testing `WSGI applications`_.
You can pass a WSGI_ application to the ``reset()`` method of the browser.
HTTP calls will then go to this application "in-process" directly instead
of going over the network. This is particularly useful for unit tests,
where setting up an externally available Web server can be inconvenient.
It provides two functions, `add_wsgi_intercept` and `remove_wsgi_intercept`,
that allow Python applications to redirect HTTP calls into a WSGI application
"in-process", without going via an external Internet call. This is
particularly useful for unit tests, where setting up an externally
available Web server can be inconvenient.
For example, the following code redirects all ``localhost:80`` calls to
the given WSGI app: ::
a simple Flask_ app: ::
def create_app():
return wsgi_app
from flask import Flask
from twill import browser, commands
import wsgi_intercept
app = Flask(__name__)
wsgi_intercept.requests_intercept.install()
wsgi_intercept.add_wsgi_intercept('localhost', 80, create_app)
# your twill tests go here...
wsgi_intercept.remove_wsgi_intercept('localhost', 80)
wsgi_intercept.requests_intercept.uninstall()
See the ``tests/test_wsgi_intercept.py`` unit test for more examples.
@app.route("/")
def hello():
return "Hello World!"
.. _wsgi_intercept: https://pypi.python.org/pypi/wsgi_intercept
.. _WSGI applications: https://www.python.org/dev/peps/pep-0333/
browser.reset(app=app)
commands.go("http://localhost:80")
commands.find("Hello World!")
See the ``tests/test_wsgi.py`` unit test for more examples.
.. _WSGI: https://peps.python.org/pep-3333/
.. _Flask: https://flask.palletsprojects.com/

@@ -0,1 +1,3 @@

"""A twill extension example."""
from twill import log

@@ -5,4 +7,5 @@

def test(*args):
log.info('function test passed %d args', len(args))
log.info('args are: %s', args)
"""Test passed arguments."""
log.info("function test passed %d args", len(args))
log.info("args are: %s", args)

@@ -9,0 +12,0 @@

@@ -18,5 +18,5 @@ package com.bitmechanic.maxq.generator;

/**
* Generates a test case in Twill script.
* Generates a test case as a twill script.
* Take a look at the IScriptGenerator Interface to see how to specify which one you want.
* The important information is that this class generates a Twill script.
* The important information is that this class generates a twill script.
* @see IScriptGenerator

@@ -23,0 +23,0 @@ * @author Titus Brown titus@caltech.edu

@@ -6,7 +6,8 @@ include MANIFEST.in

include .bumpversion.cfg
include .flake8
include tox.ini
include src/twill/py.typed
exclude .readthedocs.yaml
graft tests

@@ -13,0 +14,0 @@ recursive-include docs *.rst conf.py Makefile make.bat requirements.txt

+47
-17
Metadata-Version: 2.1
Name: twill
Version: 3.1
Summary: twill web browsing and testing language and associated utilities.
Home-page: https://github.com/twill-tools/twill
Download-URL: https://pypi.org/project/twill/
Author: C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
Author-email: titus@idyll.org
Maintainer: Christoph Zwerschke
Maintainer-email: cito@online.de
License: MIT
Project-URL: Source, https://github.com/twill-tools/twill
Version: 3.2.1
Summary: A web browsing and testing language
Author: C. Titus Brown, Ben R. Taylor, Christoph Zwerschke
Maintainer-email: Christoph Zwerschke <cito@online.de>
License: The MIT License, https://opensource.org/licenses/MIT
Copyright 2005-2023 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
Project-URL: Homepage, https://github.com/twill-tools/twill
Project-URL: Issues, https://github.com/twill-tools/twill/issues
Project-URL: Documentation, https://twill-tools.github.io/twill/
Project-URL: Source, https://github.com/twill-tools/twill
Project-URL: ChangeLog, https://twill-tools.github.io/twill/changelog.html
Project-URL: Download, https://pypi.org/project/twill/
Keywords: web,testing,browsing,automation
Classifier: Development Status :: 6 - Mature

@@ -25,4 +45,2 @@ Classifier: Environment :: Console

Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8

@@ -32,10 +50,22 @@ Classifier: Programming Language :: Python :: 3.9

Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Other Scripting Engines
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Software Development :: Testing
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE.txt
Requires-Dist: lxml<5,>=4.9
Requires-Dist: httpx<1,>=0.25.0
Requires-Dist: pyparsing<4,>=3.0
Provides-Extra: docs
Requires-Dist: sphinx<6,>=5.2; extra == "docs"
Requires-Dist: sphinx_rtd_theme<2,>=1; extra == "docs"
Provides-Extra: tidy
Requires-Dist: pytidylib<0.4,>=0.3; extra == "tidy"
Provides-Extra: tests
License-File: LICENSE
Requires-Dist: tox<5,>=4; extra == "tests"
Requires-Dist: pytest<8,>=7.4; extra == "tests"
Requires-Dist: pytidylib<0.4,>=0.3; extra == "tests"
Requires-Dist: quixote<4,>=3.6; extra == "tests"

@@ -47,9 +77,9 @@ twill: a simple scripting language for web browsing

The current version 3.1 supports Python 3.6 to 3.11.
The current version 3.2 supports Python 3.8 to 3.12.
See also the [changelog](https://twill-tools.github.io/twill/changelog.html) for a summary of the things that have been changed and improved since version 2.0, and the [acknowledgements](https://twill-tools.github.io/twill/overview.html#acknowledgements) for a short overview of the earlier history of twill.
Take a look at the [changelog](https://twill-tools.github.io/twill/changelog.html) to find a list of all changes and improvements made since version 2. For a brief overview of twill's history starting from its early days, see the [acknowledgements and history](https://twill-tools.github.io/twill/overview.html#acknowledgements-and-history) section.
The full [documentation](https://twill-tools.github.io/twill/) is included in the distribution and provided online.
The full [documentation](https://twill-tools.github.io/twill/) is included in the distribution and provided online via [GitHub](https://twill-tools.github.io/twill/) and [ReadTheDocs](https://twill.readthedocs.io/en/latest/).
Copyright (c) 2005-2022 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
Copyright (c) 2005-2023 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.

@@ -56,0 +86,0 @@ Newer versions have been created and are maintained by [Christoph Zwerschke](https://github.com/Cito).

@@ -6,9 +6,9 @@ twill: a simple scripting language for web browsing

The current version 3.1 supports Python 3.6 to 3.11.
The current version 3.2 supports Python 3.8 to 3.12.
See also the [changelog](https://twill-tools.github.io/twill/changelog.html) for a summary of the things that have been changed and improved since version 2.0, and the [acknowledgements](https://twill-tools.github.io/twill/overview.html#acknowledgements) for a short overview of the earlier history of twill.
Take a look at the [changelog](https://twill-tools.github.io/twill/changelog.html) to find a list of all changes and improvements made since version 2. For a brief overview of twill's history starting from its early days, see the [acknowledgements and history](https://twill-tools.github.io/twill/overview.html#acknowledgements-and-history) section.
The full [documentation](https://twill-tools.github.io/twill/) is included in the distribution and provided online.
The full [documentation](https://twill-tools.github.io/twill/) is included in the distribution and provided online via [GitHub](https://twill-tools.github.io/twill/) and [ReadTheDocs](https://twill.readthedocs.io/en/latest/).
Copyright (c) 2005-2022 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
Copyright (c) 2005-2023 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.

@@ -15,0 +15,0 @@ Newer versions have been created and are maintained by [Christoph Zwerschke](https://github.com/Cito).

@@ -1,13 +0,1 @@

[tool:pytest]
testpaths = tests
[tool:mypy]
python_version = 3.9
check_untyped_defs = true
no_implicit_optional = true
strict_optional = false
warn_redundant_casts = true
warn_unused_ignores = true
disallow_untyped_defs = false
[egg_info]

@@ -14,0 +2,0 @@ tag_build =

@@ -1,1 +0,1 @@

"""twill test suite"""
"""The twill test suite."""

@@ -0,1 +1,6 @@

"""Shared test configuration for pytest."""
from io import StringIO
from typing import Generator
import pytest

@@ -6,17 +11,13 @@

@pytest.fixture(scope='session')
def url(request):
@pytest.fixture(scope="session")
def url() -> Generator[str, None, None]:
"""Start a server and returns its URL."""
utils.cd_test_dir()
utils.start_server()
def stop():
utils.stop_server()
utils.pop_test_dir()
request.addfinalizer(stop)
url = utils.get_url()
from twill import set_output
from twill.commands import go, find
from twill.commands import find, go
set_output()

@@ -26,4 +27,5 @@ try:

find("These are the twill tests")
except Exception:
raise RuntimeError("""
except Exception as error: # noqa: BLE001
raise RuntimeError(
"""
***

@@ -34,14 +36,19 @@ Hello! The twill test server is not running or cannot be reached;

***
""")
"""
) from error
return url
yield url
utils.stop_server()
utils.pop_test_dir()
@pytest.fixture()
def out():
from io import StringIO
def output() -> Generator[StringIO, None, None]:
"""Get output from the test."""
from twill import set_output
output = StringIO()
set_output(output)
yield output
with StringIO() as output:
set_output(output)
yield output
set_output()

@@ -1,93 +0,110 @@

"""Simple mock implementation of dnspython to test the twill dns extension"""
"""Simple mock implementation of dnspython to test the twill DNS extension."""
import socket
import enum
import sys
from typing import Any, Dict, List, Optional
mock_records = {
'A': {
'twill-test-1.ignore.idyll.org': '192.168.1.1',
'twill-test-2.ignore.idyll.org': '192.168.1.2',
'twill-test-3.ignore.idyll.org': '192.168.1.1',
class RdataType(enum.IntEnum):
"""DNS Rdata Type."""
A = 1
NS = 2
CNAME = 5
MX = 15
mock_records: Dict[RdataType, Dict[str, str]] = {
RdataType.A: {
"twill-test-1.ignore.idyll.org": "192.168.1.1",
"twill-test-2.ignore.idyll.org": "192.168.1.2",
"twill-test-3.ignore.idyll.org": "192.168.1.1",
},
'CNAME': {
'twill-test-3.ignore.idyll.org': 'twill-test-1.ignore.idyll.org.',
RdataType.CNAME: {
"twill-test-3.ignore.idyll.org": "twill-test-1.ignore.idyll.org.",
},
'MX': {
'twill-test-4.ignore.idyll.org': 'twill-test-2.ignore.idyll.org.',
RdataType.MX: {
"twill-test-4.ignore.idyll.org": "twill-test-2.ignore.idyll.org.",
},
'NS': {
'idyll.org': 'nsa.idyll.org.',
}
RdataType.NS: {
"idyll.org": "nsa.idyll.org.",
},
}
def activate():
"""Activate the mock dns module"""
def activate() -> None:
"""Activate the mock dns module."""
package = sys.modules[__name__]
sys.modules['dns'] = package
for module in 'ipv4 name resolver'.split():
sys.modules[f'dns.{module}'] = package
sys.modules["dns"] = package
for module in "ipv4 name rdatatype resolver".split():
sys.modules[f"dns.{module}"] = package
setattr(package, module, package)
def inet_aton(text):
def inet_aton(text: str) -> str:
"""Convert IPv4 address in text form to network form."""
try:
net = ''.join(chr(int(x)) for x in text.split('.'))
if len(net) != 4:
net = "".join(chr(int(x)) for x in text.split("."))
if len(net) != 4: # noqa: PLR2004
raise ValueError
return net
except (TypeError, ValueError):
raise socket.error('invalid ip address %s' % text)
except (TypeError, ValueError) as error:
raise OSError(f"invalid ip address {text}") from error
return net
def from_text(text):
"""Convert text into a Name object"""
if not text.endswith('.'):
text += '.'
def from_text(text: str) -> str:
"""Convert text into a Name object."""
if not text.endswith("."):
text += "."
return text
class Answer:
""""DNS query result"""
class Result:
"""DNS query result."""
def __init__(self, qtype, result):
self.result = result
if qtype == 'A':
self.address = result
elif qtype in ('CNAME', 'NS'):
self.target = result
elif qtype == 'MX':
self.exchange = result
def __init__(self, qtype: RdataType, value: Any) -> None:
"""Initialize the result."""
self.value = value
if qtype == RdataType.A:
self.address = value
elif qtype in (RdataType.CNAME, RdataType.NS):
self.target = value
elif qtype == RdataType.MX:
self.exchange = value
else:
raise ValueError(f'unknown query type: {qtype}')
raise ValueError(f"unknown query type: {qtype}")
def __str__(self):
return str(self.result)
def __str__(self) -> str:
"""Get a string representation of the result."""
return str(self.value)
Answer = List[Result]
class Resolver:
"""DNS stub resolver"""
"""DNS stub resolver."""
def __init__(self):
self.nameservers = None
def __init__(self) -> None:
"""Initialize the resolver."""
self.nameservers: Optional[List] = None
def resolve(self, qname, qtype='A'):
def resolve(self, qname: str, qtype: RdataType = RdataType.A) -> Answer:
"""Query nameservers to find the answer to the question."""
if self.nameservers:
raise ValueError(f'unknown name servers: {self.nameservers}')
if qtype == 1:
qtype = 'A'
raise ValueError(f"unknown name servers: {self.nameservers}")
try:
records = mock_records[qtype]
except KeyError:
raise ValueError(f'unknown query type: {qtype}')
if qname.endswith('.'):
except KeyError as error:
raise ValueError(f"unknown query type: {qtype}") from error
if qname.endswith("."):
qname = qname[:-1]
try:
results = records[qname]
except KeyError:
raise ValueError(f'unknown query result: {qname} {qtype}')
except KeyError as error:
raise ValueError(
f"unknown query result: {qname} {qtype}"
) from error
if not isinstance(results, list):
results = [results]
return [Answer(qtype, result) for result in results]
results = [results] # type: ignore[assignment]
return [Result(qtype, result) for result in results]

@@ -6,16 +6,27 @@ #!/usr/bin/env python3

import os
from base64 import decodebytes
from time import sleep
from typing import Optional
from quixote.publish import Publisher # type: ignore
from quixote.errors import AccessError # type: ignore
from quixote.session import Session, SessionManager # type: ignore
from quixote.directory import Directory, AccessControlled # type: ignore
from quixote.form import widget # type: ignore
from quixote import ( # type: ignore
get_session, get_session_manager, get_path,
redirect, get_request, get_response)
from quixote import ( # type: ignore[import-untyped]
get_path,
get_request,
get_response,
get_session,
get_session_manager,
redirect,
)
from quixote.directory import ( # type: ignore[import-untyped]
AccessControlled,
Directory,
)
from quixote.errors import AccessError # type: ignore[import-untyped]
from quixote.form import widget # type: ignore[import-untyped]
from quixote.publish import Publisher # type: ignore[import-untyped]
from quixote.session import ( # type: ignore[import-untyped]
Session,
SessionManager,
)
HOST = '127.0.0.1'
HOST = "127.0.0.1"
PORT = 8080

@@ -25,9 +36,11 @@

class AlwaysSession(Session):
"""Session that always saves."""
def __init__(self, session_id):
def __init__(self, session_id: str) -> None:
"""Initialize the session."""
Session.__init__(self, session_id)
self.n = 0
self.visit = 0
def has_info(self):
"""Always save."""
def has_info(self) -> bool:
"""Return true to indicate that it should always save."""
return True

@@ -39,7 +52,7 @@

class UnauthorizedError(AccessError):
"""The request requires user authentication.
"""Error used for Basic Authentication.
The request requires user authentication.
This subclass of AccessError sends a 401 instead of a 403,
hinting that the client should try again with authentication.
(from http://quixote.ca/qx/HttpBasicAuthentication)

@@ -52,21 +65,33 @@ """

def __init__(self, realm='Protected', public_msg=None, private_msg=None):
def __init__(
self,
realm: str = "Protected",
public_msg: Optional[str] = None,
private_msg: Optional[str] = None,
) -> None:
"""Initialize the error."""
self.realm = realm
AccessError.__init__(self, public_msg, private_msg)
def format(self):
def format(self) -> str: # noqa: A003
"""Format the error."""
request = get_request()
request.response.set_header(
'WWW-Authenticate', f'Basic realm="{self.realm}"')
"WWW-Authenticate", f'Basic realm="{self.realm}"'
)
return AccessError.format(self)
def create_publisher():
def create_publisher() -> None:
"""Create a publisher for TwillTest, with session management added on."""
session_manager = SessionManager(session_class=AlwaysSession)
return Publisher(TwillTest(), session_manager=session_manager,
display_exceptions='plain')
return Publisher(
TwillTest(),
session_manager=session_manager,
display_exceptions="plain",
)
def message(session):
def message(session: AlwaysSession) -> str:
"""Create a message with session information."""
return f"""\

@@ -82,3 +107,3 @@ <html>

<p>
Your session ID is {session.id}; this is visit #{session.n}.
Your session ID is {session.id}; this is visit #{session.visit}.
<p>

@@ -102,19 +127,58 @@ You are logged in as "{session.user}".

_q_exports = [
'logout', 'increment', 'incrementfail', "", 'restricted',
'login', ('test spaces', 'test_spaces'), 'test_spaces',
'simpleform', 'getform',
'upload_file', 'http_auth', 'formpostredirect',
'exit', 'multisubmitform', "exception",
"plaintext", "xml",
"testform", "testformaction", "test_radiobuttons",
"test_refresh", "test_refresh2",
"test_refresh3", "test_refresh4", "test_refresh5",
"test_checkbox", "test_simple_checkbox", "echo",
"test_checkboxes", 'test_global_form', "two_forms",
'broken_form_1', 'broken_form_2', 'broken_form_3',
'broken_form_4', 'broken_form_5', 'broken_linktext',
'exit', 'display_post', 'display_environ']
_q_exports = (
"",
"logout",
"increment",
"incrementfail",
"restricted",
"login",
("test spaces", "test_spaces"),
"test_spaces",
"simpleform",
"getform",
"upload_file",
"http_auth",
"formpostredirect",
"exit",
"multisubmitform",
"exception",
"plaintext",
"xml",
"sleep",
"testform",
"testformaction",
"test_radiobuttons",
"test_refresh",
"test_refresh2",
"test_refresh3",
"test_refresh4",
"test_refresh5",
"test_checkbox",
"test_simple_checkbox",
"echo",
"test_checkboxes",
"test_global_form",
"two_forms",
"broken_form_1",
"broken_form_2",
"broken_form_3",
"broken_form_4",
"broken_form_5",
"broken_linktext",
"exit",
"display_post",
"display_environ",
)
def test_global_form(self):
def __init__(self) -> None:
"""Initialize the application."""
self.restricted = Restricted()
self.http_auth = HttpAuthRestricted()
def exit(self) -> None: # noqa: A003
"""Exit the application."""
raise SystemExit
def test_global_form(self) -> str:
"""Test the global form."""
return """

@@ -146,28 +210,22 @@ <html>

def display_post(self):
s = ""
request = get_request()
for k, v in request.form.items():
s += f"k: '''{k}''' : '''{v}'''<p>\n"
return s
def display_post(self) -> str:
"""Show the form items."""
return "".join(
f"k: '''{k}''' : '''{v}'''<p>\n"
for k, v in get_request().form.items()
)
def display_environ(self):
s = ""
request = get_request()
for k, v in request.environ.items():
s += f"k: '''{k}''' : '''{v}'''<p>\n"
return s
def display_environ(self) -> str:
"""Show the environment variables."""
return "".join(
f"k: '''{k}''' : '''{v}'''<p>\n"
for k, v in get_request().environ.items()
)
def exit(self):
raise SystemExit
def _q_index(self) -> str:
"""Show index page."""
return message(get_session())
def __init__(self):
self.restricted = Restricted()
self.http_auth = HttpAuthRestricted()
def _q_index(self):
session = get_session()
return message(session)
def broken_form_1(self):
def broken_form_1(self) -> str:
"""Get broken form 1."""
return """\

@@ -178,3 +236,4 @@ <form>

def broken_form_2(self):
def broken_form_2(self) -> str:
"""Get broken form 2."""
return """\

@@ -191,3 +250,4 @@ <form>

def broken_form_3(self):
def broken_form_3(self) -> str:
"""Get broken form 3."""
return """\

@@ -203,3 +263,4 @@ <table>

def broken_form_4(self):
def broken_form_4(self) -> str:
"""Get broken form 4."""
return """\

@@ -214,3 +275,4 @@ <font>

def broken_form_5(self):
def broken_form_5(self) -> str:
"""Get broken form 5."""
return """\

@@ -232,3 +294,4 @@ <div id="loginform">

def broken_linktext(self):
def broken_linktext(self) -> str:
"""Get broken link text."""
return """

@@ -240,4 +303,4 @@ <a href="/">

def test_refresh(self):
"""test simple refresh"""
def test_refresh(self) -> str:
"""Test simple refresh."""
return """\

@@ -248,4 +311,4 @@ <meta http-equiv="refresh" content="2; url=./login">

def test_refresh2(self):
"""test refresh with upper case"""
def test_refresh2(self) -> str:
"""Test refresh with upper case."""
return """\

@@ -256,4 +319,4 @@ <META HTTP-EQUIV="REFRESH" CONTENT="2; URL=./login">

def test_refresh3(self):
"""test circular refresh"""
def test_refresh3(self) -> str:
"""Test circular refresh."""
return """\

@@ -264,4 +327,4 @@ <meta http-equiv="refresh" content="2; url=./test_refresh3">

def test_refresh4(self):
"""test refresh together with similar meta tags"""
def test_refresh4(self) -> str:
"""Test refresh together with similar meta tags."""
return """\

@@ -281,4 +344,4 @@ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">

def test_refresh5(self):
"""check for situation where given URL is quoted."""
def test_refresh5(self) -> str:
"""Check for situation where given URL is quoted."""
return """\

@@ -298,27 +361,39 @@ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">

def exception(self):
raise Exception("500 error -- fail out!")
def exception(self) -> None:
"""Raise a server error."""
raise RuntimeError("500 error -- fail out!")
def test_spaces(self):
def test_spaces(self) -> str:
"""Test spaces."""
return "success"
def increment(self):
def sleep(self) -> str:
"""Test timeouts."""
sleep(0.5)
return "sorry for the delay"
def increment(self) -> str:
"""Visit session."""
session = get_session()
session.n += 1
session.visit += 1
return message(session)
def incrementfail(self):
def incrementfail(self) -> str:
"""Visit session with failure."""
session = get_session()
session.n += 1
raise Exception(message(session))
session.visit += 1
raise RuntimeError(message(session))
def login(self):
def login(self) -> str:
"""Test login."""
request = get_request()
username_widget = widget.StringWidget(name='username', value='')
submit_widget = widget.SubmitWidget(name='submit', value='submit me')
username_widget = widget.StringWidget(name="username", value="")
submit_widget = widget.SubmitWidget(name="submit", value="submit me")
submit_widget2 = widget.SubmitWidget(
name='nosubmit2', value="don't submit")
name="nosubmit2", value="don't submit"
)
if request.form:
assert not submit_widget2.parse(request)
if submit_widget2.parse(request):
raise RuntimeError("Cannot parse request.")
username = username_widget.parse(request)

@@ -328,5 +403,5 @@ if username:

session.set_user(username)
return redirect('./')
return redirect("./")
image_submit = '''<input type=image name='submit you' src=DNE.gif>'''
image_submit = """<input type=image name='submit you' src=DNE.gif>"""

@@ -338,24 +413,29 @@ login = username_widget.render()

def simpleform(self):
"""No submit button..."""
def simpleform(self) -> str:
"""Test non-existing submit button."""
request = get_request()
s1 = widget.StringWidget(name='n', value='').parse(request)
s2 = widget.StringWidget(name='n2', value='').parse(request)
s1 = widget.StringWidget(name="n", value="").parse(request)
s2 = widget.StringWidget(name="n2", value="").parse(request)
return (f"{s1} {s2} "
"<form method=POST>"
"<input type=text name=n><input type=text name=n2>"
"</form>")
return (
f"{s1} {s2} "
"<form method=POST>"
"<input type=text name=n><input type=text name=n2>"
"</form>"
)
def getform(self):
"""Get method..."""
return ("<form method=GET><input type=hidden name=n value=v>"
"<input type=submit value=send></form>")
def getform(self) -> str:
"""Test form with get method."""
return (
"<form method=GET><input type=hidden name=n value=v>"
"<input type=submit value=send></form>"
)
def multisubmitform(self):
def multisubmitform(self) -> str:
"""Test form with multiple submit buttons."""
request = get_request()
submit1 = widget.SubmitWidget('sub_a', value='sub_a')
submit2 = widget.SubmitWidget('sub_b', value='sub_b')
submit1 = widget.SubmitWidget("sub_a", value="sub_a")
submit2 = widget.SubmitWidget("sub_b", value="sub_b")

@@ -373,6 +453,6 @@ s = ""

if not used:
assert False
raise RuntimeError("No button was used.")
# print out the referer, too.
referer = request.environ.get('HTTP_REFERER')
referer = request.environ.get("HTTP_REFERER")
if referer:

@@ -384,9 +464,10 @@ s += f"<p>referer: {referer}"

def testformaction(self):
def testformaction(self) -> str:
"""Test form actions."""
request = get_request()
keys = [k for k in request.form if request.form[k]]
keys.sort()
keys = sorted(k for k in request.form if request.form[k])
return "==" + " AND ".join(keys) + "=="
def testform(self):
def testform(self) -> str:
"""Test form."""
request = get_request()

@@ -398,17 +479,17 @@

if request.form and 'selecttest' in request.form:
values = request.form['selecttest']
if request.form and "selecttest" in request.form:
values = request.form["selecttest"]
if isinstance(values, str):
values = [values]
values = ' AND '.join(values)
values = " AND ".join(values)
s += f"SELECTTEST: =={values}==<p>"
if request.form:
names = []
for name in ('item', 'item_a', 'item_b', 'item_c'):
items = []
for name in ("item", "item_a", "item_b", "item_c"):
if request.form.get(name):
value = request.form[name]
names.append(f'{name}={value}')
names = ' AND '.join(names)
s += f"NAMETEST: =={names}==<p>"
items.append(f"{name}={value}")
values = " AND ".join(items)
s += f"NAMETEST: =={values}==<p>"

@@ -438,9 +519,10 @@ return f"""\

def two_forms(self):
def two_forms(self) -> str:
"""Test two forms."""
request = get_request()
if request.form:
form = request.form.get('form')
item = request.form.get('item')
s = f'FORM={form} ITEM={item}'
form = request.form.get("form")
item = request.form.get("item")
s = f"FORM={form} ITEM={item}"
else:

@@ -464,8 +546,9 @@ s = "NO FORM"

def test_checkbox(self):
def test_checkbox(self) -> str:
"""Test single checkbox."""
request = get_request()
s = ""
if request.form and 'checkboxtest' in request.form:
value = request.form['checkboxtest']
if request.form and "checkboxtest" in request.form:
value = request.form["checkboxtest"]
if not isinstance(value, str):

@@ -487,10 +570,11 @@ value = value[0]

def test_checkboxes(self):
def test_checkboxes(self) -> str:
"""Test multiple checkboxes."""
request = get_request()
s = ""
if request.form and 'checkboxtest' in request.form:
value = request.form['checkboxtest']
if request.form and "checkboxtest" in request.form:
value = request.form["checkboxtest"]
if not isinstance(value, str):
value = ','.join(value)
value = ",".join(value)

@@ -509,8 +593,9 @@ s += f"CHECKBOXTEST: =={value}==<p>"

def test_simple_checkbox(self):
def test_simple_checkbox(self) -> str:
"""Test simple checkbox."""
request = get_request()
s = ""
if request.form and 'checkboxtest' in request.form:
value = request.form['checkboxtest']
if request.form and "checkboxtest" in request.form:
value = request.form["checkboxtest"]
if not isinstance(value, str):

@@ -531,10 +616,11 @@ value = value[0]

def test_radiobuttons(self):
def test_radiobuttons(self) -> str:
"""Test radio buttons."""
request = get_request()
s = ""
if request.form and 'radiobuttontest' in request.form:
value = request.form['radiobuttontest']
if request.form and "radiobuttontest" in request.form:
value = request.form["radiobuttontest"]
if not isinstance(value, str):
value = ','.join(value)
value = ",".join(value)

@@ -553,3 +639,3 @@ s += f"RADIOBUTTONTEST: =={value}==<p>"

def formpostredirect(self):
def formpostredirect(self) -> str:
"""Test redirect after a form POST."""

@@ -565,13 +651,11 @@ request = get_request()

"""
return redirect(get_path(1) + '/')
return redirect(get_path(1) + "/")
def logout(self):
# expire session
session_manager = get_session_manager()
session_manager.expire_session()
def logout(self) -> str:
"""Test logout."""
get_session_manager().expire_session()
return redirect(get_path(1) + "/") # back to index page
# redirect to index page.
return redirect(get_path(1) + '/')
def plaintext(self):
def plaintext(self) -> str:
"""Test plain text response."""
response = get_response()

@@ -581,3 +665,4 @@ response.set_content_type("text/plain")

def xml(self):
def xml(self) -> str:
"""Test XML response."""
response = get_response()

@@ -587,15 +672,15 @@ response.set_content_type("text/xml")

def echo(self):
def echo(self) -> str:
"""Show form content."""
request = get_request()
if request.form and 'q' in request.form:
return request.form['q']
if request.form and "q" in request.form:
return request.form["q"]
return "<html><body>No Content</body></html>"
def upload_file(self):
def upload_file(self) -> str:
"""Test file upload."""
request = get_request()
if request.form:
contents = request.form['upload'].fp.read()
return contents
else:
return """"\
return request.form["upload"].fp.read()
return """\
<form enctype=multipart/form-data method=POST>

@@ -608,6 +693,8 @@ <input type=file name=upload>

class Restricted(AccessControlled, Directory):
"""A directory with restricted access."""
_q_exports = [""]
_q_exports = ("",)
def _q_access(self):
def _q_access(self) -> None:
"""Check access."""
session = get_session()

@@ -617,3 +704,4 @@ if not session.user:

def _q_index(self):
def _q_index(self) -> str:
"""Show index page."""
return "you made it!"

@@ -623,18 +711,20 @@

class HttpAuthRestricted(AccessControlled, Directory):
"""A directory with restricted access using Basic Authentication."""
_q_exports = [""]
_q_exports = ("",)
def _q_access(self):
def _q_access(self) -> None:
"""Check access."""
r = get_request()
login = passwd = None
ha = r.get_environ('HTTP_AUTHORIZATION', None)
ha = r.get_environ("HTTP_AUTHORIZATION", None)
if ha:
auth_type, auth_string = ha.split(None, 1)
if auth_type.lower() == 'basic':
auth_string = decodebytes(auth_string.encode('utf-8'))
login, passwd = auth_string.split(b':', 1)
login = login.decode('utf-8')
passwd = passwd.decode('utf-8')
if (login, passwd) != ('test', 'password'):
if auth_type.lower() == "basic":
auth_string = decodebytes(auth_string.encode("utf-8"))
login, passwd = auth_string.split(b":", 1)
login = login.decode("utf-8")
passwd = passwd.decode("utf-8")
if (login, passwd) != ("test", "password"):
passwd = None

@@ -652,13 +742,17 @@

def _q_index(self):
def _q_index(self) -> str:
"""Show index page."""
return "you made it!"
if __name__ == '__main__':
from quixote.server.simple_server import run # type: ignore
port = int(os.environ.get('TWILL_TEST_PORT', PORT))
print(f'starting twill test server on port {port}.')
if __name__ == "__main__":
from quixote.server.simple_server import ( # type: ignore[import-untyped]
run,
)
port = int(os.environ.get("TWILL_TEST_PORT", PORT))
print(f"starting twill test server on port {port}.")
try:
run(create_publisher, host=HOST, port=port)
except KeyboardInterrupt:
pass
print("Keyboard interrupt ignored.")
from .utils import execute_script
def test(url):
execute_script('test_back.twill', initial_url=url)
def test(url: str):
execute_script("test_back.twill", initial_url=url)

@@ -7,3 +7,3 @@ """

import os
from pathlib import Path

@@ -13,13 +13,10 @@ from .utils import execute_script, test_dir

def test(url):
def test(url: str):
inp = "unique1\nunique2\n"
execute_script('test_basic.twill', inp, initial_url=url)
execute_script("test_basic.twill", inp, initial_url=url)
def teardown_module():
for filename in 'test_basic.cookies', 'test_basic.out':
try:
os.unlink(os.path.join(test_dir, filename))
except OSError:
pass
for filename in "test_basic.cookies", "test_basic.out":
Path(test_dir, filename).unlink(missing_ok=True)
from twill import browser, commands
def test_links_parsing(url):
def test_links_parsing(url: str): # noqa: ARG001
"""Test parsing a link text inside a span."""
commands.go('/broken_linktext')
commands.go("/broken_linktext")
# make sure link text is found even if it is nested
commands.follow('some text')
commands.follow("some text")
def test_fixing_forms(url):
def test_fixing_forms(url: str):
"""Test parsing of broken HTML forms."""
commands.go(url)
commands.go('/broken_form_1')
assert len(browser.forms) == 1, 'can fix form 1'
commands.go("/broken_form_1")
assert len(browser.forms) == 1, "can fix form 1"
commands.go('/broken_form_2')
assert len(browser.forms) == 1, 'can fix form 2'
commands.go("/broken_form_2")
assert len(browser.forms) == 1, "can fix form 2"
commands.go('/broken_form_3')
assert len(browser.forms) == 1, 'can fix form 3'
commands.go("/broken_form_3")
assert len(browser.forms) == 1, "can fix form 3"
commands.go('/broken_form_4')
assert len(browser.forms) == 2, 'can fix form 4'
commands.go("/broken_form_4")
assert len(browser.forms) == 2, "can fix form 4"
commands.go('/broken_form_5')
assert len(browser.forms) == 1, 'can fix form 5'
commands.go("/broken_form_5")
assert len(browser.forms) == 1, "can fix form 5"
assert set(browser.form().inputs.keys()) == set(
'username password login'.split()), 'should get proper fields'
"username password login".split()
), "should get proper fields"

@@ -6,4 +6,3 @@ """Test the check_links extension."""

def test(url):
execute_script('test_check_links.twill', initial_url=url)
def test(url: str):
execute_script("test_check_links.twill", initial_url=url)

@@ -1,3 +0,2 @@

from pytest import raises
import pytest
import twill

@@ -8,52 +7,52 @@ from twill import browser, commands, namespaces

def test_select_multiple(url):
def test_select_multiple(url: str):
namespaces.new_local_dict()
twill.commands.reset_browser()
commands.reset_browser()
with raises(TwillException):
browser.title
with pytest.raises(TwillException):
browser.title # noqa: B018
commands.go(url)
commands.go('/test_checkboxes')
commands.go("/test_checkboxes")
commands.fv('1', 'checkboxtest', 'one')
commands.fv('1', 'checkboxtest', 'two')
commands.fv('1', 'checkboxtest', 'three')
commands.fv("1", "checkboxtest", "one")
commands.fv("1", "checkboxtest", "two")
commands.fv("1", "checkboxtest", "three")
commands.fv('1', 'checkboxtest', '-one')
commands.fv('1', 'checkboxtest', '-two')
commands.fv('1', 'checkboxtest', '-three')
commands.fv("1", "checkboxtest", "-one")
commands.fv("1", "checkboxtest", "-two")
commands.fv("1", "checkboxtest", "-three")
commands.submit()
assert 'CHECKBOXTEST' not in browser.html
assert "CHECKBOXTEST" not in browser.html
commands.fv('1', 'checkboxtest', '+one')
commands.fv('1', 'checkboxtest', '+two')
commands.fv('1', 'checkboxtest', '+three')
commands.fv("1", "checkboxtest", "+one")
commands.fv("1", "checkboxtest", "+two")
commands.fv("1", "checkboxtest", "+three")
commands.submit()
assert 'CHECKBOXTEST: ==one,two,three==' in browser.html
assert "CHECKBOXTEST: ==one,two,three==" in browser.html
commands.fv('1', 'checkboxtest', '-one')
commands.fv('1', 'checkboxtest', '-two')
commands.fv('1', 'checkboxtest', '-three')
commands.fv("1", "checkboxtest", "-one")
commands.fv("1", "checkboxtest", "-two")
commands.fv("1", "checkboxtest", "-three")
commands.submit()
assert 'CHECKBOXTEST' not in browser.html
assert "CHECKBOXTEST" not in browser.html
def test_select_single(url):
def test_select_single(url: str):
namespaces.new_local_dict()
twill.commands.reset_browser()
commands.reset_browser()
browser = twill.browser
with raises(TwillException):
browser.title
with pytest.raises(TwillException):
browser.title # noqa: B018
commands.go(url)
commands.go('/test_checkboxes')
commands.go("/test_checkboxes")
# Should not be able to use a bool style for when
# there are multiple checkboxes
for x in ('1', '0', 'True', 'False'):
with raises(KeyError):
commands.fv('1', 'checkboxtest', x)
for x in ("1", "0", "True", "False"):
with pytest.raises(KeyError):
commands.fv("1", "checkboxtest", x)
from . import mock_dns
from .utils import execute_script

@@ -10,3 +9,3 @@

def test(url):
execute_script('test_dns.twill', initial_url=url)
def test(url: str):
execute_script("test_dns.twill", initial_url=url)
from .utils import execute_script
def test(url):
execute_script('test_equiv_refresh.twill', initial_url=url)
def test(url: str):
execute_script("test_equiv_refresh.twill", initial_url=url)
from .utils import execute_script
def test(url):
execute_script('test_find.twill', initial_url=url)
def test(url: str):
execute_script("test_find.twill", initial_url=url)

@@ -1,5 +0,4 @@

from pytest import raises
import pytest
import twill
from twill import namespaces, commands
from twill import commands, namespaces
from twill.errors import TwillAssertionError, TwillException

@@ -10,42 +9,42 @@

def test(url):
def test(url: str):
# test empty page title
namespaces.new_local_dict()
twill.commands.reset_browser()
commands.reset_browser()
browser = twill.browser
with raises(TwillException):
browser.title
with pytest.raises(TwillException):
browser.title # noqa: B018
# now test a few special cases
commands.go(url)
assert browser.title == 'Hello, world!'
assert browser.title == "Hello, world!"
commands.go('/login')
commands.go("/login")
assert browser.title is None
# test no matching forms
with raises(TwillAssertionError):
commands.fv('2', 'submit', '1')
with pytest.raises(TwillAssertionError):
commands.fv("2", "submit", "1")
# test regex match
commands.fv('1', '.*you', '1')
commands.fv("1", ".*you", "1")
# test ambiguous match to value
commands.go('/testform')
commands.fv('1', 'selecttest', 'val')
commands.fv('1', 'selecttest', 'value1')
commands.fv('1', 'selecttest', 'selvalue1')
commands.formclear('1')
commands.go("/testform")
commands.fv("1", "selecttest", "val")
commands.fv("1", "selecttest", "value1")
commands.fv("1", "selecttest", "selvalue1")
commands.formclear("1")
commands.showforms()
with raises(TwillException):
commands.fv('1', 'selecttest', 'value')
with pytest.raises(TwillException):
commands.fv("1", "selecttest", "value")
# test ambiguous match to name
commands.go('/testform')
with raises(TwillException):
commands.fv('1', 'item_', 'value')
commands.go("/testform")
with pytest.raises(TwillException):
commands.fv("1", "item_", "value")
with raises(TwillException):
commands.formfile('1', 'selecttest', 'null')
with pytest.raises(TwillException):
commands.formfile("1", "selecttest", "null")
# test the twill script.
execute_script('test_form.twill', initial_url=url)
execute_script("test_form.twill", initial_url=url)
from .utils import execute_script
def test(url):
def test(url: str):
"""Test the 'formfill' extension stuff."""
execute_script('test_formfill.twill', initial_url=url)
execute_script("test_formfill.twill", initial_url=url)
import os
from pathlib import Path

@@ -7,16 +8,19 @@ from twill.utils import gather_filenames

def test_gather_dir():
this_dir = os.path.dirname(__file__)
test_gather = os.path.join(this_dir, 'test_gather')
cwd = os.getcwd()
test_dir = Path(__file__).parent / "test_gather"
cwd = Path.cwd()
os.chdir(test_gather)
os.chdir(test_dir)
try:
files = gather_filenames(('.',))
if os.sep != '/':
files = [f.replace(os.sep, '/') for f in files]
assert sorted(files) == sorted([
'./00-testme/x-script.twill',
'./01-test/b.twill', './02-test2/c.twill',
'./02-test2/02-subtest/d.twill']), files
files = gather_filenames((".",))
if os.sep != "/":
files = [f.replace(os.sep, "/") for f in files]
assert sorted(files) == sorted(
(
"./00-testme/x-script.twill",
"./01-test/b.twill",
"./02-test2/c.twill",
"./02-test2/02-subtest/d.twill",
)
), files
finally:
os.chdir(cwd)

@@ -0,9 +1,11 @@

from io import StringIO
from .utils import execute_script
def test(url, out):
execute_script('test_global_form.twill', initial_url=url)
out = out.getvalue()
def test(url: str, output: StringIO):
execute_script("test_global_form.twill", initial_url=url)
out = output.getvalue()
assert "Form #1" in out
assert "Form name=login (#2)" in out
assert "Form name=login (#3)" in out

@@ -1,12 +0,9 @@

import os
import sys
from io import StringIO
from pathlib import Path
from pytest import raises
from twill import commands
from twill import namespaces
import pytest
import twill.parse
from twill import commands, namespaces
from twill.errors import TwillAssertionError, TwillNameError
import twill.parse

@@ -19,4 +16,4 @@ from .utils import execute_script, test_dir

def setup_module():
global _log_commands
_log_commands = twill.parse.log_commands(True)
global _log_commands # noqa: PLW0603
_log_commands = twill.parse.log_commands(True) # noqa: FBT003

@@ -28,3 +25,3 @@

def test(url):
def test(url: str):
# capture output

@@ -34,6 +31,6 @@ fp = StringIO()

twill.parse.execute_string('code 200', initial_url=url)
twill.parse.execute_string("code 200", initial_url=url)
# from file
execute_script('test_go.twill', initial_url=url)
execute_script("test_go.twill", initial_url=url)

@@ -43,12 +40,12 @@ twill.set_output(None)

# from stdin
filename = os.path.join(test_dir, 'test_go.twill')
stdin, sys.stdin = sys.stdin, open(filename)
try:
execute_script('-', initial_url=url)
finally:
sys.stdin = stdin
path = Path(test_dir, "test_go.twill")
with open(path) as inp:
stdin, sys.stdin = sys.stdin, inp
try:
execute_script("-", initial_url=url) # from stdin
finally:
sys.stdin = stdin
# from parse.execute_file
twill.parse.execute_file('test_go_exit.twill', initial_url=url)
twill.parse.execute_file("test_go_exit.twill", initial_url=url)

@@ -61,12 +58,12 @@ # also test some failures

# failed assert in a script
with raises(TwillAssertionError):
twill.parse.execute_file('test_go_fail.twill', initial_url=url)
with pytest.raises(TwillAssertionError):
twill.parse.execute_file("test_go_fail.twill", initial_url=url)
commands.go(url)
with raises(TwillAssertionError):
with pytest.raises(TwillAssertionError):
commands.code(400)
# no such command (NameError)
with raises(TwillNameError):
twill.parse.execute_file('test_go_fail2.twill', initial_url=url)
with pytest.raises(TwillNameError):
twill.parse.execute_file("test_go_fail2.twill", initial_url=url)
finally:

@@ -80,15 +77,15 @@ sys.stderr = stderr

with raises(TwillAssertionError):
twill.parse.execute_command('url', ('not this',), gd, ld, "anony")
with pytest.raises(TwillAssertionError):
twill.parse.execute_command("url", ("not this",), gd, ld, "anony")
with raises(TwillAssertionError):
commands.follow('no such link')
with pytest.raises(TwillAssertionError):
commands.follow("no such link")
with raises(TwillAssertionError):
commands.find('no such link')
with pytest.raises(TwillAssertionError):
commands.find("no such link")
with raises(TwillAssertionError):
commands.notfind('Hello')
with pytest.raises(TwillAssertionError):
commands.notfind("Hello")
with raises(SystemExit):
twill.parse.execute_command('exit', ('0',), gd, ld, "anony")
with pytest.raises(SystemExit):
twill.parse.execute_command("exit", ("0",), gd, ld, "anony")
from .utils import execute_script
def test(url):
execute_script('test_headers.twill', initial_url=url)
def test(url: str):
execute_script("test_headers.twill", initial_url=url)
from .utils import execute_script
def test(url):
execute_script('test_http_auth.twill', initial_url=url)
def test(url: str):
execute_script("test_http_auth.twill", initial_url=url)
from .utils import execute_script
def test(url):
execute_script('test_http_codes.twill', initial_url=url)
def test(url: str):
execute_script("test_http_codes.twill", initial_url=url)
from .utils import execute_script
def test(url):
execute_script('test_info.twill', initial_url=url)
def test(url: str):
execute_script("test_info.twill", initial_url=url)
from .utils import execute_script
def test_match_parse(url):
execute_script('test_match_parse.twill', initial_url=url)
def test_match_parse(url: str):
execute_script("test_match_parse.twill", initial_url=url)
"""Test a boatload of miscellaneous functionality."""
import sys
from io import StringIO
from pytest import raises
import pytest
from twill import browser, commands

@@ -15,3 +13,3 @@ from twill.errors import TwillException

assert browser is not None
for attr in ('go', 'reset', 'submit'):
for attr in ("go", "reset", "submit"):
assert hasattr(browser, attr)

@@ -22,9 +20,9 @@

assert browser is not None
for attr in ('go', 'reset', 'submit'):
for attr in ("go", "reset", "submit"):
assert hasattr(browser, attr)
with raises(TwillException): # no page and thus no status code yet
with pytest.raises(TwillException): # no page and thus no status code yet
assert browser.code
with raises(TwillException): # no page and thus no form yet
with pytest.raises(TwillException): # no page and thus no form yet
browser.submit()

@@ -34,8 +32,8 @@

try:
with raises(TwillException):
browser.go('http://0.0.0.0') # URL parses, but is invalid
with pytest.raises(TwillException):
browser.go("http://0.0.0.0") # URL parses, but is invalid
finally:
sys.stderr = stderr
with raises(SystemExit):
with pytest.raises(SystemExit):
commands.exit()

@@ -46,27 +44,27 @@

with raises(TwillException): # no page, cannot tidy yet
with pytest.raises(TwillException): # no page, cannot tidy yet
commands.tidy_ok()
with raises(TwillException): # no page, cannot show yet
with pytest.raises(TwillException): # no page, cannot show yet
commands.show()
commands.debug('http', '1')
commands.debug('http', '0')
commands.debug('http', '+')
commands.debug('http', '-')
commands.debug("http", "1")
commands.debug("http", "0")
commands.debug("http", "+")
commands.debug("http", "-")
commands.debug('commands', '0')
commands.debug('commands', '1')
with raises(TwillException):
commands.debug('nada', '1')
commands.debug("commands", "0")
commands.debug("commands", "1")
with pytest.raises(TwillException):
commands.debug("nada", "1")
commands.config()
commands.config('readonly_controls_writeable')
commands.config("readonly_controls_writeable")
commands.config('readonly_controls_writeable', 1)
commands.config('readonly_controls_writeable', 'on')
with raises(TwillException):
commands.config('readonly_controls_writeable', 'nada')
commands.config("readonly_controls_writeable", 1)
commands.config("readonly_controls_writeable", "on")
with pytest.raises(TwillException):
commands.config("readonly_controls_writeable", "nada")
commands.run("print('Hello!')")
from .utils import execute_script
def test(url):
execute_script('test_multisub.twill', initial_url=url)
def test(url: str):
execute_script("test_multisub.twill", initial_url=url)

@@ -1,4 +0,2 @@

from pytest import raises
import twill
import pytest
from twill import browser, commands, namespaces

@@ -8,18 +6,18 @@ from twill.errors import TwillException

def test_switch_buttons(url):
def test_switch_buttons(url: str):
namespaces.new_local_dict()
twill.commands.reset_browser()
commands.reset_browser()
with raises(TwillException):
browser.title
with pytest.raises(TwillException):
browser.title # noqa: B018
commands.go(url)
commands.go('/test_radiobuttons')
commands.go("/test_radiobuttons")
commands.submit()
assert 'RADIOBUTTONTEST' not in browser.html
assert "RADIOBUTTONTEST" not in browser.html
for x in ('one', 'two', 'three'):
commands.fv('1', 'radiobuttontest', x)
for x in ("one", "two", "three"):
commands.fv("1", "radiobuttontest", x)
commands.submit()
assert f'RADIOBUTTONTEST: =={x}==' in browser.html
assert f"RADIOBUTTONTEST: =={x}==" in browser.html
"""Same as test_basic, but using the command interpreter."""
import os
import sys
from pathlib import Path
from pytest import raises
import pytest
from twill import __url__ as twill_url
from twill import __version__ as twill_version
from twill.errors import TwillNameError
from twill.errors import TwillNameError
from .utils import execute_shell, test_dir
python_version = sys.version.split(None, 1)[0]
def test_shell_specific(url):
execute_shell('test_shell.twill', initial_url=url, fail_on_unknown=True)
def test_shell_specific(url: str):
execute_shell("test_shell.twill", initial_url=url, fail_on_unknown=True)
text = Path(test_dir, "test_shell.out").read_text()
lines = [line for line in text.splitlines() if line and "===" not in line]
expected_lines = [
f"twill version: {twill_version}",
f"Python Version: {python_version}",
f"See {twill_url} for more info.",
"What do YOU think the command 'help' does?!?",
"Help for command exit:",
">> exit [<code>]",
' Exit twill with given exit code (default 0, "no error").',
"Print version information.",
"Imported extension module 'shell_test'.",
"testing extension",
"raising errors",
"ERROR: The flag has not been set",
"ERROR: The flag has been set",
]
def test_shell_fail(url):
with raises(TwillNameError):
execute_shell('test_shell_fail.twill', initial_url=url,
fail_on_unknown=True)
assert lines == expected_lines
def test_most_commands(url):
def test_shell_fail(url: str):
with pytest.raises(TwillNameError):
execute_shell(
"test_shell_fail.twill",
initial_url=url,
fail_on_unknown=True,
)
def test_most_commands(url: str):
inp = "unique1\nunique2\n"
execute_shell('test_basic.twill', inp, initial_url=url)
execute_shell("test_basic.twill", inp, initial_url=url)
def teardown_module():
for filename in 'test_basic.cookies', 'test_basic.out', 'test_shell.out':
try:
os.unlink(os.path.join(test_dir, filename))
except OSError:
pass
for filename in "test_basic.cookies", "test_basic.out", "test_shell.out":
Path(test_dir, filename).unlink(missing_ok=True)

@@ -1,3 +0,2 @@

from pytest import raises
import pytest
from twill import commands

@@ -9,9 +8,9 @@ from twill.errors import TwillException

def test(url):
def test(url: str):
commands.show()
commands.show('html')
commands.show('links')
with raises(TwillException, match='Cannot show "nonsense".'):
commands.show('nonsense')
commands.show("html")
commands.show("links")
with pytest.raises(TwillException, match='Cannot show "nonsense".'):
commands.show("nonsense")
execute_script('test_show.twill', initial_url=url)
execute_script("test_show.twill", initial_url=url)

@@ -7,6 +7,7 @@ """Test the utils.run_tidy function.

import pytest
from twill import utils
from twill.commands import config
from twill.errors import TwillException
bad_html = """<a href="test">you</a> <b>hello."""

@@ -16,13 +17,13 @@

def setup_module():
config('require_tidy', 1)
config("require_tidy", 1)
def teardown_module():
config('require_tidy', 0)
config("require_tidy", 0)
def test_bad_html():
(output, errors) = utils.run_tidy(bad_html)
output, errors = utils.run_tidy(bad_html)
assert errors
(output, errors) = utils.run_tidy(output)
output, errors = utils.run_tidy(output)
assert not errors

@@ -34,8 +35,4 @@

try:
try:
with pytest.raises(TwillException, match="PyTidyLib is not installed"):
utils.run_tidy(bad_html)
except Exception:
pass
else:
assert False, 'bad HTML should raise error'
finally:

@@ -46,3 +43,3 @@ utils.tidylib = tidylib

def test_no_tidylib_but_not_required():
config('require_tidy', 0)
config("require_tidy", 0)
tidylib, utils.tidylib = utils.tidylib, None

@@ -56,20 +53,20 @@ try:

assert errors is None
config('require_tidy', 1)
config("require_tidy", 1)
def test_tidy_options():
good_content = '<h1>Hello, World!</h1>'
(output, errors) = utils.run_tidy(good_content)
good_content = "<h1>Hello, World!</h1>"
output, errors = utils.run_tidy(good_content)
assert errors
config('tidy_show_body_only', 1)
(output, errors) = utils.run_tidy(good_content)
config("tidy_show_body_only", 1)
output, errors = utils.run_tidy(good_content)
assert not errors
config('tidy_show_body_only', 0)
(output, errors) = utils.run_tidy(good_content)
config("tidy_show_body_only", 0)
output, errors = utils.run_tidy(good_content)
assert errors
if __name__ == '__main__':
if __name__ == "__main__":
setup_module()
test_bad_html()
teardown_module()

@@ -1,3 +0,2 @@

from pytest import raises
import pytest
from twill import commands

@@ -7,39 +6,42 @@ from twill.errors import TwillException

def test(url):
commands.go('/two_forms')
commands.find(' NO FORM ')
def test(url: str):
commands.reset_browser()
commands.go(url)
with raises(TwillException):
commands.go("/two_forms")
commands.find(" NO FORM ")
with pytest.raises(TwillException):
commands.submit()
with raises(TwillException):
commands.submit('1')
with pytest.raises(TwillException):
commands.submit("1")
commands.fv('1', 'item', 'foo')
commands.fv("1", "item", "foo")
commands.submit()
commands.find(' FORM=1 ITEM=foo ')
commands.find(" FORM=1 ITEM=foo ")
commands.fv('2', 'item', 'bar')
commands.fv("2", "item", "bar")
commands.submit()
commands.find(' FORM=2 ITEM=bar ')
commands.find(" FORM=2 ITEM=bar ")
with raises(TwillException):
with pytest.raises(TwillException):
commands.submit()
commands.submit('1', '1')
commands.find(' FORM=1 ITEM= ')
commands.submit("1", "1")
commands.find(" FORM=1 ITEM= ")
commands.submit('1', '2')
commands.find(' FORM=2 ITEM= ')
commands.submit("1", "2")
commands.find(" FORM=2 ITEM= ")
with raises(TwillException):
commands.submit('1', '3')
with pytest.raises(TwillException):
commands.submit("1", "3")
commands.fv('1', 'item', 'foo')
commands.fv('2', 'item', 'bar')
commands.fv("1", "item", "foo")
commands.fv("2", "item", "bar")
commands.submit()
commands.find(' FORM=2 ITEM=bar ')
commands.find(" FORM=2 ITEM=bar ")
commands.fv('2', 'item', 'bar')
commands.fv('1', 'item', 'foo')
commands.fv("2", "item", "bar")
commands.fv("1", "item", "foo")
commands.submit()
commands.find(' FORM=1 ITEM=foo ')
commands.find(" FORM=1 ITEM=foo ")

@@ -1,14 +0,13 @@

"""
Test the unit-test support framework using (naturally) a unit test...
"""
"""Test the unit-test support framework using (naturally) a unit test."""
import os
from pathlib import Path
import twill.unit
from quixote.server.simple_server import ( # type: ignore[import-untyped]
run as quixote_run,
)
from .server import create_publisher
from .utils import test_dir
from .server import create_publisher
from quixote.server.simple_server import run as quixote_run # type: ignore
PORT = 8081 # default port to run the server on

@@ -18,3 +17,3 @@ SLEEP = 0.5 # time to wait for the server to start

def run_server(port=PORT):
def run_server(port: int = PORT) -> None:
"""Function to run the server"""

@@ -27,3 +26,3 @@ quixote_run(create_publisher, port=port)

# abspath to the script
script = os.path.join(test_dir, 'test_unit_support.twill')
script = str(Path(test_dir, "test_unit_support.twill"))

@@ -37,3 +36,3 @@ # create test_info object

if __name__ == '__main__':
if __name__ == "__main__":
test()

@@ -1,3 +0,2 @@

from pytest import raises
import pytest
from twill import utils

@@ -9,14 +8,14 @@ from twill.errors import TwillException

make_boolean = utils.make_boolean
assert make_boolean(True)
assert not make_boolean(False)
assert make_boolean('true')
assert not make_boolean('false')
assert make_boolean(True) # noqa: FBT003
assert not make_boolean(False) # noqa: FBT003
assert make_boolean("true")
assert not make_boolean("false")
assert make_boolean(1)
assert not make_boolean(0)
assert make_boolean('1')
assert not make_boolean('0')
assert make_boolean('+')
assert not make_boolean('-')
with raises(TwillException):
make_boolean('no')
assert make_boolean("1")
assert not make_boolean("0")
assert make_boolean("+")
assert not make_boolean("-")
with pytest.raises(TwillException):
make_boolean("no")

@@ -26,3 +25,3 @@

trunc = utils.trunc
assert trunc('hello, world!', 12) == 'hello, w ...'
assert trunc('hello, world!', 13) == 'hello, world!'
assert trunc("hello, world!", 12) == "hello, w ..."
assert trunc("hello, world!", 13) == "hello, world!"

@@ -1,3 +0,2 @@

from pytest import raises
import pytest
from twill import parse

@@ -10,11 +9,13 @@

fut = parse.variable_substitution
args = (dict(foo=7), dict(bar=13, baz='wall'))
assert fut("${foo} * ${bar} bottles on the ${baz}!",
*args) == "7 * 13 bottles on the wall!"
args = ({"foo": 7}, {"bar": 13, "baz": "wall"})
assert (
fut("${foo} * ${bar} bottles on the ${baz}!", *args)
== "7 * 13 bottles on the wall!"
)
assert fut("${foo * bar}", *args) == str(7 * 13)
with raises(ZeroDivisionError):
with pytest.raises(ZeroDivisionError):
fut("${1/0}", {}, {})
def test_variables_script(url):
execute_script('test_variables.twill', initial_url=url)
def test_variables_script(url: str):
execute_script("test_variables.twill", initial_url=url)
from .utils import execute_script
def test(url):
execute_script('test_xml.twill', initial_url=url)
def test(url: str):
execute_script("test_xml.twill", initial_url=url)

@@ -1,21 +0,19 @@

"""Utility functions for testing twill"""
"""Utility functions for testing twill."""
import getpass
import os
import subprocess
import sys
import getpass
import subprocess
import time
from io import StringIO
from pathlib import Path
from typing import Optional, TextIO
import requests
import httpx
import twill
test_dir = os.path.dirname(__file__)
twill_dir = os.path.dirname(twill.__file__)
if os.path.dirname(test_dir) != os.path.dirname(twill_dir):
raise ImportError('twill was not imported from the right directory')
test_dir = Path(__file__).parent # test directory
current_dir = Path.cwd() # current working directory
HOST = '127.0.0.1' # interface to run the server on
HOST = "127.0.0.1" # interface to run the server on
PORT = 8080 # default port to run the server on

@@ -27,37 +25,39 @@ SLEEP = 0.5 # time to wait for the server to start

_cwd = '.' # current working directory
_url = None # current server url
def get_url():
def get_url() -> str:
"""Get the current server URL."""
if _url is None:
raise Exception("server has not yet been started")
raise RuntimeError("Server has not yet been started!")
return _url
def cd_test_dir():
def cd_test_dir() -> None:
"""Make the test directory the current directory."""
global _cwd
_cwd = os.getcwd()
os.chdir(test_dir)
def pop_test_dir():
def pop_test_dir() -> None:
"""Restore the current directory before running the tests."""
os.chdir(_cwd)
os.chdir(current_dir)
def mock_getpass(*args):
"""A mock getpass function."""
def mock_getpass(
prompt: str = "Password: ", # noqa: ARG001
stream: Optional[TextIO] = None, # noqa: ARG001
) -> str:
"""Mock getpass function."""
return "pass"
def execute_script(filename, inp=None, initial_url=None):
def execute_script(
filename: str, inp: Optional[str] = None, initial_url: Optional[str] = None
) -> None:
"""Execute twill script with the given filename."""
if filename != '-':
filename = os.path.join(test_dir, filename)
if filename != "-":
filename = str(Path(test_dir, filename))
if inp:
# use inp as the std input for the actual script commands
# use inp as the stdin for the actual script commands
stdin, sys.stdin = sys.stdin, StringIO(inp)

@@ -73,13 +73,18 @@ real_getpass, getpass.getpass = getpass.getpass, mock_getpass

def execute_shell(filename, inp=None, initial_url=None,
fail_on_unknown=False):
def execute_shell(
filename: str,
inp: Optional[str] = None,
initial_url: Optional[str] = None,
*,
fail_on_unknown: bool = False,
) -> None:
"""Execute twill script with the given filename using the shell."""
# use filename as the stdin *for the shell object only*
if filename != '-':
filename = os.path.join(test_dir, filename)
if filename != "-":
filename = str(Path(test_dir, filename))
cmd_inp = open(filename).read()
cmd_inp += u'\nquit\n'
cmd_inp = StringIO(cmd_inp)
cmd_loop = twill.shell.TwillCommandLoop
with open(filename, encoding="utf-8") as cmd_file:
cmd_content = cmd_file.read()
cmd_content += "\nquit\n"
cmd_inp = StringIO(cmd_content)

@@ -91,5 +96,8 @@ if inp:

try:
s = cmd_loop(initial_url=initial_url, stdin=cmd_inp,
fail_on_unknown=fail_on_unknown)
s.cmdloop()
loop = twill.shell.TwillCommandLoop(
initial_url=initial_url,
stdin=cmd_inp,
fail_on_unknown=fail_on_unknown,
)
loop.cmdloop()
except SystemExit:

@@ -101,6 +109,6 @@ pass

getpass.getpass = real_getpass
cmd_loop.reset() # do not keep as singleton
loop.reset() # reset the singleton
def start_server(port=None):
def start_server(port: Optional[int] = None) -> None:
"""Start a simple test web server.

@@ -113,22 +121,27 @@

"""
global _url
global _url # noqa: PLW0603
if port is None:
port = int(os.environ.get('TWILL_TEST_PORT', PORT))
port = int(os.environ.get("TWILL_TEST_PORT", PORT))
if START:
out = open(LOG or os.devnull, 'w', buffering=1)
print('STARTING:', sys.executable, 'tests/server.py', os.getcwd())
out = open(LOG or os.devnull, "w", buffering=1) # noqa: SIM115
print( # noqa: T201
"Starting:", sys.executable, "tests/server.py", Path.cwd()
)
subprocess.Popen(
[sys.executable, '-u', 'server.py'],
stderr=subprocess.STDOUT, stdout=out)
[sys.executable, "-u", "server.py"], # noqa: S603
stderr=subprocess.STDOUT,
stdout=out,
)
time.sleep(SLEEP) # wait until the server is up and running
print("The server has been started.") # noqa: T201
# noinspection HttpUrlsUsage
_url = f'http://{HOST}:{port}/'
_url = f"http://{HOST}:{port}/"
def stop_server():
def stop_server() -> None:
"""Stop a previously started test web server."""
global _url
global _url # noqa: PLW0603

@@ -138,5 +151,6 @@ if _url:

try:
requests.get(f'{_url}exit')
except Exception:
print('Could not stop the server')
httpx.get(f"{_url}exit", timeout=10)
except Exception as error: # noqa: BLE001
print("ERROR:", error) # noqa: T201
print("Could not stop the server.") # noqa: T201
_url = None
+14
-14
[tox]
envlist = py3{6,7,8,9,10,11}, flake8, mypy, docs, manifest
envlist = py3{8,9,10,11,12}, ruff, mypy, docs, manifest
[testenv:flake8]
basepython = python3.10
deps = flake8>=5.0,<6
[testenv:ruff]
basepython = python3.11
deps = ruff>=0.1.6,<0.2
commands =
flake8 twill tests extras setup.py
ruff check src/twill tests extras
ruff format --check src/twill tests extras
[testenv:mypy]
basepython = python3.10
basepython = python3.11
deps =
mypy==0.981
dnspython>=2,<3
mypy >= 1.6, <1.7
dnspython >=2.4, <3
types-lxml
types-requests
types-setuptools
commands =
mypy twill tests extras setup.py
mypy src/twill tests extras
[testenv:docs]
basepython = python3.10
deps = sphinx>=5.2,<6
basepython = python3.11
deps = sphinx >=7.2, <8
extras =

@@ -30,4 +30,4 @@ docs

[testenv:manifest]
basepython = python3.10
deps = check-manifest>=0.48,<1
basepython = python3.11
deps = check-manifest>=0.49,<1
commands =

@@ -34,0 +34,0 @@ check-manifest -v

[bumpversion]
current_version = 3.1
commit = False
tag = False
[bumpversion:file:README.md]
search = current version {current_version}
replace = current version {new_version}
[bumpversion:file:twill.__init__.py]
search = __version__ = '{current_version}'
replace = __version__ = '{new_version}'
[bumpversion:file:docs/conf.py]
search = release = '{current_version}'
replace = release = '{new_version}'

Sorry, the diff of this file is not supported yet

The MIT License, https://opensource.org/licenses/MIT
Copyright 2005-2022 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
#!/usr/bin/env python3
import re
import sys
from setuptools import setup
# Refuse to continue on interpreters older than the minimum supported version.
python_version = sys.version_info[:2]
if python_version < (3, 6):
    sys.exit("Python {}.{} is not supported by twill.".format(*python_version))

# Read the package's __init__ module as plain text, so that the metadata can
# be extracted from it with regular expressions instead of importing twill.
# The encoding is passed explicitly: without it, open() decodes with the
# locale's preferred encoding, which differs between machines.
with open("twill/__init__.py", encoding="utf-8") as init_file:
    init = init_file.read()
def find(pattern):
    """Return the first capture group of *pattern* found in ``init``.

    The module-level ``init`` text is searched with ``re.search``;
    ``None`` is returned when the pattern does not occur in it.
    """
    found = re.search(pattern, init)
    if found is None:
        return None
    return found.group(1)
# Package metadata, extracted from the text of twill/__init__.py (``init``).
# The description is the rest of the line after the first triple double quote.
description = find('"""(.*)')
version = find("__version__ = '(.*)'")
url = find("__url__ = '(.*)'")
download_url = find("__download_url__ = '(.*)'")

# The README is passed on as the long description.  It is decoded explicitly
# as UTF-8 so that the result does not depend on the locale of the machine.
with open("README.md", encoding="utf-8") as readme_file:
    readme = readme_file.read()

# Requirements of the package itself and of its optional extras.
require_twill = ['lxml>=4.9,<5', 'requests>=2.27,<3', 'pyparsing>=3.0,<4']
require_docs = ['sphinx>=5.2,<6', 'sphinx_rtd_theme>=1,<2']
require_tidy = ['pytidylib>=0.3,<0.4']
require_quixote = ['quixote>=3.6,<4']
require_wsgi_intercept = ['wsgi_intercept>=1.10,<2']
# The tests extra combines pytest with the tidy, quixote and wsgi_intercept
# requirements listed above.
require_tests = ['pytest>=7,<7.1'] + (
    require_tidy + require_quixote + require_wsgi_intercept)
def main():
    """Call ``setup`` with the metadata and requirements of the twill package."""
    console_scripts = [
        'twill=twill.shell:main',
        'twill-fork=twill.fork:main',
    ]
    project_urls = {
        'Source': 'https://github.com/twill-tools/twill',
        'Issues': 'https://github.com/twill-tools/twill/issues',
        'Documentation': 'https://twill-tools.github.io/twill/',
        'ChangeLog': 'https://twill-tools.github.io/twill/changelog.html',
    }
    classifiers = [
        'Development Status :: 6 - Mature',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: MIT License',
        'Natural Language :: English',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Other Scripting Engines',
        'Topic :: Internet :: WWW/HTTP',
        'Topic :: Software Development :: Testing',
    ]
    extras_require = {
        'docs': require_docs,
        'tidy': require_tidy,
        'tests': require_tests,
    }
    setup(
        name='twill',
        version=version,
        url=url,
        download_url=download_url,
        description=description,
        author='C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.',
        author_email='titus@idyll.org',
        license='MIT',
        packages=['twill', 'twill.extensions'],
        entry_points={'console_scripts': console_scripts},
        maintainer='Christoph Zwerschke',
        maintainer_email='cito@online.de',
        long_description=readme,
        long_description_content_type='text/markdown',
        project_urls=project_urls,
        classifiers=classifiers,
        install_requires=require_twill,
        extras_require=extras_require,
    )


# Run the setup only when this file is executed as a script.
if __name__ == '__main__':
    main()
"""Test the WSGI intercept code."""
from twill import commands
from wsgi_intercept import ( # type: ignore
requests_intercept, add_wsgi_intercept, remove_wsgi_intercept)
def setup_module():
    """Module-level setup: install the wsgi_intercept hook for requests."""
    requests_intercept.install()
def teardown_module():
    """Module-level teardown: uninstall the wsgi_intercept hook for requests."""
    requests_intercept.uninstall()
# Names of the WSGI test apps that have been called; the apps below add
# their own name to this set and the tests inspect it.
app_was_hit = set()
def simple_app(_environ, start_response):
    """Minimal WSGI application that returns a fixed plain-text body.

    The call is recorded in ``app_was_hit`` under the name 'simple_app'.
    """
    start_response('200 OK', [('Content-type', 'text/plain')])
    app_was_hit.add('simple_app')
    return [b'WSGI intercept successful!\n']
def write_app(_environ, start_response):
    """WSGI application that sends its body through the legacy write callable.

    The body is passed to the callable returned by ``start_response`` and
    an empty list is returned as the response iterable.  The call is
    recorded in ``app_was_hit`` under the name 'write_app'.
    """
    write = start_response('200 OK', [('Content-type', 'text/plain')])
    app_was_hit.add('write_app')
    write(b'WSGI intercept successful!\n')
    return []
class IteratorApp:
    """WSGI application object that acts as its own response iterator.

    Calling the instance starts the response and returns the instance
    itself; iterating over it then yields the chunks of ``content``.
    """

    content = [b'Hello, world']

    def __call__(self, environ, start_response):
        """Start a plain-text '200 OK' response and return ``self`` as body."""
        headers = [('Content-type', 'text/plain')]
        start_response('200 OK', headers)
        return self

    def __iter__(self):
        """Restart the iteration over ``content`` and return ``self``."""
        self._iter = iter(self.content)
        return self

    def __next__(self):
        """Return the next chunk of ``content``."""
        return next(self._iter)
def test_intercept():
    """Check that a request to an intercepted host is served by simple_app."""
    app_was_hit.clear()
    add_wsgi_intercept('localhost', 8080, lambda: simple_app)
    try:
        # registering the intercept alone must not call the app
        assert not app_was_hit
        commands.go('http://localhost:8080/')
        commands.show()
        commands.find("WSGI intercept successful")
        assert 'simple_app' in app_was_hit
    finally:
        # always unregister, so that a failure above cannot leave the
        # intercept installed for the tests that run afterwards
        remove_wsgi_intercept('localhost', 8080)
def test_write_intercept():
    """Check that a request to an intercepted host is served by write_app."""
    app_was_hit.clear()
    add_wsgi_intercept('localhost', 8080, lambda: write_app)
    try:
        # registering the intercept alone must not call the app
        assert not app_was_hit
        commands.go('http://localhost:8080/')
        commands.show()
        commands.find("WSGI intercept successful")
        assert 'write_app' in app_was_hit
    finally:
        # always unregister, so that a failure above cannot leave the
        # intercept installed for the tests that run afterwards
        remove_wsgi_intercept('localhost', 8080)
def test_iter_stuff():
    """Check that the body of IteratorApp is delivered exactly once."""
    add_wsgi_intercept('localhost', 80, IteratorApp)
    try:
        commands.go('http://localhost:80/')
        commands.show()
        commands.find("Hello, world")
        # the content must not have been iterated over twice
        commands.notfind("Hello, worldHello, world")
    finally:
        # always unregister, so that a failure above cannot leave the
        # intercept installed for the tests that run afterwards
        remove_wsgi_intercept('localhost', 80)
[console_scripts]
twill = twill.shell:main
twill-fork = twill.fork:main
Metadata-Version: 2.1
Name: twill
Version: 3.1
Summary: twill web browsing and testing language and associated utilities.
Home-page: https://github.com/twill-tools/twill
Download-URL: https://pypi.org/project/twill/
Author: C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
Author-email: titus@idyll.org
Maintainer: Christoph Zwerschke
Maintainer-email: cito@online.de
License: MIT
Project-URL: Source, https://github.com/twill-tools/twill
Project-URL: Issues, https://github.com/twill-tools/twill/issues
Project-URL: Documentation, https://twill-tools.github.io/twill/
Project-URL: ChangeLog, https://twill-tools.github.io/twill/changelog.html
Classifier: Development Status :: 6 - Mature
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: System Administrators
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Other Scripting Engines
Classifier: Topic :: Internet :: WWW/HTTP
Classifier: Topic :: Software Development :: Testing
Description-Content-Type: text/markdown
Provides-Extra: docs
Provides-Extra: tidy
Provides-Extra: tests
License-File: LICENSE
twill: a simple scripting language for web browsing
===================================================
twill is a simple scripting language intended for programmatic or automated browsing of websites.
The current version 3.1 supports Python 3.6 to 3.11.
See also the [changelog](https://twill-tools.github.io/twill/changelog.html) for a summary of the things that have been changed and improved since version 2.0, and the [acknowledgements](https://twill-tools.github.io/twill/overview.html#acknowledgements) for a short overview of the earlier history of twill.
The full [documentation](https://twill-tools.github.io/twill/) is included in the distribution and provided online.
Copyright (c) 2005-2022 by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
Newer versions have been created and are maintained by [Christoph Zwerschke](https://github.com/Cito).
twill is available for use, modification, and distribution under the MIT license.
lxml<5,>=4.9
requests<3,>=2.27
pyparsing<4,>=3.0
[docs]
sphinx<6,>=5.2
sphinx_rtd_theme<2,>=1
[tests]
pytest<7.1,>=7
pytidylib<0.4,>=0.3
quixote<4,>=3.6
wsgi_intercept<2,>=1.10
[tidy]
pytidylib<0.4,>=0.3
.bumpversion.cfg
.flake8
LICENSE
MANIFEST.in
README.md
setup.cfg
setup.py
tox.ini
docs/Makefile
docs/browsing.rst
docs/changelog.rst
docs/commands.rst
docs/conf.py
docs/developer.rst
docs/examples.rst
docs/extensions.rst
docs/index.rst
docs/install.rst
docs/make.bat
docs/other.rst
docs/overview.rst
docs/python-api.rst
docs/requirements.txt
docs/testing.rst
extras/examples/discard-sf-mailman-msgs.twill
extras/examples/extend-example.py
extras/examples/quixote-demo.twill
extras/examples/set-user-agent.twill
extras/maxq/README.txt
extras/maxq/TwillScriptGenerator.java
tests/README.txt
tests/__init__.py
tests/conftest.py
tests/mock_dns.py
tests/server.py
tests/test_back.py
tests/test_back.twill
tests/test_basic.py
tests/test_basic.twill
tests/test_broken_html.py
tests/test_buildbot.py
tests/test_check_links.py
tests/test_check_links.twill
tests/test_checkbox.py
tests/test_dns.py
tests/test_dns.twill
tests/test_equiv_refresh.py
tests/test_equiv_refresh.twill
tests/test_find.py
tests/test_find.twill
tests/test_form.py
tests/test_form.twill
tests/test_formfill.py
tests/test_formfill.twill
tests/test_gather.py
tests/test_global_form.py
tests/test_global_form.twill
tests/test_go.py
tests/test_go.twill
tests/test_go_exit.twill
tests/test_go_fail.twill
tests/test_go_fail2.twill
tests/test_headers.py
tests/test_headers.twill
tests/test_http_auth.py
tests/test_http_auth.twill
tests/test_http_codes.py
tests/test_http_codes.twill
tests/test_info.py
tests/test_info.twill
tests/test_match_parse.py
tests/test_match_parse.twill
tests/test_misc.py
tests/test_multisub.py
tests/test_multisub.twill
tests/test_radiobutton.py
tests/test_shell.py
tests/test_shell.twill
tests/test_shell_fail.twill
tests/test_show.py
tests/test_show.twill
tests/test_tidy.py
tests/test_two_forms.py
tests/test_unit_support.py
tests/test_unit_support.twill
tests/test_utils.py
tests/test_variables.py
tests/test_variables.twill
tests/test_wsgi_intercept.py
tests/test_xml.py
tests/test_xml.twill
tests/utils.py
tests/test_gather/00-testme/x-script.twill
tests/test_gather/00-testme/y-script.txt
tests/test_gather/01-test/a.notwill
tests/test_gather/01-test/b.twill
tests/test_gather/02-test2/c.bak
tests/test_gather/02-test2/c.twill
tests/test_gather/02-test2/02-subtest/d.twill
twill/__init__.py
twill/__main__.py
twill/agents.py
twill/browser.py
twill/commands.py
twill/errors.py
twill/fork.py
twill/namespaces.py
twill/parse.py
twill/shell.py
twill/unit.py
twill/utils.py
twill.egg-info/PKG-INFO
twill.egg-info/SOURCES.txt
twill.egg-info/dependency_links.txt
twill.egg-info/entry_points.txt
twill.egg-info/requires.txt
twill.egg-info/top_level.txt
twill/extensions/__init__.py
twill/extensions/argparse.py
twill/extensions/check_links.py
twill/extensions/csv_iterate.py
twill/extensions/dirstack.py
twill/extensions/dns_check.py
twill/extensions/formfill.py
twill/extensions/mailman_sf.py
twill/extensions/match_parse.py
twill/extensions/require.py
twill/extensions/shell_test_extension.py
# This file is part of the twill source distribution.
#
# twill is an extensible scriptlet language for testing Web apps,
# available at https://github.com/twill-tools/twill.
#
# Copyright (c) 2005-2022
# by C. Titus Brown, Ben R. Taylor, Christoph Zwerschke et al.
#
# This program and all associated source code files are released under the
# terms of the MIT license; please see the included LICENSE file for more
# information, or go to https://opensource.org/licenses/mit-license.php.
"""twill web browsing and testing language and associated utilities.
A scripting system for automating web browsing. Useful for testing
web pages or grabbing data from password-protected sites automatically.
"""
import logging
import sys
import os.path
# Package metadata.  setup.py extracts these values from the text of this
# file with regular expressions that expect single-quoted assignments, so
# the form of these lines should be kept.
__version__ = '3.1'
__url__ = 'https://github.com/twill-tools/twill'
__download_url__ = 'https://pypi.org/project/twill/'

# Names exported by ``from twill import *``.
__all__ = [
    'browser', 'execute_file', 'execute_string',
    'log', 'set_log_level', 'set_output', 'set_err_out',
    'twill_ext', 'TwillCommandLoop']
# directory that contains this package
this_dir = os.path.dirname(__file__)

# The bundled extensions directory is put at the *end* of sys.path, so that
# extensions provided by the user are found before the ones shipped here.
extensions = os.path.join(this_dir, 'extensions')
sys.path.append(extensions)

# file extension for twill scripts
twill_ext = '.twill'

# level names accepted by set_log_level(), mapped to the logging constants
log_levels = {
    name: getattr(logging, name)
    for name in ('CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET')
}

log = logging.getLogger(__name__)
handler = None  # stream handler currently attached to the logger, if any

# the standard streams as they were at import time, used as defaults
# by set_output() and set_err_out()
stdout = sys.stdout
stderr = sys.stderr
def set_log_level(level=None):
    """Set the level of the twill logger.

    The level can be passed as a number or as a level name, which is
    looked up case-insensitively in ``log_levels``.  If no level is
    passed, INFO is used.
    """
    if isinstance(level, str):
        level = log_levels[level.upper()]
    elif level is None:
        level = logging.INFO
    log.setLevel(level)
def set_output(stream=None):
    """Redirect the standard output and the log output to a stream.

    If no stream is passed, the stream saved in ``stdout`` is used.
    The handler that was installed by a previous call is removed from the
    logger before the handler for the new stream is added.
    """
    global handler
    target = stdout if stream is None else stream
    if handler:
        log.removeHandler(handler)
    new_handler = logging.StreamHandler(target)
    log.addHandler(new_handler)
    handler = new_handler
    sys.stdout = target
def set_err_out(stream=None):
    """Redirect the standard error output to a stream.

    If no stream is passed, the stream saved in ``stderr`` is used.
    """
    sys.stderr = stderr if stream is None else stream
def shutdown():
    """Flush the standard streams, then shut down the logging system."""
    for stream in (sys.stdout, sys.stderr):
        stream.flush()
    logging.shutdown()
set_log_level()
set_output()
# a convenience function:
from .browser import browser # noqa: ignore=E402
# the two core components of twill:
from .parse import execute_file, execute_string # noqa: ignore=E402
from .shell import TwillCommandLoop # noqa: ignore=E402
# initialize global dict
from . import namespaces # noqa: ignore=E402
namespaces.init_global_dict()
"""main module for the twill package"""
from . import shell
# Delegate to the entry point of the twill shell when this module is
# executed as a script.
if __name__ == '__main__':
    shell.main()
"""
Map of various User-Agent string shortcuts that can be used for testing.
"""
from typing import Dict
# noinspection HttpUrlsUsage
agents: Dict[str, str] = dict(
# Desktop
chrome_40='Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36'
' (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36',
chrome_107='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
' (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
edge_12='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
' (KHTML, like Gecko) Chrome/42.0.2311.135'
' Safari/537.36 Edge/12.246',
edge_107='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
' (KHTML, like Gecko) Chrome/107.0.0.0'
' Safari/537.36 Edg/107.0.1418.26',
firefox_40='Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0)'
' Gecko/20100101 Firefox/40.1',
firefox_106='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0)'
' Gecko/20100101 Firefox/106.0',
ie_3='Mozilla/2.0 (compatible; MSIE 3.0; Windows 3.1)',
ie_4='Mozilla/4.0 (compatible; MSIE 4.0; Windows NT 5.0)',
ie_5='Mozilla/4.0 (compatible; MSIE 5.0; Windows NT 5.0)',
ie_6='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
ie_7='Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)',
ie_8='Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)',
ie_9='Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0)',
ie_10='Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
ie_11='Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko',
ie_mobile_9='Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5;'
' Trident/5.0; IEMobile/9.0)',
opera_7='Opera/7.0 (Windows NT 5.1; U) [en]',
opera_8='Opera/8.00 (Windows NT 5.1; U; en)',
opera_9='Opera/9.00 (Windows NT 5.2; U; en)',
opera_10='Opera/9.80 (Windows NT 6.1; U; en) Presto/2.2.15 Version/10.00',
opera_11='Opera/9.80 (Windows NT 6.1; U; en) Presto/2.7.62 Version/11.00',
opera_12='Opera/12.0 (Windows NT 5.1; U; en)'
' Presto/22.9.168 Version/12.00',
opera_mini_7='Opera/9.80 (Android; Opera Mini/7.0.29952/28.2075; en)'
' Presto/2.8.119 Version/11.10',
opera_mini_9='Opera/9.80 (J2ME/MIDP; Opera Mini/9 (Compatible; MSIE:9.0;'
' iPhone; BlackBerry9700; AppleWebKit/24.746; en)'
' Presto/2.5.25 Version/10.54',
konqueror_3='Mozilla/5.0 (compatible; Konqueror/3.0; Linux)',
konqueror_4='Mozilla/5.0 (compatible; Konqueror/4.0; Linux)'
' KHTML/4.0.3 (like Gecko)',
lynx_2_8='Lynx/2.8.7rel.2 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.0.0a',
w3m_0_5='w3m/0.5.2 (Linux i686; it; Debian-3.0.6-3)',
netscape_3='Mozilla/3.0 (X11; I; AIX 2)',
netscape_4='Mozilla/4.0 (compatible; Mozilla/5.0 ; Linux i686)',
netscape_4_5='Mozilla/4.5 [en] (X11; I; SunOS 5.6 sun4u)',
netscape_7='Mozilla/5.0 (X11; U; SunOS sun4u; en-US; rv:1.0.1)'
' Gecko/20020921 Netscape/7.0',
netscape_9='Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.8pre)'
' Gecko/20071015 Firefox/2.0.0.7 Navigator/9.0',
palemoon_25='Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:25.6)'
' Gecko/20150723 Firefox/31.9 PaleMoon/25.6.0',
safari_1='Mozilla/5.0 (Macintosh; PPC Mac OS X; en)'
' AppleWebKit/85.7 (KHTML, like Gecko) Safari/85.6',
safari_2='Mozilla/5.0 (Macintosh; PPC Mac OS; en)'
' AppleWebKit/412 (KHTML, like Gecko) Safari/412',
safari_3='Mozilla/5.0 (Macintosh; Intel Mac OS X; en)'
' AppleWebKit/522.7 (KHTML, like Gecko) Version/3.0 Safari/522.7',
safari_4='Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_6; en)'
' AppleWebKit/530.9+ (KHTML, like Gecko)'
'Version/4.0 Safari/528.16',
safari_5='Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en)'
' AppleWebKit/534.1+ (KHTML, like Gecko)'
' Version/5.0 Safari/533.16',
safari_6='Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536'
'.26 (KHTML, like Gecko)'
' Version/6.0 Mobile/10A5355d Safari/8536.25',
safari_7='Mozilla/5.0 (iPad; CPU OS 7_1_2 like Mac OS X) AppleWebKit/537'
'.51.2 (KHTML, like Gecko)'
' Version/7.0 Mobile/11D257 Safari/9537.53',
safari_605='Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0)'
' AppleWebKit/605.1.15 (KHTML, like Gecko)'
' Version/16.1 Safari/605.1.15',
vivaldi_5='Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36'
' (KHTML, like Gecko) Chrome/107.0.0.0'
' Safari/537.36 Vivaldi/5.4.2753.51',
# Android phones
galaxy_s7='Mozilla/5.0 (Linux; Android 7.0; SM-G930VC Build/NRD90M; wv)'
' AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0'
' Chrome/58.0.3029.83 Mobile Safari/537.36',
galaxy_s10='Mozilla/5.0 (Linux; Android 9; SM-G973U Build/PPR1.180610.011)'
' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100'
' Mobile Safari/537.36',
galaxy_s20='Mozilla/5.0 (Linux; Android 10;'
' SM-G980F Build/QP1A.190711.020; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/78.0.3904.96'
' Mobile Safari/537.36',
galaxy_s22='Mozilla/5.0 (Linux; Android 12;'
' SM-S906N Build/QP1A.190711.020; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/80.0.3987.119'
' Mobile Safari/537.36',
google_pixel='Mozilla/5.0 (Linux; Android 7.1.1; Google Pixel'
' Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko)'
' Version/4.0 Chrome/54.0.2840.85 Mobile Safari/537.36',
google_pixel4='Mozilla/5.0 (Linux; Android 10; Google Pixel 4'
' Build/QD1A.190821.014.C2; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/78.0.3904.108'
' Mobile Safari/537.36',
google_pixel_6='Mozilla/5.0 (Linux; Android 12; Pixel 6'
' Build/SD1A.210817.023; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/94.0.4606.71'
' Mobile Safari/537.36',
nexus_6p='Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6P Build/MMB29P)'
' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83'
' Mobile Safari/537.36',
sony_xperia_1='Mozilla/5.0 (Linux; Android 9;'
' J8110 Build/55.0.A.0.552; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/71.0.3578.99'
' Mobile Safari/537.36',
htc_one_x10='Mozilla/5.0 (Linux; Android 6.0; HTC One'
' X10 Build/MRA58K; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/61.0.3163.98'
' Mobile Safari/537.36',
# iPhones
iphone_6='Mozilla/5.0 (Apple-iPhone7C2/1202.466; U; CPU like Mac OS X; en)'
' AppleWebKit/420+ (KHTML, like Gecko) Version/3.0'
' Mobile/1A543 Safari/419.3',
iphone_7='Mozilla/5.0 (iPhone9,3; U; CPU iPhone OS 10_0_1 like Mac OS X)'
' AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0'
' Mobile/14A403 Safari/602.1',
iphone_8='Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X)'
' AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0'
' Mobile/15A5341f Safari/604.1',
iphone_x='Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X)'
' AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0'
' Mobile/15A372 Safari/604.1',
iphone_11='Mozilla/5.0 (iPhone12,1; U; CPU iPhone OS 13_0 like Mac OS X)'
' AppleWebKit/602.1.50 (KHTML, like Gecko)'
' Version/10.0 Mobile/15E148 Safari/602.1',
iphone_12='Mozilla/5.0 (iPhone13,2; U; CPU iPhone OS 14_0 like Mac OS X)'
' AppleWebKit/602.1.50 (KHTML, like Gecko)'
' Version/10.0 Mobile/15E148 Safari/602.1',
iphone_13_pro_max='Mozilla/5.0 (iPhone14,3; U; CPU iPhone OS 15_0'
' like Mac OS X) AppleWebKit/602.1.50'
' (KHTML, like Gecko) Version/10.0'
' Mobile/19A346 Safari/602.1',
iphone_se_3='Mozilla/5.0 (iPhone14,6; U; CPU iPhone OS 15_4'
' like Mac OS X) AppleWebKit/602.1.50'
' (KHTML, like Gecko) Version/10.0 Mobile/19E241 Safari/602.1',
# MS Windows phones
ms_lumia_650='Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft;'
' RM-1152) AppleWebKit/537.36 (KHTML, like Gecko)'
' Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254',
ms_lumia_950='Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft;'
' Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko)'
' Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/13.1058',
# Tablets
galaxy_tab_s8='Mozilla/5.0 (Linux; Android 12;'
' SM-X906C Build/QP1A.190711.020; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0 Chrome/80.0.3987.119'
' Mobile Safari/537.36',
lenovo_yoga_tab_11='Mozilla/5.0 (Linux; Android 11; Lenovo YT-J706X)'
' AppleWebKit/537.36 (KHTML, like Gecko)'
' Chrome/96.0.4664.45 Safari/537.36',
sony_xperia_tab_z4='Mozilla/5.0 (Linux; Android 6.0.1;'
' SGP771 Build/32.2.A.0.253; wv) AppleWebKit/537.36'
' (KHTML, like Gecko) Version/4.0'
' Chrome/52.0.2743.98 Safari/537.36',
galaxy_tab_s3='Mozilla/5.0 (Linux; Android 7.0; SM-T827R4 Build/NRD90M)'
' AppleWebKit/537.36 (KHTML, like Gecko)'
' Chrome/60.0.3112.116 Safari/537.36',
amazon_fire_hdx_7='Mozilla/5.0 (Linux; Android 4.4.3; KFTHWI Build/KTU84M)'
' AppleWebKit/537.36 (KHTML, like Gecko) Silk/47.1.79'
' like Chrome/47.0.2526.80 Safari/537.36',
lg_g_pad_7='Mozilla/5.0 (Linux; Android 5.0.2; LG-V410/V41020c'
' Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko)'
' Version/4.0 Chrome/34.0.1847.118 Safari/537.36',
# E-Readers
kindle_4='Mozilla/5.0 (X11; U; Linux armv7l like Android; en-us)'
' AppleWebKit/531.2+ (KHTML, like Gecko) Version/5.0'
' Safari/533.2+ Kindle/3.0+',
kindle_3='Mozilla/5.0 (Linux; U; en-US) AppleWebKit/528.5+'
' (KHTML, like Gecko, Safari/528.5+) Version/4.0 Kindle/3.0'
' (screen 600x800; rotate)',
# Set tops
chromecast='Mozilla/5.0 (CrKey armv7l 1.5.16041) AppleWebKit/537.36'
' (KHTML, like Gecko) Chrome/31.0.1650.0 Safari/537.36',
amazon_4k_fire_tv='Mozilla/5.0 (Linux; Android 5.1; AFTS Build/LMY47O)'
' AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0'
' Chrome/41.99900.2250.0242 Safari/537.36',
nexus_player='Dalvik/2.1.0 (Linux; U; Android 6.0.1;'
' Nexus Player Build/MMB29T)',
apple_tv_6='AppleTV11,1/11.1',
apple_tv_5='AppleTV6,2/11.1',
apple_tv_4='AppleTV5,3/9.1.1',
# Game consoles
playstation_5='Mozilla/5.0 (PlayStation; PlayStation 5/2.26)'
' AppleWebKit/605.1.15 (KHTML, like Gecko)'
' Version/13.0 Safari/605.1.15',
playstation_4='Mozilla/5.0 (PlayStation 4 3.11) AppleWebKit/537.73'
' (KHTML, like Gecko)',
xbox_x='Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox Series X)'
' AppleWebKit/537.36 (KHTML, like Gecko)'
' Chrome/48.0.2564.82 Safari/537.36 Edge/20.02',
xbox_one='Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Xbox; Xbox One)'
' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0'
' Mobile Safari/537.36 Edge/13.10586',
nintendo_switch='Mozilla/5.0 (Nintendo Switch; WifiWebAuthApplet)'
' AppleWebKit/601.6 (KHTML, like Gecko) NF/4.0.0.5.10'
' NintendoBrowser/5.1.0.13343',
# Bots
google_bot_2='Mozilla/5.0 (compatible; Googlebot/2.1;'
' +http://www.google.com/bot.html)',
bing_bot_2='Mozilla/5.0 (compatible; bingbot/2.0;'
' +http://www.bing.com/bingbot.htm)',
yahoo_bot='Mozilla/5.0 (compatible; Yahoo! Slurp;'
' http://help.yahoo.com/help/us/ysearch/slurp)',
)
"""This module implements the TwillBrowser."""
import pickle
import re
from typing import (
cast, Callable, Dict, IO, List, Optional, Sequence, Tuple, Union)
from urllib.parse import urljoin
from requests import Session
from requests.auth import HTTPBasicAuth
from requests.cookies import RequestsCookieJar
from requests.exceptions import InvalidSchema, ConnectionError
from requests.structures import CaseInsensitiveDict
from . import log, __version__
from .utils import (
get_equiv_refresh_interval, html_to_tree, print_form, trunc, unique_match,
CheckboxGroup, FieldElement, FormElement, HtmlElement,
InputElement, Link, UrlWithRealm, RadioGroup, Response, ResultWrapper)
from .errors import TwillException
__all__ = ['browser']
def _disable_insecure_request_warnings() -> None:
    """Silence the InsecureRequestWarning issued by urllib3.

    The urllib3 package reachable through requests is preferred;
    the standalone urllib3 package is used if that import fails.
    """
    try:
        from requests.packages import urllib3 as http_lib  # type: ignore
    except ImportError:
        import urllib3 as http_lib  # type: ignore
    # noinspection PyUnresolvedReferences
    http_lib.disable_warnings(http_lib.exceptions.InsecureRequestWarning)
def _set_http_connection_debuglevel(level: int) -> None:
"""Set the debug level for the connection pool."""
from http.client import HTTPConnection
HTTPConnection.debuglevel = level
class TwillBrowser:
    """A simple, stateful browser.

    The browser keeps a requests Session (holding cookies and request
    headers), the result of the last page load, a history of the results
    that were loaded before, and the form that is currently selected.
    """

    user_agent = f'TwillBrowser/{__version__}'

    def __init__(self):
        self.result: Optional[ResultWrapper] = None
        self.last_submit_button: Optional[InputElement] = None
        self.first_error: Optional[str] = None

        # whether meta refresh will be displayed
        self.show_refresh = False

        # debug level to be used for the connection pool
        self._debug_level = 0

        # whether the SSL cert will be verified, or can be a ca bundle path
        self.verify = False

        # Session stores cookies
        self._session = Session()

        # A lxml FormElement, None until a form is selected
        # replaces self._browser.form from mechanize
        self._form: Optional[FormElement] = None
        self._form_files: Dict[str, IO] = {}

        # A dict of HTTPBasicAuth from requests, keyed off URL
        self._auth: Dict[UrlWithRealm, HTTPBasicAuth] = {}

        # callables to be called after each page load.
        self._post_load_hooks: List[Callable] = []

        self._history: List[ResultWrapper] = []

        # set default headers
        self.reset_headers()

    def _assert_result_for(self, what: str) -> ResultWrapper:
        """Return the current result or raise a TwillException.

        The parameter names the thing that was requested and is only
        used in the error message.
        """
        if not self.result:
            raise TwillException(f"Cannot get {what} since there is no page.")
        return self.result

    @property
    def debug_level(self) -> int:
        """Get the debug level used for the HTTP connections."""
        return self._debug_level

    @debug_level.setter
    def debug_level(self, level: int) -> None:
        """Set the debug level used for the HTTP connections."""
        _set_http_connection_debuglevel(level)
        self._debug_level = level

    def reset(self):
        """Reset the browser"""
        # Close the old session before it is replaced in __init__,
        # so that its connection pools are released and not leaked.
        session = getattr(self, '_session', None)
        if session is not None:
            session.close()
        self.__init__()

    @property
    def creds(self) -> Dict[UrlWithRealm, HTTPBasicAuth]:
        """Get the credentials for basic authentication."""
        return self._auth

    def add_creds(self, url: UrlWithRealm, user: str, password: str) -> None:
        """Set the credentials for basic authentication."""
        self._auth[url] = HTTPBasicAuth(user, password)

    def go(self, url: str) -> None:
        """Visit given URL.

        A URL containing '://' is used as given.  Otherwise the URL is
        first joined with the current URL (if there is one), and unless it
        starts with '.', '/' or '?', it is then also tried with 'http://'
        and 'https://' put in front of it.

        Raises a TwillException if none of the candidates can be opened.
        """
        try_urls: List[str] = []
        if '://' in url:
            try_urls.append(url)
        else:  # URL does not have a schema
            # if this is a relative URL, then assume that we want to tack it
            # onto the end of the current URL
            current_url = self.url
            if current_url:
                try_urls.append(urljoin(current_url, url))
            # if this is an absolute URL, it may be just missing the 'http://'
            # at the beginning, try fixing that (mimic browser behavior)
            if not url.startswith(('.', '/', '?')):
                # noinspection HttpUrlsUsage
                try_urls.append(f'http://{url}')
                try_urls.append(f'https://{url}')
        for try_url in try_urls:
            try:
                self._journey('open', try_url)
            except (IOError,
                    ConnectionError, InvalidSchema, UnicodeError) as error:
                log.info("cannot go to '%s': %s", try_url, error)
            else:
                break
        else:  # no candidate could be opened
            raise TwillException(f"cannot go to '{url}'")
        log.info('==> at %s', self.url)

    def reload(self) -> None:
        """Tell the browser to reload the current page."""
        self._journey('reload')
        log.info('==> reloaded')

    def back(self) -> None:
        """Return to previous page, if possible."""
        try:
            self._journey('back')
            log.info('==> back to %s', self.url)
        except TwillException:
            log.warning('==> back at empty page')

    @property
    def code(self) -> int:
        """Get the HTTP status code received for the current page."""
        return self._assert_result_for('status code').http_code

    @property
    def encoding(self) -> Optional[str]:
        """Get the encoding used by the server for the current page."""
        return None if self.result is None else self.result.encoding

    @property
    def html(self) -> str:
        """Get the HTML for the current page."""
        return self._assert_result_for('HTML').text

    @property
    def dump(self) -> bytes:
        """Get the binary content of the current page."""
        return self._assert_result_for('content dump').content

    @property
    def title(self) -> Optional[str]:
        """Get the title of the current page."""
        return self._assert_result_for('title').title

    @property
    def url(self) -> Optional[str]:
        """Get the URL of the current page, or None if there is no page."""
        return self.result.url if self.result else None

    def find_link(self, pattern: str) -> Optional[Link]:
        """Find the first link matching the given regular expression pattern.

        The pattern is searched in the URL and in the link text.
        """
        return self._assert_result_for('links').find_link(pattern)

    def follow_link(self, link: Union[str, Link]) -> None:
        """Follow the given link."""
        self._journey('follow_link', link)
        log.info('==> at %s', self.url)

    @property
    def headers(self) -> CaseInsensitiveDict:
        """Return the request headers currently used by the browser."""
        return self._session.headers

    def reset_headers(self):
        """Reset the request headers currently used by the browser."""
        self.headers.clear()
        self.headers.update({
            'Accept': 'text/html; */*',
            'User-Agent': self.user_agent})

    @property
    def response_headers(self):
        """Get the headers returned with the current page."""
        return self._assert_result_for('headers').headers

    @property
    def agent_string(self) -> Optional[str]:
        """Get the user agent string."""
        return self.headers.get('User-Agent')

    @agent_string.setter
    def agent_string(self, agent: str) -> None:
        """Set the user agent string to the given value."""
        self.headers['User-Agent'] = agent

    def show_forms(self) -> None:
        """Pretty-print all forms on the page.

        Include the global form (form elements outside <form> pairs)
        as forms[0] if present.
        """
        for n, form in enumerate(self.forms, 1):
            print_form(form, n)

    def show_links(self) -> None:
        """Pretty-print all links on the page."""
        info = log.info
        links = self.links
        if links:
            info('\nLinks (%d links total):\n', len(links))
            for n, link in enumerate(links, 1):
                info('\t%d. %s ==> %s', n, trunc(link.text, 40), link.url)
            info('')
        else:
            info('\n** no links **\n')

    def show_history(self) -> None:
        """Pretty-print the history of links visited."""
        info = log.info
        history = self._history
        if history:
            info('\nHistory (%d pages total):\n', len(history))
            for n, page in enumerate(history, 1):
                info('\t%d. %s', n, page.url)
            info('')
        else:
            info('\n** no history **\n')

    @property
    def links(self) -> List[Link]:
        """Return a list of all links on the page."""
        return self._assert_result_for('links').links

    @property
    def history(self) -> List[ResultWrapper]:
        """Return a list of all pages visited by the browser."""
        return self._history

    @property
    def forms(self) -> List[FormElement]:
        """Return a list of forms on the page.

        This includes the global form at index 0 if present.
        """
        return self._assert_result_for('forms').forms

    def form(self, name: Union[str, int] = 1) -> Optional[FormElement]:
        """Return the first form that matches the given form name."""
        return self._assert_result_for('form').form(name)

    def form_field(self, form: Optional[FormElement] = None,
                   name_or_num: Union[str, int] = 1) -> FieldElement:
        """Return the control that matches the given field name.

        Must be a *unique* regex/exact string match, but the returned
        control can also be a CheckboxGroup or RadioGroup list.

        If no form is passed, the currently selected form is used.
        The candidates are checked in this order: exact id, exact name,
        field number, regex match on the name, exact field value.

        Raises a TwillException if no such field or multiple fields are found.
        """
        if form is None:
            form = self._form
            if form is None:
                raise TwillException("Must specify a form for the field")

        inputs = form.inputs
        found_multiple = False

        name = name_or_num if isinstance(name_or_num, str) else None
        if name:
            if name in form.fields:
                match_name = [c for c in inputs if c.name == name]
                if len(match_name) > 1:
                    # several checkboxes or radio buttons sharing one name
                    # are returned together as a group
                    if all(getattr(c, 'type', None) == 'checkbox'
                           for c in match_name):
                        return CheckboxGroup(
                            cast(List[InputElement], match_name))
                    if all(getattr(c, 'type', None) == 'radio'
                           for c in match_name):
                        return RadioGroup(cast(List[InputElement], match_name))
            else:
                match_name = None

            # test exact match to id
            match_id = [c for c in inputs if c.get('id') == name]
            if match_id:
                if unique_match(match_id):
                    return match_id[0]
                found_multiple = True

            # test exact match to name
            if match_name:
                if unique_match(match_name):
                    return match_name[0]
                found_multiple = True

        num = name_or_num if isinstance(name_or_num, int) else None
        if num is None and name and name.isdigit():
            # isdigit() also accepts characters that int() rejects
            try:
                num = int(name)
            except ValueError:
                pass

        # test field index
        if num is not None:
            try:
                return list(inputs)[num - 1]
            except IndexError:
                pass

        if name:
            # test regex match
            regex = re.compile(name)
            match_name = [c for c in inputs
                          if c.name and regex.search(c.name)]
            if match_name:
                if unique_match(match_name):
                    return match_name[0]
                found_multiple = True

            # test field values
            match_value = [c for c in inputs if c.value == name]
            if match_value:
                if len(match_value) == 1:
                    return match_value[0]
                found_multiple = True

        # error out
        if found_multiple:
            raise TwillException(f'multiple matches to "{name_or_num}"')
        raise TwillException(f'no field matches "{name_or_num}"')

    def add_form_file(self, field_name: str, fp: IO) -> None:
        """Register a file to be sent with the next form submission."""
        self._form_files[field_name] = fp

    def clicked(self, form: FormElement, control: FieldElement) -> None:
        """Record a 'click' in a specific form."""
        if self._form != form:
            # construct a function to choose a particular form;
            # select_form can use this to pick out a precise form.
            self._form = form
            self.last_submit_button = None
        # record the last submit button clicked.
        if getattr(control, 'type', None) in ('submit', 'image'):
            self.last_submit_button = cast(InputElement, control)

    def submit(self, field_name: Optional[Union[str, int]] = None,
               form_name: Optional[Union[str, int]] = None) -> None:
        """Submit the last or specified form using the given field.

        Without a form name, the currently selected form is used, or the
        only form on the page if no form has been selected.  Without a
        field name, the last clicked submit button of that form is used,
        or else the first submit button of the form if there is one.

        Raises a TwillException if the page has no forms, or if it has
        several forms and none of them has been specified or selected.
        """
        forms = self.forms
        if not forms:
            raise TwillException("There are no forms on this page.")

        ctl: Optional[InputElement] = None

        form = self._form if form_name is None else self.form(form_name)
        if form is None:
            if len(forms) > 1:
                raise TwillException(
                    "There is more than one form on this page;"
                    " therefore you must specify a form explicitly"
                    " or select one (use 'fv') before submitting.")
            form = forms[0]

        action = form.action or ''
        if '://' not in action:
            form.action = urljoin(self.url, action)

        # no field name?  see if we can use the last submit button clicked...
        if field_name is None:
            if form is not self._form or self.last_submit_button is None:
                # get first submit button in form.
                submits = [c for c in form.inputs
                           if getattr(c, 'type', None) in ('submit', 'image')]
                if submits:
                    ctl = cast(InputElement, submits[0])
            else:
                ctl = self.last_submit_button
        else:
            # field name given; find it
            ctl = cast(InputElement, self.form_field(form, field_name))

        # now set up the submission by building the request object that
        # will be sent in the form submission.
        if ctl is None:
            log.debug('Note: submit without using a submit button')
        else:
            log.info(
                "Note: submit is using submit button:"
                " name='%s', value='%s'", ctl.get('name'), ctl.value)

        # Add referer information.  This may require upgrading the
        # request object to have an 'add_unredirected_header' function.
        # @BRT: For now, the referrer is always the current page
        # @CTB: this seems like an issue for further work.
        # Note: We do not set Content-Type from form.attrib.get('enctype'),
        # since Requests does a much better job at setting the proper one.
        headers = {'Referer': self.url}

        payload = form.form_values()
        if ctl is not None:
            name = ctl.get('name')
            if name:
                payload.append((name, ctl.value or ''))
        encoded_payload = self._encode_payload(payload)

        # now actually GO
        # (pass self.verify so that forms are submitted with the same
        # certificate verification setting as is used for loading pages)
        if form.method == 'POST':
            if self._form_files:
                r = self._session.post(
                    form.action, data=encoded_payload, headers=headers,
                    files=self._form_files, verify=self.verify)
            else:
                r = self._session.post(
                    form.action, data=encoded_payload, headers=headers,
                    verify=self.verify)
        else:
            r = self._session.get(
                form.action, params=encoded_payload, headers=headers,
                verify=self.verify)

        self._form = None
        self._form_files.clear()
        self.last_submit_button = None
        if self.result is not None:
            self._history.append(self.result)
        self.result = ResultWrapper(r)

    def cookies(self) -> RequestsCookieJar:
        """Get all cookies from the current session."""
        return self._session.cookies

    def save_cookies(self, filename: str) -> None:
        """Save cookies into the given file."""
        with open(filename, 'wb') as f:
            pickle.dump(self._session.cookies, f)

    def load_cookies(self, filename: str) -> None:
        """Load cookies from the given file.

        The file is expected to have been written by save_cookies().
        """
        # NOTE(security): unpickling can execute arbitrary code,
        # so only cookie files from trusted sources must be loaded.
        with open(filename, 'rb') as f:
            self._session.cookies = pickle.load(f)

    def clear_cookies(self) -> None:
        """Delete all the cookies."""
        self._session.cookies.clear()

    def show_cookies(self) -> None:
        """Pretty-print all the cookies."""
        info = log.info
        cookies = self._session.cookies
        n = len(cookies)
        if n:
            info('\nThere are %d cookie(s) in the cookie jar.\n', n)
            for i, cookie in enumerate(cookies, 1):
                info('\t%d. %s', i, cookie)
            info('')
        else:
            # no arguments here, since the message has no placeholders
            info('\nThere are no cookies in the cookie jar.\n')

    def decode(self, value: Union[bytes, str]) -> Union[bytes, str]:
        """Decode a value using the current encoding.

        Values that are not bytes are returned unchanged, and so are
        bytes if the current encoding is not known.
        """
        if isinstance(value, bytes) and self.encoding:
            value = value.decode(self.encoding)
        return value

    def xpath(self, path: str) -> List[HtmlElement]:
        """Evaluate an xpath expression."""
        return self._assert_result_for('xpath').xpath(path)

    def _encode_payload(
            self, payload: Sequence[Tuple[str, Union[str, bytes]]]
            ) -> List[Tuple[str, Union[str, bytes]]]:
        """Encode a payload with the current encoding if not utf-8."""
        encoding = self.encoding
        if not encoding or encoding.lower() in ('utf8', 'utf-8'):
            return list(payload)
        return [(name, val if isinstance(val, bytes) else val.encode(encoding))
                for name, val in payload]

    @staticmethod
    def _get_meta_refresh(
            response: Response) -> Tuple[Optional[int], Optional[str]]:
        """Get meta refresh interval and url from a response.

        Returns (None, None) if the first meta refresh element is missing
        or if its content cannot be parsed as "<interval>; url=<url>"
        with a non-negative interval.
        """
        try:
            tree = html_to_tree(response.text)
        except ValueError:
            # may happen when there is an XML encoding declaration
            tree = html_to_tree(response.content)
        try:
            content = tree.xpath(  # "refresh" is case insensitive
                "//meta[translate(@http-equiv,'REFSH','refsh')="
                "'refresh'][1]/@content")[0]
            interval, url = content.split(';', 1)
            interval = int(interval)
            if interval < 0:
                raise ValueError
            url = url.strip().strip('"').strip().strip("'").strip()
            url = url.split('=', 1)
            if url[0].strip().lower() != 'url':
                raise IndexError
            url = url[1].strip().strip('"').strip().strip("'").strip()
        except (IndexError, ValueError):
            interval = url = None
        else:
            if '://' not in url:  # relative URL, adapt
                url = urljoin(response.url, url)
        return interval, url

    _re_basic_auth = re.compile('Basic realm="(.*)"', re.I)

    def _journey(self, func_name, *args, **_kwargs):
        """Execute the function with the given name and arguments.

        The name should be one of 'open', 'reload', 'back', or 'follow_link'.

        This method then runs that function with the given arguments and turns
        the results into a nice friendly standard ResultWrapper object, which
        is stored as self.result.

        Raises a TwillException for an unknown function name, and for
        'back' if the history is empty.

        (Idea stolen from Python Browsing Probe (PBP).)
        """
        self._form = None
        self._form_files.clear()
        self.last_submit_button = None

        if func_name == 'open':
            url = args[0]

        elif func_name == 'follow_link':
            url = args[0]
            try:
                url = url.url
            except AttributeError:
                pass  # this is already a url
            if '://' not in url and self.url:
                url = urljoin(self.url, url)

        elif func_name == 'reload':
            url = self.url

        elif func_name == 'back':
            if not self._history:
                raise TwillException(
                    "Cannot go back since there is no history.")
            self.result = self._history.pop()
            return

        else:
            raise TwillException(f"Unknown function {func_name!r}")

        r = self._session.get(url, verify=self.verify)
        if r.status_code == 401:
            # the header can be missing, so fall back to an empty string
            header = r.headers.get('WWW-Authenticate') or ''
            realm_match = self._re_basic_auth.match(header)
            if realm_match:
                realm = realm_match.group(1)
                auth = self._auth.get((url, realm)) or self._auth.get(url)
                if auth:
                    r = self._session.get(url, auth=auth, verify=self.verify)

        # handle redirection via meta refresh (not handled in requests)
        refresh_interval = get_equiv_refresh_interval()
        if refresh_interval:
            visited = set()  # break circular refresh chains
            while True:
                interval, url = self._get_meta_refresh(r)
                if not url:
                    break
                if interval >= refresh_interval:
                    (log.info if self.show_refresh else log.debug)(
                        'Meta refresh interval too long: %d', interval)
                    break
                if url in visited:
                    log.warning('Circular meta refresh detected!')
                    break
                (log.info if self.show_refresh else log.debug)(
                    'Meta refresh to new URL: %s', url)
                # use the same verification setting as for the first request
                r = self._session.get(url, verify=self.verify)
                visited.add(url)

        if func_name in ('follow_link', 'open'):
            # If we're really reloading and just didn't say so, don't store
            if self.result is not None and self.result.url != r.url:
                self._history.append(self.result)

        self.result = ResultWrapper(r)
browser = TwillBrowser() # the global twill browser instance
# The browser does not verify certificates by default (verify = False),
# so the InsecureRequestWarning of urllib3 is disabled.
_disable_insecure_request_warnings()
"""
Implementation of all the individual 'twill' commands available
through twill-sh.
"""
import getpass
import re
import time
import sys
from typing import Any, Dict, Optional
from os.path import sep
from . import log, set_output, set_err_out, utils
from .agents import agents
from .browser import browser
from .errors import TwillException, TwillAssertionError
from .namespaces import get_twill_glocals
# Names exported by this module.  Besides the main command names, the list
# also contains alias spellings and abbreviations (such as 'formvalue' and
# 'fv' next to 'form_value').  The 'show' command only dispatches to
# 'show_*' functions whose names appear in this list.
# noinspection SpellCheckingInspection
__all__ = [
'add_auth', 'add_cleanup', 'add_extra_header', 'agent',
'back', 'browser',
'clear_cookies', 'clear_extra_headers',
'code', 'config',
'debug', 'echo', 'exit', 'extend_with',
'find', 'follow',
'form_action', 'formaction', 'fa',
'form_clear', 'formclear', 'form_file', 'formfile',
'form_value', 'formvalue', 'fv',
'get_input', 'getinput', 'get_password', 'getpassword',
'go', 'info', 'load_cookies', 'not_find', 'notfind', 'options',
'redirect_error', 'redirect_output',
'reload', 'reset_browser', 'reset_error', 'reset_output',
'run', 'run_file', 'runfile', 'rf',
'save_cookies', 'save_html',
'setglobal', 'set_global', 'setlocal', 'set_local',
'show', 'showcookies', 'show_cookies', 'show_extra_headers',
'showforms', 'show_forms', 'showhistory', 'show_history',
'showhtml', 'show_html', 'showlinks', 'show_links',
'sleep', 'submit',
'tidy_ok', 'title', 'url']
def reset_browser():
    """>> reset_browser

    Reset the browser completely and restore the default options.
    """
    browser.reset()
    # restore the defaults in place, keeping the same options dict object
    options.clear()
    options.update(default_options)
# noinspection PyShadowingBuiltins
def exit(code: str = '0') -> None:
    """>> exit [<code>]

    Exit twill with the given exit code (the default 0 means "no error").
    """
    exit_status = int(code)
    raise SystemExit(exit_status)
def go(url: str) -> None:
    """>> go <url>

    Let the browser visit the given URL.
    """
    browser.go(url)
def reload() -> None:
    """>> reload

    Let the browser reload the current URL.
    """
    browser.reload()
def code(should_be: str) -> None:
    """>> code <int>

    Assert that the response code for the last page equals the given code.
    """
    actual = browser.code
    if actual != int(should_be):
        raise TwillAssertionError(f"code is {actual} != {should_be}")
def tidy_ok() -> None:
    """>> tidy_ok

    Assert that 'tidy' reports no warnings or errors for the current page.

    If 'tidy' cannot be run, this passes silently, unless the option
    'require_tidy' is set (see the 'config' command).
    """
    page = browser.html
    if page is None:
        raise TwillAssertionError("not viewing HTML!")

    clean_page, errors = utils.run_tidy(page)
    if clean_page is not None:
        if errors:
            raise TwillAssertionError(f"tidy errors:\n====\n{errors}\n====\n")
    elif options.get('require_tidy'):  # tidy doesn't exist...
        raise TwillAssertionError("cannot run 'tidy'")
def url(should_be: str) -> str:
    """>> url <pattern>

    Assert that the current URL matches the given regex pattern.

    The local variable __match__ is set to the matching part of the URL,
    which is the first group of the pattern if the pattern has groups.
    """
    pattern = re.compile(should_be)
    current_url = browser.url
    found = None if current_url is None else pattern.search(current_url)
    if not found:
        shown_url = '' if current_url is None else current_url
        raise TwillAssertionError(
            f"current url is '{shown_url}';\n"
            f"does not match '{should_be}'\n")

    match_str = found.group(1) if found.groups() else found.group(0)
    _, local_dict = get_twill_glocals()
    local_dict['__match__'] = match_str
    return match_str
def follow(what: str) -> str:
    """>> follow <pattern>

    Visit the first link on the page that matches the given regex pattern.

    Returns the URL of the browser after following the link.
    """
    link = browser.find_link(what)
    if not link:
        raise TwillAssertionError(f"no links match to '{what}'")
    browser.follow_link(link)
    return browser.url
_find_flags = dict(i=re.IGNORECASE, m=re.MULTILINE, s=re.DOTALL)
def _parse_find_flags(flags: str) -> int:
"""Helper function to parse the find flags."""
re_flags = 0
for char in flags:
try:
re_flags |= _find_flags[char]
except IndexError:
raise TwillAssertionError(f"unknown 'find' flag {char!r}")
return re_flags
def find(what: str, flags='') -> str:
    """>> find <pattern> [<flags>]

    Succeed if the regular expression pattern can be found on the page.
    Sets the local variable __match__ to the matching text.

    Flags is a string consisting of the following characters:

    * i: ignore case
    * m: multi-line
    * s: dot matches all
    * x: use XPath expressions instead of regular expression

    For explanations of regular expressions, please see the Python re module
    documentation.
    """
    page = browser.html
    local_dict = get_twill_glocals()[1]
    if 'x' not in flags:
        found = re.search(what, page, flags=_parse_find_flags(flags))
        if not found:
            raise TwillAssertionError(f"no match to '{what}'")
        # use the first group if the pattern has groups
        match_str = found.group(1) if found.groups() else found.group(0)
    else:
        elements = browser.xpath(what)
        if not elements:
            raise TwillAssertionError(f"no element to path '{what}'")
        match_str = elements[0].text or ''
    local_dict['__match__'] = match_str
    return match_str
def not_find(what: str, flags='') -> None:
    """>> not_find <pattern> [<flags>]
    Fail if the regular expression pattern can be found on the page.
    """
    try:
        find(what, flags)
    except TwillAssertionError:
        # 'find' failed, which is exactly what this command asserts
        return
    raise TwillAssertionError(f"match to '{what}'")


# noinspection SpellCheckingInspection
notfind = not_find  # backward compatibility and convenience
def back() -> None:
    """>> back
    Return to the previous page.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.back()
def show(what: Optional[str] = None) -> None:
    """>> show [<objects>]
    Show the specified objects (html, cookies, forms, links, history).
    """
    target = what or 'html'
    handler = None
    # Only purely alphabetic names are mapped to a 'show_<name>' command,
    # and only commands exported via __all__ are eligible.
    if target.isalpha():
        handler_name = f'show_{target}'
        if handler_name in __all__:
            handler = globals().get(handler_name)
    if not handler:
        raise TwillException(f'Cannot show "{target}".')
    handler()
def show_html() -> None:
    """>> show_html
    Show the HTML for the current page or show the specified objects
    (which can be cookies, forms, history or links).
    Note: Use browser.html to get the HTML programmatically.
    """
    stripped = browser.html.strip()
    # The page is logged between two empty lines.
    for line in ('', stripped, ''):
        log.info(line)


# noinspection SpellCheckingInspection
showhtml = show_html  # backward compatibility and consistency
def echo(*strs: str) -> None:
    """>> echo <list> <of> <strings>
    Echo the arguments to the screen.
    """
    # Arguments are stringified and joined with single spaces.
    message = ' '.join(str(item) for item in strs)
    log.info(message)
def save_html(filename: Optional[str] = None) -> None:
    """>> save_html [<filename>]
    Save the HTML for the current page into <filename>.
    If no filename given, construct the filename from the URL.
    """
    html = browser.html
    if html is None:
        log.warning("No page to save.")
        return
    if filename is None:
        # Drop the query string, then take the last path segment;
        # fall back to 'index.html' when that segment is empty.
        path = browser.url.split('?', 1)[0]
        filename = path.rsplit('/', 1)[-1] or 'index.html'
        log.info("Using filename '%s'.", filename)
    encoding = browser.encoding or 'utf-8'
    try:
        with open(filename, 'w', encoding=encoding) as out:
            out.write(html)
    except UnicodeEncodeError:
        # Retry as UTF-8 only if the failed attempt used another encoding.
        if encoding == 'utf-8':
            raise
        with open(filename, 'w', encoding='utf-8') as out:
            out.write(html)
def sleep(interval: str = "1") -> None:
    """>> sleep [<interval>]
    Sleep for the specified amount of time.
    If no interval is given, sleep for 1 second.
    """
    # The interval is given as a string and converted to float seconds.
    seconds = float(interval)
    time.sleep(seconds)
def agent(what: str) -> None:
    """>> agent <agent>
    Set the agent string (identifying the browser brand).
    Some convenient shortcuts:
    chrome_107, firefox_106, safari_605, edge_107, ie_11.
    See twill.agents for a list of all available shortcuts.
    """
    key = what.strip()
    # Known shortcuts are expanded; anything else is used literally.
    browser.agent_string = agents.get(key, key)
def submit(submit_button: Optional[str] = None,
           form_name: Optional[str] = None) -> None:
    """>> submit [<submit_button> [<form_name>]]
    Submit the current form (the one last clicked on) by clicking on the
    given submission button. If no 'submit_button' is given, submit the
    current form by using the last clicked submit button.
    The form to submit is the last form clicked on with a 'form_value' command
    unless explicitly specified via 'form_name'.
    The button used to submit is chosen based on 'submit_button'.
    If 'submit_button' is given, it's matched against buttons using
    the same rules that 'form_value' uses. If 'submit_button' is not given,
    this function uses the last submit button clicked on by 'form_value'.
    If none can be found, it submits the form with no submit button clicked.
    """
    # Both arguments are passed through unchanged to the browser.
    browser.submit(submit_button, form_name)
def show_forms() -> None:
    """>> show_forms
    Show all the forms on the current page.
    Note: Use browser.forms to get the forms programmatically.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.show_forms()


# noinspection SpellCheckingInspection
showforms = show_forms  # backward compatibility and convenience
def show_links() -> None:
    """>> show_links
    Show all the links on the current page.
    Note: Use browser.links to get the links programmatically.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.show_links()


# noinspection SpellCheckingInspection
showlinks = show_links  # backward compatibility and convenience
def show_history() -> None:
    """>> show_history
    Show the browser history (what URLs were visited).
    Note: Use browser.history to get the history programmatically.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.show_history()


# noinspection SpellCheckingInspection
showhistory = show_history  # backward compatibility and convenience
def form_clear(form_name: str) -> None:
    """>> form_clear <form_name>
    Run 'clear' on all the controls in this form.
    """
    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")
    # Controls of these types keep their value.
    untouched_types = ('submit', 'image', 'hidden')
    for control in form.inputs:
        attrib = control.attrib
        if 'readonly' in attrib or 'disabled' in attrib:
            continue
        if getattr(control, 'type', None) in untouched_types:
            continue
        del control.value
    # Forget which submit button was clicked last.
    browser.last_submit_button = None


# noinspection SpellCheckingInspection
formclear = form_clear  # backward compatibility and convenience
def form_value(form_name: str, field_name: str, value: str) -> None:
    """>> form_value <form_name> <field_name> <value>
    Set value of a form field.
    There are some ambiguities in the way 'form_value' deals with lists:
    'form_value' will *add* the given value to a list of multiple selection,
    for lists that allow it.
    Forms are matched against 'form_name' as follows:
    1. regex match to actual form name;
    2. if 'form_name' is an integer, it's tried as an index.
    Form controls are matched against 'field_name' as follows:
    1. unique exact match to control name;
    2. unique regex match to control name;
    3. if field_name is an integer, it's tried as an index;
    4. unique & exact match to submit-button values.
    'form_value' ignores read-only fields completely; if they're readonly,
    nothing is done, unless the config options ('config' command) are
    changed.
    'form_value' is available as 'fv' as well.
    """
    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")
    control = browser.form_field(form, field_name)
    # The click is registered even if the field turns out to be read-only.
    browser.clicked(form, control)
    attrib = getattr(control, 'attrib', {})
    if 'readonly' in attrib:
        if not options['readonly_controls_writeable']:
            log.info('Form field is read-only or ignorable; nothing done.')
            return
        log.info('Forcing read-only form field to writeable.')
        del attrib['readonly']
    if getattr(control, 'type', None) == 'file':
        raise TwillException(
            'form field is for file upload; use "form_file" instead')
    decoded = browser.decode(value)
    utils.set_form_control_value(control, decoded)


# noinspection SpellCheckingInspection
fv = formvalue = form_value  # backward compatibility and convenience
def form_action(form_name: str, action_url: str) -> None:
    """>> form_action <form_name> <action_url>
    Sets action parameter on form to action_url.
    'form_action' is available as 'fa' as well.
    """
    target_form = browser.form(form_name)
    if target_form is None:
        raise TwillAssertionError("Form not found")
    log.info("Setting action for form %s to %s.", target_form, action_url)
    target_form.action = action_url


# noinspection SpellCheckingInspection
fa = formaction = form_action  # backward compatibility and convenience
def form_file(form_name: str, field_name: str, filename: str,
              content_type: Optional[str] = None) -> None:
    """>> form_file <form_name> <field_name> <filename> [<content_type>]
    Upload a file via an "upload file" form field.
    """
    # Forward slashes are replaced with the platform path separator.
    filename = filename.replace('/', sep)
    form = browser.form(form_name)
    if form is None:
        raise TwillAssertionError("Form not found")
    control = browser.form_field(form, field_name)
    if getattr(control, 'type', None) != 'file':
        raise TwillException('ERROR: field is not a file upload field!')
    browser.clicked(form, control)
    # NOTE(review): the prefixes 'plain/' and 'html/' do not look like the
    # usual 'text/plain' / 'text/html' content types, so text mode is
    # presumably never selected in practice -- confirm the intended check.
    is_text = content_type and content_type.startswith(('plain/', 'html/'))
    mode = 'r' if is_text else 'rb'
    # The open file object is handed over to the browser (not closed here).
    fp = open(filename, mode)
    browser.add_form_file(field_name, fp)
    log.info(
        'Added file "%s" to file upload field "%s".', filename, field_name)


# noinspection SpellCheckingInspection
formfile = form_file  # backward compatibility and convenience
def extend_with(module_name: str) -> None:
    """>> extend_with <module_name>
    Import contents of given module.
    """
    global_dict, local_dict = get_twill_glocals()
    # NOTE(review): module_name is interpolated into code passed to exec();
    # only use this command with trusted scripts.
    exec(f"from {module_name} import *", global_dict)
    # now add the commands into the commands available for the shell,
    # and print out some nice stuff about what the extension module does.
    mod = sys.modules[module_name]
    from . import parse, shell
    # Prefer the module's declared public API; otherwise fall back to
    # every callable attribute of the module.
    fn_list = getattr(mod, '__all__', None)
    if fn_list is None:
        fn_list = [fn for fn in dir(mod) if callable(getattr(mod, fn))]
    for command in fn_list:
        fn = getattr(mod, command)
        # the function's docstring is registered together with the command
        shell.add_command(command, fn.__doc__)
        parse.command_list.append(command)
    info, debug = log.info, log.debug
    info("Imported extension module '%s'.", module_name)
    debug("(at %s)", mod.__file__)
    # In interactive mode, show the module description if there is one,
    # otherwise list the newly added commands.
    if shell.interactive:
        if mod.__doc__:
            info("\nDescription:\n\n%s\n", mod.__doc__.strip())
        else:
            if fn_list:
                info('New commands:\n')
                for name in fn_list:
                    info('\t%s', name)
                info('')
def get_input(prompt: str) -> str:
    """>> get_input <prompt>
    Get input, store it in '__input__'.
    """
    namespace = get_twill_glocals()[1]
    answer = input(prompt)
    # The answer is stored in the local twill namespace and also returned.
    namespace['__input__'] = answer
    return answer


# noinspection SpellCheckingInspection
getinput = get_input  # backward compatibility and convenience
def get_password(prompt: str) -> str:
    """>> get_password <prompt>
    Get a password ("invisible input"), store it in '__password__'.
    """
    namespace = get_twill_glocals()[1]
    # we use sys.stdin here in order to get the same behaviour on Unix
    # as on other platforms and for better testability of this function
    secret = getpass.getpass(prompt, sys.stdin)
    # The password is stored in the local twill namespace and also returned.
    namespace['__password__'] = secret
    return secret


# noinspection SpellCheckingInspection
getpassword = get_password  # backward compatibility and convenience
def save_cookies(filename: str) -> None:
    """>> save_cookies <filename>
    Save all the current cookies to the given file.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.save_cookies(filename)
def load_cookies(filename: str) -> None:
    """>> load_cookies <filename>
    Clear the cookie jar and load cookies from the given file.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.load_cookies(filename)
def clear_cookies() -> None:
    """>> clear_cookies
    Clear the cookie jar.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.clear_cookies()
def show_cookies() -> None:
    """>> show_cookies
    Show all the cookies in the cookie jar.
    Note: Use browser.cookies to get the cookies programmatically.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.show_cookies()


# noinspection SpellCheckingInspection
showcookies = show_cookies  # backward compatibility and convenience
def add_auth(realm: str, uri: str, user: str, passwd: str) -> None:
    """>> add_auth <realm> <uri> <user> <passwd>
    Add HTTP Basic Authentication information for the given realm/uri.
    """
    has_realm = realm is not None
    if has_realm:
        # Credentials bound to the (uri, realm) pair.
        browser.add_creds((uri, realm), user, passwd)
        log.info(
            "Added auth info: realm '%s' / URI '%s' / user '%s'.",
            realm, uri, user)
    # Credentials bound to the URI alone are added when no realm was
    # given or when the 'with_default_realm' option is switched on.
    if not has_realm or options['with_default_realm']:
        browser.add_creds(uri, user, passwd)
    if not has_realm:
        log.info("Added auth info: URI '%s' / user '%s'.", uri, user)
def debug(what: str, level: str) -> None:
    """>> debug <what> <level>
    <what> can be:
    * http (any level >= 1), to display the HTTP transactions.
    * commands (any level >= 1), to display the commands being executed.
    * equiv-refresh (any level >= 1) to display HTTP-EQUIV refresh handling.
    """
    from . import parse
    try:
        num_level = int(level)
    except ValueError:
        # Non-numeric levels are read as booleans and mapped to 1 or 0.
        num_level = int(bool(utils.make_boolean(level)))
    log.info('DEBUG: Setting %s debugging to level %d.', what, num_level)
    enabled = num_level > 0
    if what == 'http':
        browser.debug_level = num_level
    elif what == 'equiv-refresh':
        browser.show_refresh = enabled
    elif what == 'commands':
        parse.log_commands(enabled)
    else:
        raise TwillException(f'Unknown debugging type: "{what}"')
def run(cmd: str) -> None:
    """>> run <command>
    <command> can be any valid Python command; 'exec' is used to run it.
    """
    # @CTB: use pyparsing to grok the command?  make sure that quoting works...
    global_dict, local_dict = get_twill_glocals()
    # Expose the command text and the current URL to the executed code.
    local_dict.update(__cmd__=cmd, __url__=browser.url)
    # NOTE(review): executes arbitrary Python from the script; only run
    # trusted twill scripts.
    exec(cmd, global_dict, local_dict)
def run_file(*args: str) -> None:
    """>> run_file <file1> [<file2> ...]
    Execute the given twill scripts or directories of twill scripts.
    'run_file' is available as 'rf' as well.
    """
    from . import parse
    # The arguments are expanded to a list of script files, which are then
    # executed one after the other with no_reset=True.
    scripts = utils.gather_filenames(args)
    for script in scripts:
        parse.execute_file(script, no_reset=True)


# noinspection SpellCheckingInspection
rf = runfile = run_file  # backward compatibility and convenience
def add_cleanup(*args: str) -> None:
    """>> add_cleanup <file1> [<file2> ...]
    Execute the given twill scripts after the current twill script.
    """
    namespace = get_twill_glocals()[1]
    pending = namespace.setdefault('__cleanups__', [])
    scripts = utils.gather_filenames(args)
    log.debug('Adding cleanup scripts: %s', ', '.join(scripts))
    # The new scripts are appended in reverse order.
    pending.extend(reversed(scripts))
def set_global(name: str, value: str) -> None:
    """set_global <name> <value>
    Sets the variable <name> to the value <value> in the global namespace.
    """
    # get_twill_glocals() returns the (global, local) namespace pair.
    namespaces = get_twill_glocals()
    namespaces[0][name] = value


# noinspection SpellCheckingInspection
setglobal = set_global  # backward compatibility and convenience
def set_local(name: str, value: str) -> None:
    """set_local <name> <value>
    Sets the variable <name> to the value <value> in the local namespace.
    """
    # get_twill_glocals() returns the (global, local) namespace pair.
    namespaces = get_twill_glocals()
    namespaces[1][name] = value


# noinspection SpellCheckingInspection
setlocal = set_local  # backward compatibility and convenience
def title(what: str) -> str:
    """>> title <pattern>
    Succeed if the regular expression pattern is in the page title.
    """
    regex = re.compile(what)
    page_title = browser.title
    if page_title is None:
        log.info("The page has no title.")
    else:
        log.info("The title is '%s'.", page_title)
    # A missing or empty title can never match.
    found = regex.search(page_title) if page_title else None
    if found is None:
        raise TwillAssertionError(f"The title does not contain '{what}'.")
    # Prefer the first capture group when the pattern defines one.
    match_str = found.group(1 if found.groups() else 0)
    get_twill_glocals()[1]['__match__'] = match_str
    return match_str
def redirect_output(filename: str) -> None:
    """>> redirect_output <filename>
    Append all twill output to the given file.
    """
    # The file is opened in append mode and handed over to set_output;
    # it is not closed here.
    set_output(open(filename, 'a'))
def reset_output() -> None:
    """>> reset_output
    Reset twill output to go to the screen.
    """
    # Passing None to set_output undoes a previous redirection.
    set_output(None)
def redirect_error(filename: str) -> None:
    """>> redirect_error <filename>
    Append all twill error output to the given file.
    """
    # The file is opened in append mode and handed over to set_err_out;
    # it is not closed here.
    set_err_out(open(filename, 'a'))
def reset_error() -> None:
    """>> reset_error
    Reset twill error output to go to the screen.
    """
    # Passing None to set_err_out undoes a previous redirection.
    set_err_out(None)
def add_extra_header(header_key: str, header_value: str) -> None:
    """>> add_header <name> <value>
    Add an HTTP header to each HTTP request. See 'show_extra_headers' and
    'clear_extra_headers'.
    """
    # An existing header with the same key is overwritten.
    browser.headers[header_key] = header_value
def show_extra_headers() -> None:
    """>> show_extra_headers
    Show any extra headers being added to each HTTP request.
    """
    emit = log.info
    extra = browser.headers
    if not extra:
        emit('** no extra HTTP headers **')
        return
    emit('\nThe following HTTP headers are added to each request:\n')
    for name, content in extra.items():
        emit('\t"%s" = "%s"', name, content)
    emit('')
def clear_extra_headers() -> None:
    """>> clear_extra_headers
    Remove all user-defined HTTP headers. See 'add_extra_header' and
    'show_extra_headers'.
    """
    # Thin command wrapper: the work is delegated to the browser object.
    browser.reset_headers()
# Built-in defaults for the options reported and changed by 'config'.
default_options: Dict[str, Any] = {
    'equiv_refresh_interval': 2,
    'readonly_controls_writeable': False,
    'require_tidy': False,
    'with_default_realm': False,
}

options = dict(default_options)  # the global options dictionary
def config(key: Optional[str] = None, value: Any = None) -> None:
    """>> config [<key> [<int value>]]
    Configure/report various options. If no <value> is given, report
    the current key value; if no <key> given, report current settings.
    Options starting with "tidy_" will be used to configure HTML tidy.
    So far:
    * 'equiv_refresh_interval', default 2 -- time limit for HTTP-EQUIV=REFRESH
    * 'readonly_controls_writeable', default False -- all controls writeable
    * 'require_tidy', default False -- *require* that tidy be installed
    * 'with_default_realm', default False -- use a default realm for HTTP AUTH
    """
    emit = log.info
    if key is None:
        # No key: dump the complete configuration, sorted by key.
        emit('\nCurrent configuration:\n')
        for name in sorted(options):
            emit('\t%s : %s', name, options[name])
        emit('')
        return
    current = options.get(key)
    # Unknown keys are rejected unless they carry the 'tidy_' prefix.
    if current is None and not key.startswith('tidy_'):
        log.error("no such configuration key '%s'", key)
        emit("valid keys are: %s", ', '.join(sorted(options)))
        raise TwillException("no such configuration key: '%s'" % (key,))
    if value is None:
        emit('\nkey %s: value %s\n', key, current)
        return
    # Coerce the new value to the type of the current one (bool is
    # tested first, since bool is a subclass of int).
    if isinstance(current, bool):
        value = utils.make_boolean(value)
    elif isinstance(current, int):
        value = utils.make_int(value)
    options[key] = value
def info() -> None:
    """>> info
    Report information on current page.
    """
    page_url = browser.url
    if page_url is None:
        log.warning("We're not on a page!")
        return
    content_type = browser.response_headers['content-type']
    # Only the media type in front of any ';' parameters is compared.
    is_html = content_type and content_type.split(';', 1)[0] == 'text/html'
    status = browser.code
    emit = log.info
    emit('\tURL: %s', page_url)
    emit('\tHTTP code: %s', status)
    emit('\tContent type: %s%s', content_type, ' (HTML)' if is_html else '')
    if is_html:
        # Title and form count are only reported for HTML pages.
        emit('\tPage title: %s', browser.title)
        form_count = len(browser.forms)
        if form_count:
            emit('\tThis page contains %d form(s)', form_count)
    emit('')
"""twill exceptions"""
# All twill-specific errors derive from TwillException, so callers can
# catch the whole family with a single except clause.
class TwillException(Exception):
    """General twill exception."""


class TwillAssertionError(TwillException):
    """AssertionError to raise upon failure of some twill command."""


class TwillNameError(TwillException):
    """Error to raise when an unknown command is called."""
# twill extensions
"""
Extension functions for parsing sys.argv.
Commands:
get_args -- load all command-line arguments after the last --
into $arg1...$argN.
"""
from twill import log, namespaces, shell
__all__ = ['get_args']
def get_args(require=0):
    """>> get_args [<require>]
    Load the command line arguments after the last '--' into $arg1...$argN,
    optionally requiring at least 'require' such arguments.
    """
    global_dict, local_dict = namespaces.get_twill_glocals()
    require = int(require)
    args = shell.twill_args
    given = len(args)
    if given < require:
        from twill.errors import TwillAssertionError
        raise TwillAssertionError(
            f"too few arguments; {given} rather than {require}")
    if not args:
        log.info("no arguments to parse!")
        return
    # Arguments are numbered from 1 and stored in the global namespace.
    global_dict.update(
        {f"arg{n}": arg for n, arg in enumerate(args, 1)})
    log.info("get_args: loaded %d args as $arg1..$arg%d.", given, given)
"""
Extension functions to check all of the links on a page.
Usage:
check_links [ <pattern> ]
Make sure that all the HTTP links on the current page can be visited
successfully. If 'pattern' is given, check only URLs that match that
regular expression.
If option 'check_links.only_collect_bad_links' is on, then all bad
links are silently collected across all calls to check_links. The
function 'report_bad_links' can then be used to report all the links,
together with their referring pages.
"""
import re
from typing import Dict, List, Set
from twill import browser, commands, log, utils
from twill.errors import TwillAssertionError
__all__ = ['check_links', 'report_bad_links', 'good_urls', 'bad_urls']
# first, set up config options & persistent 'bad links' memory...
# The default is only installed when the option has not been set yet
# (the key tested here must be the same key that is assigned below).
if commands.options.get('check_links.only_collect_bad_links') is None:
    commands.options['check_links.only_collect_bad_links'] = False
# URLs already visited successfully by check_links.
good_urls: Set[str] = set()
# Broken URLs mapped to the set of pages that referred to them.
bad_urls: Dict[str, Set[str]] = dict()
def check_links(pattern=''):
    """>> check_links [<pattern>]
    Make sure that all the HTTP links on the current page can be visited
    with an HTTP response 200 (success). If 'pattern' is given, interpret
    it as a regular expression that link URLs must contain in order to be
    tested, e.g.
    check_links https://.*\\.google\\.com
    would check only links to google URLs. Note that because 'follow'
    is used to visit the pages, the referrer URL is properly set on the
    visit.
    """
    debug, info = log.debug, log.info
    debug('in check_links')
    # compile the regex (an empty pattern means "check every link")
    regex = re.compile(pattern) if pattern else None
    # iterate over all links, collecting those that match
    #
    # note that in the case of duplicate URLs, only one of the
    # links is actually followed!
    collected_urls: Set[str] = set()
    links = browser.links
    if not links:
        debug("no links to check!?")
        return
    for link in links:
        url = link.url.split('#', 1)[0]  # get rid of subpage pointers
        # noinspection HttpUrlsUsage
        if not url.startswith(('http://', 'https://')):
            debug("url '%s' is not an HTTP link; ignoring", url)
            continue
        if not regex:
            collected_urls.add(url)
            debug("Gathered URL %s.", url)
        elif regex.search(url):
            collected_urls.add(url)
            debug("Gathered URL %s -- matched pattern", url)
        else:
            debug("URL %s doesn't match pattern", url)
    # now, for each unique and unchecked URL, follow the link
    failed: List[str] = []
    for url in sorted(collected_urls):
        debug("Checking %s", url)
        if url in good_urls:
            debug('... already known as good')
            continue
        if url in bad_urls:
            debug('... already collected as broken')
            continue
        try:
            browser.follow_link(url)
        except Exception:  # count as failure
            code = 404
        else:
            code = browser.code
            # return to the page whose links are being checked
            browser.back()
        if code == 200:
            debug('...success!')
            good_urls.add(url)
        else:
            debug('...failure!')
            failed.append(url)
    if commands.options['check_links.only_collect_bad_links']:
        # collect-only mode: remember the referring page for each bad link
        for url in failed:
            referrers = bad_urls.setdefault(url, set())
            info('*** %s', browser.url)
            referrers.add(browser.url)
    elif failed:
        info('\nCould not follow %d links:\n', len(failed))
        for url in failed:
            info('* %s', url)
        raise TwillAssertionError("broken links on page")
    else:
        info('\nNo broken links were detected.\n')
def report_bad_links(fail_if_exist='true', flush_bad_links='true'):
    """>> report_bad_links [<fail-if-exist> [<flush-bad-links>]]
    Report all the links collected across check_links runs (collected
    if and only if the config option check_links.only_collect_bad_links
    is set).
    If <fail-if-exist> is true (the default) then the command will
    fail after reporting any bad links.
    If <flush-bad-links> is false (true by default) then the list of
    bad links will be retained across the function call.
    """
    # Both arguments are given as strings and converted to booleans.
    fail_if_exist = utils.make_boolean(fail_if_exist)
    flush_bad_links = utils.make_boolean(flush_bad_links)
    info = log.info
    if not bad_urls:
        info('\nNo bad links to report.\n')
        return
    info('\nCould not follow %d links', len(bad_urls))
    for url in sorted(bad_urls):
        referrers = sorted(bad_urls[url])
        info("\tlink '%s' (occurs on: %s)", url, ','.join(referrers))
    if flush_bad_links:
        bad_urls.clear()
    # Raises TwillAssertionError only when failing was requested.
    if fail_if_exist:
        raise TwillAssertionError("broken links encountered")
"""
An extension function to iterate over a list of comma-separated values.
Function 'csv_iterate' reads a file containing one or more rows of
comma-separated columns, assigns them to col1...colN, and, for each row,
executes the given twill script.
"""
import csv
from twill import execute_file, log, namespaces
__all__ = ['csv_iterate']
def csv_iterate(file_name, script_name):
    """>> csv_iterate <csv_file> <script>
    For each line in <csv_file>, read in a list of comma-separated values,
    put them in $col1...$colN, and execute <script>.
    """
    global_dict, local_dict = namespaces.get_twill_glocals()
    # The csv module asks for files opened with newline=''; the context
    # manager makes sure the file is closed again, even if the executed
    # script raises an error.
    with open(file_name, newline='') as csv_file:
        for i, row in enumerate(csv.reader(csv_file), 1):
            log.debug('csv_iterate: on row %d of %s', i, file_name)
            # columns are numbered from 1 and stored as global variables
            for j, col in enumerate(row, 1):
                global_dict[f"col{j}"] = col
            execute_file(script_name, no_reset=True)
"""
Extension functions for manipulating the current working directory (cwd).
Commands:
chdir -- push the cwd onto the directory stack & change to the new location.
popd -- change to the last directory on the directory stack.
"""
import os
from twill import commands, log
__all__ = ['chdir', 'popd']
# Directories that were current before each 'chdir'; consumed by 'popd'.
_dir_stack = []


def chdir(where):
    """>> chdir <where>
    Change to the new location, after saving the current directory onto
    the directory stack. The global variable __dir__ is set to the cwd.
    """
    previous = os.getcwd()
    _dir_stack.append(previous)
    log.debug('current directory: "%s"', previous)
    os.chdir(where)
    log.info('changed directory to "%s"', where)
    # __dir__ receives the path exactly as it was passed in.
    commands.setglobal('__dir__', where)
def popd():
    """>> popd
    Change back to the last directory on the directory stack. The global
    variable __dir__ is set to the cwd.
    """
    # Popping from an empty stack raises IndexError.
    previous = _dir_stack.pop()
    os.chdir(previous)
    log.info('popped back to directory "%s"', previous)
    commands.setglobal('__dir__', previous)
"""
Extension functions to help query/assert name service information.
Functions:
* dns_resolves -- assert that a host resolves to a specific IP address.
* dns_a -- assert that a host directly resolves to a specific IP address
* dns_cname -- assert that a host is an alias for another hostname.
* dns_mx -- assert that a given host is a mail exchanger for the given name.
* dns_ns -- assert that a given hostname is a name server for the given name.
"""
import socket
from twill.errors import TwillAssertionError
try:
from dns.ipv4 import inet_aton
from dns.name import from_text
from dns.resolver import Resolver
except ImportError:
raise Exception(
"ERROR: must have dnspython installed to use the DNS extension module")
def dns_a(host, ipaddress, server=None):
    """>> dns_a <name> <ipaddress> [<name server>]
    Assert that <name> resolves to <ipaddress> (and is an A record).
    Optionally use the given name server.
    """
    if not is_ip_addr(ipaddress):
        raise Exception(
            "<ipaddress> parameter must be an IP address, not a hostname")
    # Succeed as soon as one of the A records carries the address.
    answers = _resolve(host, 'A', server)
    if any(ipaddress == answer.address for answer in answers):
        return True
    raise TwillAssertionError
def dns_cname(host, cname, server=None):
    """>> dns_cname <name> <alias_for> [<name server>]
    Assert that <name> is a CNAME alias for <alias_for> Optionally use
    <name server>.
    """
    if is_ip_addr(cname):
        raise Exception(
            "<alias_for> parameter must be a hostname, not an IP address")
    # The alias is converted with from_text before comparing targets.
    expected = from_text(cname)
    answers = _resolve(host, 'CNAME', server)
    if any(expected == answer.target for answer in answers):
        return True
    raise TwillAssertionError
def dns_resolves(host, ipaddress, server=None):
    """>> dns_resolves <name> <name2/ipaddress> [<name server>]
    Assert that <name> ultimately resolves to the given IP address (or
    the same IP address that 'name2' resolves to). Optionally use the
    given name server.
    """
    # A host name given instead of an address is resolved first.
    if not is_ip_addr(ipaddress):
        ipaddress = _resolve_name(ipaddress, server)
    # NOTE(review): 1 is presumably the numeric record type for 'A'
    # records -- confirm against the dnspython documentation.
    answers = _resolve(host, 1, server)
    if any(ipaddress == answer.address for answer in answers):
        return True
    raise TwillAssertionError
def dns_mx(host, mailserver, server=None):
    """>> dns_mx <name> <mailserver> [<name server>]
    Assert that <mailserver> is a mailserver for <name>.
    """
    expected = from_text(mailserver)
    # Succeed as soon as one MX record names the expected exchanger.
    records = _resolve(host, 'MX', server)
    if any(expected == rdata.exchange for rdata in records):
        return True
    raise TwillAssertionError
def dns_ns(host, query_ns, server=None):
    """>> dns_ns <domain> <nameserver> [<name server to use>]
    Assert that <nameserver> is a name server for <domain>.
    """
    # The name is converted with from_text before comparing targets.
    query_ns = from_text(query_ns)
    # Succeed as soon as one NS record names the expected server.
    for answer in _resolve(host, 'NS', server):
        if query_ns == answer.target:
            return True
    raise TwillAssertionError
def is_ip_addr(text):
    """Check the 'text' to see if it's just an IP address.

    Returns True if inet_aton accepts the text, False if it rejects it.
    """
    # dnspython's inet_aton reports malformed input with its own
    # dns.exception.SyntaxError (a DNSException subclass) rather than
    # with socket.error, so both kinds of error must count as "no".
    from dns.exception import DNSException
    try:
        inet_aton(text)
    except (socket.error, DNSException):
        return False
    return True
def _resolve_name(name, server):
    """Return the IP address for 'name' as a string.

    An IP address is returned unchanged. Otherwise the name is looked
    up, via the name server 'server' if one is given, and the first
    answer is used.
    """
    if is_ip_addr(name):
        return name
    resolver = Resolver()
    if server:
        # the name server may itself be given as a host name
        resolver.nameservers = [_resolve_name(server, None)]
    return str(resolver.resolve(name)[0])
def _resolve(query, query_type, server):
    """Run a DNS query of the given type and return the answers.

    If 'server' is given, it is used as the only name server; pass None
    to use the resolver's default configuration.
    """
    dns_resolver = Resolver()
    if server:
        # the name server may itself be given as a host name
        dns_resolver.nameservers = [_resolve_name(server, None)]
    return dns_resolver.resolve(query, query_type)
"""
Extension functions for easier form filling.
(This module is a dumping ground for features that may ultimately get
added into the main twill command set.)
Commands:
* fv_match -- fill in *all* fields that match a regex (unlike 'form_value'
which will complain about multiple matches). Useful for forms
with lots of repeated field names -- 'field-1', 'field-2', etc.
* fv_multi -- fill in multiple form fields at once, e.g.
fv_multi <form_name> field1=value1 field2=value2 field3=value3
* fv_multi_sub -- same as 'fv_multi', followed by a 'submit'.
"""
import re
from twill import browser, commands, log, utils
__all__ = ['fv_match', 'fv_multi_match', 'fv_multi', 'fv_multi_sub']
def fv_match(form_name: str, field_pattern: str, value: str) -> None:
    """>> fv_match <form_name> <field_pattern> <value>
    Set value of *all* form fields with a name that matches the given
    regular expression pattern.
    (Unlike 'form_value' or 'fv', this will not complain about multiple
    matches!)
    """
    form = browser.form(form_name)
    if form is None:
        log.error("no such form '%s'", form_name)
        return
    regex = re.compile(field_pattern)
    matches = [ctl for ctl in form.inputs
               if regex.search(str(ctl.get('name')))]
    if not matches:
        return
    log.info('-- matches %d', len(matches))
    num_set = 0
    for control in matches:
        # every matching control is clicked; read-only ones keep their value
        browser.clicked(form, control)
        if 'readonly' not in control.attrib:
            num_set += 1
            utils.set_form_control_value(control, value)
    log.info('set %d values total', num_set)
def fv_multi_match(form_name: str, field_pattern: str, *values: str) -> None:
    """>> fv_multi_match <form_name> <field_pattern> <value>...
    Set value of each consecutive form field matching the given pattern with
    the next specified value. If there are no more values, use the last for
    all remaining form fields.
    """
    form = browser.form(form_name)
    if form is None:
        log.error("no such form '%s'", form_name)
        return
    regex = re.compile(field_pattern)
    matches = [
        ctl for ctl in form.inputs if regex.search(str(ctl.get('name')))]
    if not matches:
        return
    log.info('-- matches %d, values %d', len(matches), len(values))
    last = len(values) - 1
    num_set = 0  # number of fields actually changed (read-only are skipped)
    for n, control in enumerate(matches):
        browser.clicked(form, control)
        if 'readonly' in control.attrib:
            continue
        # Once the values run out, keep using the last one. If no values
        # were given at all, this is values[-1] and raises IndexError.
        utils.set_form_control_value(control, values[min(n, last)])
        num_set += 1
    log.info('set %d values total', num_set)
def fv_multi(form_name: str, *pairs: str) -> None:
    """>> fv_multi <form_name> <pair>...
    Set multiple form fields; each pair should be of the form
    field_name=value
    The pair will be split around the first '=', and
    'fv <form_name> field_name value' will be executed in the order the
    pairs are given.
    """
    for assignment in pairs:
        # a pair without '=' fails to unpack and raises ValueError
        name, new_value = assignment.split('=', 1)
        commands.fv(form_name, name, new_value)
def fv_multi_sub(form_name: str, *pairs: str) -> None:
    """>> fv_multi_sub <form_name> <pair>...
    Set multiple form fields (as with 'fv_multi') and then submit().
    """
    # Filling in the fields is exactly what 'fv_multi' does.
    fv_multi(form_name, *pairs)
    commands.submit()
"""
Extension functions to discard all moderated messages in a SourceForge-based
mailman queue.
(Currently there is no way to do this without manually selecting 'discard'
for each and every message.)
"""
import re
from twill import browser, log, utils
__all__ = ['discard_all_messages', 'exit_if_empty']
def exit_if_empty():
    """>> exit_if_empty
    Exit the script currently running, if there are no deferred messages
    on the current page.
    """
    # A page for which browser.form() returns nothing counts as empty.
    if not browser.form():
        log.error("No messages; exiting.")
        raise SystemExit
def discard_all_messages():
    """>> discard_all_messages
    Set all buttons to "discard".
    """
    # In form '1', set every control with an all-digit name to '3'
    # (presumably the value of the "discard" choice -- confirm).
    _form_value_by_regex_setall('1', '^\\d+$', '3')
def _form_value_by_regex_setall(form_name, field_name, value):
    # Set 'value' on all controls of the given form whose name matches
    # the regular expression 'field_name'; logs an error and returns if
    # the form cannot be found.
    form = browser.form(form_name)
    if not form:
        log.error("no such form '%s'", form_name)
        return
    regex = re.compile(field_name)
    matches = [ctl for ctl in form.controls if regex.search(str(ctl.name))]
    if matches:
        log.info('-- matches %d', len(matches))
        n = 0
        for control in matches:
            # every matching control is clicked, even read-only ones
            browser.clicked(form, control)
            if not control.readonly:
                utils.set_form_control_value(control, value)
                n += 1
        log.info('set %d values total', n)
"""
Suresh's extension for slicing and dicing variables with regular expressions.
"""
import re
from twill import browser, log
from twill.namespaces import get_twill_glocals
def showvar(which):
    """>> showvar var
    Shows the value of the variable 'var'.
    """
    global_dict, local_dict = get_twill_glocals()
    # Local variables take precedence over global ones of the same name.
    merged = global_dict.copy()
    merged.update(local_dict)
    name = str(which)
    log.info(merged.get(name))
def split(what):
    """>> split <regex>
    Sets __matchlist__ to re.split(regex, page).
    """
    pieces = re.split(what, browser.html)
    # the result is stored in the local twill namespace
    get_twill_glocals()[1]['__matchlist__'] = pieces
def findall(what):
    """>> findall <regex>

    Search the HTML of the current page for all matches of the regex
    (with re.DOTALL set) and store the resulting list in __matchlist__.
    """
    found = re.findall(what, browser.html, re.DOTALL)
    local_dict = get_twill_glocals()[1]
    local_dict['__matchlist__'] = found
def getmatch(where, what):
    """>> getmatch into_var expression

    Evaluate the expression with 'm' bound to __match__ and store the
    result in the local variable 'into_var'.
    """
    local_dict = get_twill_glocals()[1]
    current_match = local_dict['__match__']
    local_dict[where] = _do_eval(current_match, what)
def setmatch(what):
    """>> setmatch expression

    Replace each element of __matchlist__ with eval(expression), where
    'm' is bound to the element being processed. A __matchlist__ that
    is a single string is treated as a list with one element.
    """
    local_dict = get_twill_glocals()[1]
    elements = local_dict['__matchlist__']
    if isinstance(elements, str):
        elements = [elements]
    local_dict['__matchlist__'] = [
        _do_eval(element, what) for element in elements]
def _do_eval(match, exp):
"""Used internally to evaluate an expression."""
return eval(exp, globals(), {'m': match})
def popmatch(which):
    """>> popmatch index

    Remove the element with the given index from __matchlist__
    and store it in __match__.
    """
    local_dict = get_twill_glocals()[1]
    popped = local_dict['__matchlist__'].pop(int(which))
    local_dict['__match__'] = popped
"""
A simple set of extensions to manage post-load requirements for pages.
Commands:
require -- turn on post-load requirements; either 'success' or
'links_ok'.
no_require -- turn off requirements.
skip_require -- for the next page visit, skip requirements processing.
flush_visited -- flush the list of already visited pages
(for links checking)
"""
from twill import browser, commands, log
__all__ = ['require', 'skip_require', 'flush_visited', 'no_require']
# requirements to check after each page load (filled in by require())
_requirements = []  # what requirements to satisfy
# set by skip_require(); cleared again by the next post-load hook call
ignore_once = False  # reset after each hook call
# set by the post-load hook itself while it runs check_links()
ignore_always = False  # never reset
def skip_require():
    """>> skip_require

    Do not check the post-load requirements for the next page load.
    """
    global ignore_once
    ignore_once = True
def require(what):
    """>> require <what>

    After each page is loaded, require that 'what' be satisfied. 'what'
    can be:
      * 'success' -- HTTP return code is 200
      * 'links_ok' -- all of the links on the page load OK (see 'check_links'
        extension module)
    """
    # make sure that the post-load hook function is installed
    # noinspection PyProtectedMember
    post_load_hooks = browser._post_load_hooks
    hook_installed = _require_post_load_hook in post_load_hooks
    if not hook_installed:
        log.debug('INSTALLING POST-LOAD HOOK')
        post_load_hooks.append(_require_post_load_hook)
    # register the requirement, but only once
    if what in _requirements:
        return
    log.debug('Adding requirement: %s', what)
    _requirements.append(what)
def no_require():
    """>> no_require

    Uninstall the post-load hook and forget all requirements.
    """
    global _requirements
    # noinspection PyProtectedMember
    remaining_hooks = [
        hook for hook in browser._post_load_hooks
        if hook != _require_post_load_hook]
    browser._post_load_hooks = remaining_hooks
    _requirements = []
def flush_visited():
    """>> flush_visited

    Forget which pages have already been visited successfully
    by clearing the 'good_urls' collection of check_links.
    """
    from .check_links import good_urls as visited_urls  # type: ignore
    visited_urls.clear()
def _require_post_load_hook(action, *_args, **_kwargs):
    """Check all registered requirements after a page has been loaded.

    This is installed as a browser post-load hook by require().
    See TwillBrowser._journey() for more information.

    Nothing is checked for the 'back' action, or when skipping has been
    requested with skip_require(), or while links are being checked.
    """
    global ignore_once, ignore_always
    if action == 'back':  # do nothing on a 'back'
        return
    if ignore_once or ignore_always:
        ignore_once = False  # skipping is only valid for one call
        return
    for requirement in _requirements:
        if requirement == 'success':
            log.debug('REQUIRING success')
            commands.code("200")
        elif requirement == 'links_ok':
            # NOTE(review): flush_visited() imports '.check_links' relatively;
            # confirm that both imports resolve to the same module object.
            from check_links import check_links, good_urls  # type: ignore
            # checking links loads pages, which calls this hook again;
            # the flag keeps those nested calls from checking anything
            ignore_always = True
            log.debug('REQUIRING functioning links')
            log.debug('(already visited:)')
            log.debug("\n\t".join(sorted(good_urls)))
            try:
                check_links()
            finally:
                ignore_always = False
"""Used in test_shell, to test default command execution & extensions."""
flag = False  # raised by flag_true(), checked by assert_flag()
__all__ = ['flag_true', 'assert_flag']


def flag_true():
    """Raise the module-level flag."""
    global flag
    flag = True


def assert_flag():
    """Fail with an AssertionError unless the flag has been raised."""
    assert flag
"""twill multiprocess execution system."""
import sys
import os
import time
from optparse import OptionParser
from . import execute_file, set_log_level
from pickle import load, dump
# make sure that the current working directory is in the path
if '' not in sys.path:
sys.path.append('')
def main():
    """Run twill scripts in several forked processes and print statistics.

    Command line: [-u URL] [-n NUMBER] [-p PROCESSES] script...

    The NUMBER of runs is distributed over the PROCESSES children; the
    first child additionally gets the remainder. Each child executes all
    scripts the given number of times and writes a pickled tuple
    (elapsed seconds, number of runs) to the file '.status.<pid>'. The
    parent waits for all children, sums up the statistics of those that
    exited with status 0 and removes their status files.

    Always ends with sys.exit(): the exit status is -1 if a child failed
    and 0 otherwise.
    """
    try:
        fork = os.fork
    except AttributeError:
        sys.exit('Error: Must use Unix to be able to fork processes.')

    parser = OptionParser()
    add = parser.add_option
    add('-u', '--url', nargs=1, action="store", dest="url",
        help="start at the given URL before each script")
    add('-n', '--number', nargs=1, action="store", dest="number",
        default=1, type="int",
        help="number of times to run the given script(s)")
    add('-p', '--processes', nargs=1, action="store",
        dest="processes", default=1, type="int",
        help="number of processes to execute in parallel")
    options, args = parser.parse_args()
    if not args:
        sys.exit('Error: Must specify one or more scripts to execute.')

    average_number = options.number // options.processes
    last_number = average_number + options.number % options.processes

    child_pids = []
    is_parent = True
    repeat = 0

    # start a bunch of child processes and record their pids in the parent
    for i in range(options.processes):
        pid = fork()
        if pid:
            child_pids.append(pid)
        else:
            # the first child also takes over the remainder
            repeat = average_number if i else last_number
            is_parent = False
            break

    # set the children up to run and record their stats
    failed = False
    if is_parent:
        time.sleep(1)
        total_time = total_exec = 0
        # iterate over all the child pids, wait until they finish,
        # and then sum statistics
        for child_pid in child_pids[:]:
            child_pid, status = os.waitpid(child_pid, 0)
            if status:  # failure
                print(f'[twill-fork parent: process {child_pid} FAILED:'
                      f' exit status {status}]')
                print('[twill-fork parent:'
                      ' (not counting stats for this process)]')
                failed = True
            else:  # record statistics, otherwise
                filename = '.status.%d' % (child_pid,)
                # pickle data is binary, so the file must be read as bytes
                with open(filename, 'rb') as fp:
                    this_time, n_executed = load(fp)
                os.unlink(filename)
                total_time += this_time
                total_exec += n_executed
        # summarize
        print('\n----\n')
        print(f'number of processes: {options.processes}')
        print(f'total executed: {total_exec}')
        print(f'total time to execute: {total_time:.2f} s')
        if total_exec:
            avg_time = 1000 * total_time / total_exec
            print(f'average time: {avg_time:.2f} ms')
        else:
            print('(nothing completed, no average!)')
        print()
    else:
        pid = os.getpid()
        print(f'[twill-fork: pid {pid} : executing {repeat} times]')
        start_time = time.time()
        set_log_level('warning')
        for i in range(repeat):
            for filename in args:
                execute_file(filename, initial_url=options.url)
        end_time = time.time()
        this_time = end_time - start_time
        # write statistics
        filename = f'.status.{pid}'
        # pickle data is binary, so the file must be written as bytes
        with open(filename, 'wb') as fp:
            info = (this_time, repeat)
            dump(info, fp)

    sys.exit(-1 if failed else 0)
if __name__ == '__main__':
main()
"""Global and local dictionaries, and initialization/utility functions."""
global_dict = {} # the global dictionary
def init_global_dict():
    """Fill the global dictionary with all twill commands.

    Every name listed in commands.__all__ is added to the global
    dictionary and to the command list of the parser.

    This must run after all the other modules are loaded, so that all
    the commands are already defined.
    """
    # noinspection PyCompatibility
    from . import commands, parse
    command_names = commands.__all__
    for name in command_names:
        global_dict[name] = getattr(commands, name)
    parse.command_list.extend(command_names)
_local_dict_stack = [] # local dictionaries
def new_local_dict():
    """Push a new, empty local dictionary onto the stack and return it."""
    _local_dict_stack.append({})
    return _local_dict_stack[-1]
def pop_local_dict():
    """Remove the current local dictionary from the stack and return it."""
    current = _local_dict_stack.pop()
    return current
def get_twill_glocals():
    """Return the global dictionary and the current local dictionary.

    A new local dictionary is created first if the stack is empty.
    """
    assert global_dict is not None, "must initialize global namespace first!"
    try:
        current = _local_dict_stack[-1]
    except IndexError:
        current = new_local_dict()
    return global_dict, current
"""Code parsing and evaluation for the twill mini-language."""
import re
import sys
from io import StringIO
from typing import List
from pyparsing import (
CharsNotIn, Combine, Group, Literal, Optional, ParseException,
pyparsing_unicode, removeQuotes, restOfLine, Word, ZeroOrMore)
# noinspection PyCompatibility
from . import commands, log, namespaces
from .browser import browser
from .errors import TwillNameError
# pyparsing stuff: grammar for one line of a twill script, which is
# either a comment or a command followed by its arguments

# allow characters in full 8bit range
char_range = pyparsing_unicode.Latin1
alphas, alphanums = char_range.alphas, char_range.alphanums
printables = char_range.printables

# basically, a valid Python identifier:
command_word = Word(alphas + '_', alphanums + '_')
command = command_word.setResultsName('command')
command.setName('command')

# arguments to it.

# we need to reimplement all this junk from pyparsing because pcre's
# idea of escapable characters contains a lot more than the C-like
# thing pyparsing implements
_bslash = '\\'
_sglQuote = Literal("'")
_dblQuote = Literal('"')
_escapables = printables
# a backslash followed by exactly one printable character
_escapedChar = Word(_bslash, _escapables, exact=2)
# inside quotes: ordinary characters, escaped characters or a doubled quote
dblQuotedString = Combine(
    _dblQuote + ZeroOrMore(CharsNotIn('\\"\n\r') | _escapedChar | '""') +
    _dblQuote).streamline().setName("string enclosed in double quotes")
sglQuotedString = Combine(
    _sglQuote + ZeroOrMore(CharsNotIn("\\'\n\r") | _escapedChar | "''") +
    _sglQuote).streamline().setName('string enclosed in single quotes')
# a quoted argument is passed on without the enclosing quotes
quotedArg = (dblQuotedString | sglQuotedString)
quotedArg.setParseAction(removeQuotes)
quotedArg.setName('quotedArg')
# a plain argument must not contain quotes or the comment character
plainArgChars = printables.replace('#', '').replace('"', '').replace("'", "")
plainArg = Word(plainArgChars)
plainArg.setName('plainArg')
arguments_group = Group(ZeroOrMore(quotedArg | plainArg))
arguments = arguments_group.setResultsName('arguments')
arguments.setName('arguments')

# comment line.
comment = Literal('#') + restOfLine
comment = comment.suppress()
comment.setName('comment')

# a full line: only a comment, or a command with an optional trailing comment
full_command = comment | (command + arguments + Optional(comment))
full_command.setName('full_command')

command_list: List[str] = []  # filled in by namespaces.init_global_dict().
def process_args(args, globals_dict, locals_dict):
    """Process string arguments.

    Take a list of string arguments parsed via pyparsing and return a new
    list in which the variables have been substituted:

    * An argument starting with '__' is evaluated as an expression; a string
      result becomes one argument, any other result is expanded into
      several arguments.
    * An argument starting with '$' (but not with '${') is evaluated
      without the '$' and converted to a string.
    * In all other arguments, the '${...}' parts are substituted.

    Arguments referring to unknown names are left as they are. Finally,
    each backslash followed by 'n' is replaced with a newline character.

    The expressions are run through eval(), so the arguments must only
    ever come from a trusted twill script.
    """
    processed: List[str] = []
    for arg in args:
        if arg.startswith('__'):
            # __variable substitution
            try:
                value = eval(arg, globals_dict, locals_dict)
            except NameError:  # not in dictionary; don't interpret
                value = arg
            log.info('VAL IS %s FOR %s', value, arg)
            if isinstance(value, str):
                processed.append(value)
            else:
                processed.extend(value)
            continue
        if arg.startswith('$') and not arg.startswith('${'):
            # $variable substitution
            try:
                value = str(eval(arg[1:], globals_dict, locals_dict))
            except NameError:  # not in dictionary; don't interpret
                value = arg
            processed.append(value)
            continue
        processed.append(
            variable_substitution(arg, globals_dict, locals_dict))
    return [item.replace('\\n', '\n') for item in processed]
def execute_command(cmd, args, globals_dict, locals_dict, cmdinfo):
    """Execute the given twill command and return its result.

    Side effects: __cmd__ is set to the command, __args__ is set to the
    arguments, and after the execution __url__ is set to the browser URL.

    Raises a TwillNameError if the command is not a supported one.
    """
    locals_dict['__cmd__'] = cmd
    locals_dict['__args__'] = args
    if cmd not in command_list:
        raise TwillNameError(f"unknown twill command: '{cmd}'")
    # compile the call with 'cmdinfo' as the file name,
    # so that 'cmdinfo' shows up in the error tracebacks
    call = compile(f"{cmd}(*__args__)", cmdinfo, 'eval')
    outcome = eval(call, globals_dict, locals_dict)
    locals_dict['__url__'] = browser.url
    return outcome
_log_commands = log.debug # type: ignore
def parse_command(line, globals_dict, locals_dict):
    """Parse one line into a command and its processed arguments.

    Returns the tuple (None, None) if the line cannot be parsed (this is
    logged as an error) or if it does not contain a command (comment).
    """
    try:
        parsed = full_command.parseString(line)
    except ParseException as e:
        log.error('PARSE ERROR: %s', e)
        return None, None
    if not parsed:
        return None, None  # e.g. a comment
    _log_commands("twill: executing cmd '%s'", line.strip())
    args = process_args(
        parsed.arguments.asList(), globals_dict, locals_dict)
    return parsed.command, args
def execute_string(buf, **kw):
    """Execute the twill commands contained in a string.

    The browser is not reset before the execution unless this is
    requested explicitly by passing no_reset=False.
    """
    lines = StringIO(buf)
    kw['source'] = ['<string buffer>']
    kw.setdefault('no_reset', True)
    _execute_script(lines, **kw)
def execute_file(filename, **kw):
    """Execute the twill commands contained in a file.

    The file is read as UTF-8; the file name '-' stands for standard input.
    Keyword arguments are passed on to the script execution, with 'source'
    set to the file name.

    A file opened here is closed again when the script has been executed,
    even if the execution raised an error; standard input is left open.
    """
    use_stdin = filename == '-'
    inp = sys.stdin if use_stdin else open(filename, encoding='utf-8')
    try:
        log.info('\n>> Running twill file %s', filename)
        kw['source'] = filename
        _execute_script(inp, **kw)
    finally:
        if not use_stdin:
            inp.close()
def _execute_script(inp, **kw):
    """Execute lines taken from a file-like iterator.

    The lines are executed in a new local dictionary which is removed
    again at the end.

    Supported keyword arguments:

    * no_reset -- do not reset the browser before running the script
    * initial_url -- go to this URL before running the script
    * never_fail -- log errors raised by commands, but do not re-raise them
    * source -- name of the source used in messages (default "<input>")

    A SystemExit raised by a command silently ends the script. In any case,
    the cleanup scripts registered in __cleanups__ are run at the end in
    reverse order; afterwards the browser is reset, and the first error and
    the result from before the cleanups are restored.
    """
    # initialize new local dictionary and get global and current local
    namespaces.new_local_dict()
    globals_dict, locals_dict = namespaces.get_twill_glocals()
    locals_dict['__url__'] = browser.url

    # reset browser
    if not kw.get('no_reset'):
        commands.reset_browser()

    # go to a specific URL?
    init_url = kw.get('initial_url')
    if init_url:
        commands.go(init_url)
        locals_dict['__url__'] = browser.url

    # should we catch exceptions on failure?
    catch_errors = kw.get('never_fail')

    # source_info stuff
    source_info = kw.get('source', "<input>")

    try:
        for n, line in enumerate(inp, 1):
            line = line.strip()
            if not line:  # skip empty lines
                continue

            cmd_info = f'{source_info}:{n}'
            log.info('AT LINE: %s', cmd_info)

            cmd, args = parse_command(line, globals_dict, locals_dict)
            if cmd is None:
                continue

            try:
                execute_command(cmd, args, globals_dict, locals_dict, cmd_info)
            except SystemExit:
                # abort script execution if a SystemExit is raised
                return
            except Exception as e:
                error_type = e.__class__.__name__ or 'Error'
                error = f"{error_type} raised on line {n} of '{source_info}'"
                if line:
                    error += f" while executing\n>> {line}"
                log.error("\nOops! %s", error)
                if not browser.first_error:
                    browser.first_error = error
                log.error("\nError: %s", str(e).strip())
                if not catch_errors:
                    raise
    finally:
        cleanups = locals_dict.get('__cleanups__')
        if cleanups:
            # remember the state that the cleanup scripts must not change
            error = browser.first_error
            result = browser.result
            for filename in reversed(cleanups):
                log.info('\n>> Running twill cleanup file %s', filename)
                try:
                    # the context manager makes sure the file gets closed
                    with open(filename, encoding='utf-8') as cleanup_inp:
                        _execute_script(
                            cleanup_inp, source=filename, no_reset=True)
                except Exception as e:
                    log.error('>> Cannot run cleanup file %s: %s', filename, e)
            browser.reset()
            browser.first_error = error
            browser.result = result
        namespaces.pop_local_dict()
def log_commands(flag):
    """Turn printing of commands as they are executed on or off.

    Executed commands are logged with log.info if the flag is true
    and with log.debug otherwise. Returns whether they were logged
    with log.info before this call.
    """
    global _log_commands
    was_on = _log_commands is log.info
    if flag:
        _log_commands = log.info
    else:
        _log_commands = log.debug
    return was_on
# matches '${expression}' with the shortest possible expression
_re_variable = re.compile(r"\${(.*?)}")


def variable_substitution(raw_str, globals_dict, locals_dict):
    """Replace all '${expression}' parts of a string with their values.

    Each expression is evaluated in the given dictionaries and replaced
    with the string form of the result. A part whose expression refers to
    an unknown name is left as it is.

    The expressions are run through eval(), so the string must only ever
    come from a trusted twill script.
    """
    def expand(found):
        try:
            return str(eval(found.group(1), globals_dict, locals_dict))
        except NameError:
            return found.group()

    return _re_variable.sub(expand, raw_str)
"""
A command-line interpreter for twill.
This is an implementation of a command-line interpreter based on the
'Cmd' class in the 'cmd' package of the default Python distribution.
"""
import os
import sys
import traceback
from cmd import Cmd
from io import TextIOWrapper
from optparse import OptionParser
from typing import Any, Callable, List, Optional
try:
from readline import read_history_file, write_history_file # type: ignore
except ImportError:
read_history_file = write_history_file = None # type: ignore
# noinspection PyCompatibility
from . import (
commands, execute_file,
log, log_levels, set_log_level, set_output,
namespaces, parse, shutdown, __url__, __version__)
from .browser import browser
from .utils import gather_filenames, Singleton
__all__ = ['main']
python_version = sys.version.split(None, 1)[0]
version_info = f"""
twill version: {__version__}
Python Version: {python_version}
See {__url__} for more info.
"""
def make_cmd_fn(cmd: str) -> Callable[[str], None]:
    """Create the shell function that runs the given twill command.

    The returned function takes the rest of the command line, parses and
    processes the arguments and then executes the command. (This is where
    the twill.commands functions actually get executed.)

    Errors are only logged; a SystemExit is passed on to the caller.
    """
    def do_cmd(rest_of_line: str, cmd: str = cmd) -> None:
        global_dict, local_dict = namespaces.get_twill_glocals()

        if rest_of_line.strip():
            try:
                parsed = parse.arguments.parseString(rest_of_line)[0]
                args = parse.process_args(parsed, global_dict, local_dict)
            except Exception as e:
                log.error('\nINPUT ERROR: %s\n', e)
                return
        else:
            args = []

        try:
            parse.execute_command(
                cmd, args, global_dict, local_dict, '<shell>')
        except SystemExit:
            raise
        except Exception as e:
            log.error('\nERROR: %s\n', e)
            log.debug(traceback.format_exc())

    return do_cmd
def make_help_cmd(cmd: str, docstring: str) -> Callable[[str], None]:
    """Create the shell function that shows help for the given command.

    The returned function logs the docstring between two rulers that are
    as wide as the longest line of the docstring, but not narrower than
    the name of the command plus 7 characters.
    """
    def help_cmd(message: str = docstring, cmd: str = cmd) -> None:
        message = message.strip()
        line_widths = [len(line.rstrip()) for line in message.splitlines()]
        width = max([7 + len(cmd)] + line_widths)
        ruler = '=' * width
        info = log.info
        info('\n%s' % (ruler,))
        info('\nHelp for command %s:\n', cmd)
        info(message)
        info('\n%s\n' % (ruler,))

    return help_cmd
def add_command(cmd: str, docstring: str) -> None:
    """Register a command with the command shell, if there is one."""
    shell = get_command_shell()
    if not shell:
        return
    shell.add_command(cmd, docstring)
class TwillCommandLoop(Singleton, Cmd):
    """The command-line interpreter for twill commands.

    This is a Singleton object: you can't create more than one
    shell at a time.

    Note: most of the do_ and help_ functions are created dynamically
    and set as attributes of the instance by add_command().
    """

    def __init__(
            self, stdin: Optional[TextIOWrapper] = None,
            initial_url: Optional[str] = None,
            fail_on_unknown: bool = False) -> None:
        """Initialize the shell.

        If 'stdin' is passed, the commands are read from this stream
        instead of raw user input. If 'initial_url' is passed, the browser
        first goes to that URL. If 'fail_on_unknown' is set, errors raised
        by commands handled in default() are re-raised after being logged.
        """
        Cmd.__init__(self, stdin=stdin)

        self.use_rawinput = stdin is None

        # initialize a new local namespace.
        namespaces.new_local_dict()

        # import readline history, if available/possible.
        if read_history_file:
            try:
                read_history_file('.twill-history')
            except IOError:
                pass  # best effort: the history file may not exist yet

        # fail on unknown commands? for test-shell, primarily.
        self.fail_on_unknown = fail_on_unknown

        # handle initial URL argument
        if initial_url:
            commands.go(initial_url)

        self._set_prompt()

        self.names: List[str] = []

        global_dict, local_dict = namespaces.get_twill_glocals()

        # add all of the commands from twill
        for command in parse.command_list:
            fn = global_dict.get(command)
            self.add_command(command, fn.__doc__)

    def add_command(self, command: str, docstring: str) -> None:
        """Add the given command into the lexicon of all commands.

        A help function is only added if the docstring is not empty.
        """
        do_name = f'do_{command}'
        do_cmd = make_cmd_fn(command)
        setattr(self, do_name, do_cmd)

        if docstring:
            help_cmd = make_help_cmd(command, docstring)
            help_name = f'help_{command}'
            setattr(self, help_name, help_cmd)

        self.names.append(do_name)

    def get_names(self) -> List[str]:
        """Return the list of commands."""
        return self.names

    def complete_form_value(
            self, text: str, line: str, _begin: int, _end: int) -> List[str]:
        """Command arg completion for the form_value command.

        The twill command has the following syntax:

        form_value <form_name> <field_name> <value>

        The first argument is completed to a form name, the second one to
        a field name of that form; nothing else is completed.
        """
        # the dot stands for the argument that is currently being typed
        cmd, args = parse.parse_command(line + '.', {}, {})
        if args is None:
            # the line could not be parsed, so nothing can be completed
            return []
        place = len(args)
        if place == 1:
            return self.provide_form_name(text)
        if place == 2:
            form_name = args[0]
            return self.provide_field_name(form_name, text)
        return []

    complete_fv = complete_form_value  # alias

    @staticmethod
    def provide_form_name(prefix: str) -> List[str]:
        """Provide the list of form names on the given page.

        For each form, the ID is provided if it starts with the prefix,
        otherwise the name is provided if that starts with the prefix.
        """
        names = []
        forms = browser.forms
        for form in forms:
            form_id = form.attrib.get('id')
            if form_id and form_id.startswith(prefix):
                names.append(form_id)
                continue
            name = form.attrib.get('name')
            if name and name.startswith(prefix):
                names.append(name)
        return names

    @staticmethod
    def provide_field_name(form_name: str, prefix: str) -> List[str]:
        """Provide the list of fields for the given form_name or number.

        For each field, the ID is provided if it starts with the prefix,
        otherwise the name is provided if that starts with the prefix.
        """
        names = []
        form = browser.form(form_name)
        if form is not None:
            for field in form.inputs:
                field_id = field.attrib.get('id')
                if field_id and field_id.startswith(prefix):
                    names.append(field_id)
                    continue
                name = field.name
                if name and name.startswith(prefix):
                    names.append(name)
        return names

    def _set_prompt(self) -> None:
        """Set the prompt to the current page."""
        url = browser.url
        if url is None:
            url = " *empty page* "
        self.prompt = f"current page: {url}\n>> "

    def precmd(self, line: str) -> str:
        """Run before each command; pass on the line unchanged."""
        return line

    def postcmd(self, stop: bool, line: str) -> bool:
        """Run after each command; set prompt."""
        self._set_prompt()
        return stop

    def default(self, line: str) -> None:
        """Called when an unknown command is executed.

        The line is parsed and executed as a twill command. Errors are
        logged, and re-raised only if 'fail_on_unknown' has been set.
        """
        # empty lines ==> emptyline(); here we just want to remove
        # leading whitespace.
        line = line.strip()

        # look for command
        global_dict, local_dict = namespaces.get_twill_glocals()
        cmd, args = parse.parse_command(line, global_dict, local_dict)

        # ignore comments & empty stuff
        if cmd is None:
            return

        try:
            parse.execute_command(
                cmd, args, global_dict, local_dict, '<shell>')
        except SystemExit:
            raise
        except Exception as e:
            log.error('\nERROR: %s\n', e)
            if self.fail_on_unknown:
                raise

    def emptyline(self) -> Any:
        """Handle empty lines (by ignoring them)."""
        pass

    @staticmethod
    def do_EOF(*_args: str) -> None:
        """Exit on CTRL-D, after saving the readline history if possible."""
        if write_history_file:
            write_history_file('.twill-history')
        raise SystemExit()

    @staticmethod
    def help_help() -> None:
        """Show help for the help command."""
        log.info("\nWhat do YOU think the command 'help' does?!?\n")

    @staticmethod
    def do_version(*_args: str) -> None:
        """Show the version number of twill."""
        log.info(version_info)

    @staticmethod
    def help_version() -> None:
        """Show help for the version command."""
        log.info("\nPrint version information.\n")

    def do_exit(self, *_args: str) -> None:
        """Exit the twill shell."""
        raise SystemExit()

    @staticmethod
    def help_exit() -> None:
        """Show help for the exit command."""
        log.info("\nExit twill.\n")

    do_quit = do_exit
    help_quit = help_exit
def get_command_shell() -> Optional[TwillCommandLoop]:
    """Return the existing command shell, or None if there is none."""
    try:
        return TwillCommandLoop.__it__
    except AttributeError:
        return None
twill_args: List[str] = [] # contains sys.argv *after* last '--'
interactive = False # 'True' if interacting with user
def main():
    """Run the twill command line tool.

    The scripts given on the command line are executed first. If there are
    no scripts or if -i is set, an interactive shell is started afterwards.
    Arguments after the last '--' are not interpreted here, but stored in
    'twill_args'.

    The global 'interactive' flag is False while scripts are executed and
    True while the interactive shell is running.

    Always ends with sys.exit(): the exit status is 1 if at least one script
    failed and 0 otherwise. With -f, the first error raised by a script is
    re-raised instead.
    """
    global twill_args, interactive

    # show the shorthand name for usage
    if sys.argv[0].endswith('-script.py'):
        sys.argv[0] = sys.argv[0].rsplit('-', 1)[0]

    # make sure that the current working directory is in the path
    if '' not in sys.path:
        sys.path.append('')

    parser = OptionParser()
    add = parser.add_option

    add('-d', '--dump-html', action='store', dest='dumpfile',
        help="dump HTML to this file on error")
    add('-f', '--fail', action='store_true', dest='fail',
        help='fail exit on first file to fail')
    add('-i', '--interactive', action='store_true', dest='interactive',
        help='drop into an interactive shell (after running files)')
    add('-l', '--loglevel', nargs=1, action='store', dest='loglevel',
        help='set the logging level')
    add('-n', '--never-fail', action='store_true', dest='never_fail',
        help='continue executing scripts past errors')
    add('-o', '--output', nargs=1, action='store', dest='outfile',
        help="print log to output file")
    add('-q', '--quiet', action='store_true', dest='quiet',
        help='do not show normal output')
    add('-u', '--url', nargs=1, action='store', dest='url',
        help='start at the given URL before each script')
    add('-v', '--version', action='store_true', dest='show_version',
        help='show version information and exit')
    add('-w', '--show-error-in-browser', action='store_true',
        dest='show_browser', help="show dumped HTML in a web browser ")

    # parse arguments
    sys_args = sys.argv[1:]
    if '--' in sys_args:
        # everything after the last '--' is reserved for the scripts
        for last in range(len(sys_args) - 1, -1, -1):
            if sys_args[last] == '--':
                twill_args = sys_args[last + 1:]
                sys_args = sys_args[:last]
                break

    options, args = parser.parse_args(sys_args)

    if options.show_version:
        log.info(version_info)
        sys.exit(0)

    quiet = options.quiet
    show_browser = options.show_browser
    dump_file = options.dumpfile
    out_file = options.outfile
    log_level = options.loglevel
    # Whether to start the shell is kept in a local variable, because the
    # global flag is switched off while the scripts are being executed.
    start_shell = options.interactive or not args
    interactive = start_shell

    if out_file:
        out_file = out_file.lstrip('=').lstrip() or None
        if out_file == '-':
            out_file = None

    if start_shell and (quiet or out_file or dump_file or show_browser):
        sys.exit("Interactive mode is incompatible with -q, -o, -d and -w")

    if options.show_browser and (not dump_file or dump_file == '-'):
        sys.exit("Please also specify a dump file with -d")

    if out_file:
        try:
            out_file = open(out_file, 'w')
        except IOError as e:
            sys.exit(f"Invalid output file '{out_file}': {e}")

    if log_level:
        # a level that is empty after stripping is reported as invalid
        log_level = log_level.lstrip('=').lstrip()
        if log_level.upper() not in log_levels:
            log_level_names = ', '.join(sorted(log_levels))
            sys.exit(f"Valid log levels are: {log_level_names}")
        set_log_level(log_level)

    if quiet:
        out_file = open(os.devnull, 'w')

    set_output(out_file)

    # first find and run any scripts put on the command line

    failed = False
    if args:
        success = []
        failure = []

        filenames = gather_filenames(args)
        dump = None

        for filename in filenames:
            try:
                interactive = False
                execute_file(filename, initial_url=options.url,
                             never_fail=options.never_fail)
                success.append(filename)
            except Exception as e:
                if dump_file:
                    dump = browser.dump
                if options.fail:
                    raise
                else:
                    if browser.first_error:
                        log.error('\nFirst error: %s', browser.first_error)
                    log.error('\n*** ERROR: %s', e)
                    log.debug(traceback.format_exc())
                    failure.append(filename)

        log.info('--')
        if dump:
            if dump_file == '-':
                log.info('HTML when error was encountered:\n\n%s\n--',
                         dump.strip())
            else:
                try:
                    with open(dump_file, 'wb') as f:
                        f.write(dump)
                except IOError as e:
                    log.error('Could not dump to %s: %s\n', dump_file, e)
                else:
                    log.info('HTML has been dumped to %s\n', dump_file)

        log.info('%d of %d files SUCCEEDED.',
                 len(success), len(success) + len(failure))
        if len(failure):
            log.error('Failed:\n\t%s', '\n\t'.join(failure))
            failed = True

        if dump and show_browser:
            import webbrowser
            url = 'file:///' + os.path.abspath(dump_file).replace(os.sep, '/')
            log.debug('Running web browser on %s', url)
            webbrowser.open(url)

    # if no scripts to run or -i is set, drop into an interactive shell

    if start_shell:
        interactive = True
        welcome_msg = "" if args else "\n -= Welcome to twill =-\n"

        shell = TwillCommandLoop(initial_url=options.url)

        while True:
            try:
                shell.cmdloop(welcome_msg)
            except KeyboardInterrupt:
                print()
                break
            except SystemExit:
                raise

            welcome_msg = ""

    shutdown()

    if failed:
        sys.exit(1)
    sys.exit(0)
if __name__ == '__main__':
main()
"""Support functionality for using twill in unit tests."""
import sys
import time
from io import StringIO
from multiprocessing import Process
from .parse import execute_file
HOST = '127.0.0.1'  # interface to run the server on
PORT = 8080  # default port to run the server on
SLEEP = 0  # time to wait for the server to start


class TestInfo:
    """Container for everything that is needed to run one test.

    This holds the twill script to run, the function that runs the server,
    and the port the server is running on. Note that information about
    the server port *must* be decided by the end of the __init__ function.

    The optional sleep argument specifies how many seconds to wait for the
    server to set itself up. Default is 0.
    """

    def __init__(self, script, server_fn, port=PORT, sleep=SLEEP):
        self.script, self.server_fn = script, server_fn
        self.port, self.sleep = port, sleep
        # these are set when the server is started
        self.stdout = self.stderr = None

    def start_server(self):
        """Run the server function with stdout and stderr captured.

        The output is collected in the attributes 'stdout' and 'stderr';
        the original streams are always restored afterwards.
        """
        saved_streams = sys.stdout, sys.stderr
        self.stdout, self.stderr = StringIO(), StringIO()
        sys.stdout, sys.stderr = self.stdout, self.stderr
        try:
            self.server_fn()
        finally:
            sys.stdout, sys.stderr = saved_streams

    def run_script(self):
        """Wait the configured time, then run the script on the server."""
        time.sleep(self.sleep)
        execute_file(self.script, initial_url=self.url)

    @property
    def url(self):
        """Get the test server URL."""
        # noinspection HttpUrlsUsage
        return f"http://{HOST}:{self.port}/"
def run_test(test_info):
    """Run a twill test script against a server running in a sub process.

    The server is started in a new process, which is terminated again
    after the script has been run, even if the script raised an error.
    """
    server_process = Process(target=test_info.start_server)
    server_process.start()

    # give the server process some time to spin up,
    # at least one second or the sleep time set for the test
    timeout = max(1, test_info.sleep)
    step = min(0.125, 0.125 * timeout)
    elapsed = 0
    while True:
        if server_process.is_alive() or elapsed >= timeout:
            break
        time.sleep(step)
        elapsed += step

    # run twill test script
    try:
        test_info.run_script()
    finally:
        server_process.terminate()
"""
Various ugly utility functions for twill.
Apart from various simple utility functions, twill's robust parsing
code is implemented in the ConfigurableParsingFactory class.
"""
import os
import re
from typing import Any, List, NamedTuple, Optional, Union, Sequence, Tuple
from requests import Response
from requests.structures import CaseInsensitiveDict
from lxml.html import (
fromstring as html_to_tree, tostring as tree_to_html,
CheckboxGroup, FormElement, HtmlElement, InputElement,
MultipleSelectOptions, RadioGroup, SelectElement, TextareaElement)
try:
import tidylib # type: ignore
except (ImportError, OSError):
# ImportError can be raised when PyTidyLib package is not installed
# OSError can be raised when the HTML Tidy shared library is not installed
tidylib = None
from . import log, twill_ext
from .errors import TwillException
__all__ = [
'gather_filenames', 'get_equiv_refresh_interval', 'html_to_tree',
'is_hidden_filename', 'is_twill_filename', 'print_form',
'make_boolean', 'make_int', 'make_twill_filename',
'run_tidy', 'tree_to_html', 'trunc', 'unique_match',
'CheckboxGroup', 'FieldElement', 'FormElement',
'HtmlElement', 'InputElement', 'Link', 'RadioGroup',
'ResultWrapper', 'SelectElement', 'Singleton', 'TextareaElement',
'UrlWithRealm', 'Response']
FieldElement = Union[
CheckboxGroup, InputElement, RadioGroup, SelectElement, TextareaElement]
class Link(NamedTuple):
    """A link on a page, consisting of its text and its URL."""

    text: str
    url: str
# Depending on the configuration, realms can be ignored
UrlWithRealm = Union[str, Tuple[str, str]]
class Singleton:
    """A mixin class to create singleton objects.

    The single instance is stored in the '__it__' attribute of the class
    itself, so each subclass gets an instance of its own.
    """

    def __new__(cls, *args, **kwargs):
        """Return the instance of this class, creating it if necessary."""
        instance = cls.__dict__.get('__it__')
        if instance is None:
            instance = object.__new__(cls)
            cls.__it__ = instance
        return instance

    @classmethod
    def reset(cls):
        """Forget the instance, so that the next call creates a new one."""
        cls.__it__ = None
class ResultWrapper:
"""Deal with request results, and present them in a unified form.
These objects are returned by browser._journey()-wrapped functions.
"""
def __init__(self, response: Response) -> None:
self.response = response
self.encoding = response.encoding
try:
self.tree = html_to_tree(self.text)
except ValueError:
# may happen when there is an XML encoding declaration
self.tree = html_to_tree(self.content)
self.xpath = self.tree.xpath
self._fix_forms()
@property
def url(self) -> str:
""""Get the url of the result page."""
return self.response.url
@property
def http_code(self) -> int:
"""Get the http status code of the result page."""
return self.response.status_code
@property
def text(self) -> str:
"""Get the text of the result page."""
return self.response.text
@property
def content(self) -> bytes:
"""Get the binary content of the result page."""
return self.response.content
@property
def headers(self) -> CaseInsensitiveDict:
"""Get the headers of the result page."""
return self.response.headers
@property
def title(self) -> Optional[str]:
"""Get the title of the result page."""
try:
return self.xpath('//title[1]/text()')[0]
except IndexError:
return None
@property
def links(self) -> List[Link]:
"""Get all links in the result page."""
return [Link(a.text_content(), a.get('href'))
for a in self.xpath('//a[@href]')]
def find_link(self, pattern: str) -> Optional[Link]:
"""Find a link with a given pattern on the result page."""
regex = re.compile(pattern)
for link in self.links:
if regex.search(link.text) or regex.search(link.url):
return link
return None
def form(self, name_or_num: Union[str, int] = 1) -> Optional[FormElement]:
"""Get the form with the given name or number on the result page.
Returns None if no such form can be found on the result page.
"""
forms = self.forms
if isinstance(name_or_num, str):
# first, try ID
for form in forms:
form_id = form.get('id')
if form_id and form_id == name_or_num:
return form
# next, try regex with name
regex = re.compile(name_or_num)
for form in forms:
name = form.get('name')
if name and regex.search(name):
return form
# last, try number
try:
num = int(name_or_num) - 1
if not 0 <= num < len(forms):
raise IndexError
except (ValueError, IndexError):
return None
else:
return forms[num]
def _fix_forms(self) -> None:
    """Fix forms on the page for use with twill.

    Sets ``self.forms`` to the forms of the parsed page. If there are
    input elements outside of any form, they are collected in a new
    artificial form that is put in front of the regular forms. After
    that, the submit buttons inside the forms are turned into input
    elements.
    """
    # put all stray fields into a form
    orphans = self.xpath('//input[not(ancestor::form)]')
    if orphans:
        form_parts = [b'<form>'] + [
            tree_to_html(orphan) for orphan in orphans] + [b'</form>']
        self.forms = html_to_tree(b''.join(form_parts)).forms
        self.forms.extend(self.tree.forms)
    else:
        self.forms = self.tree.forms
    # convert all submit button elements to input elements, since
    # otherwise lxml will not recognize them as form input fields
    for form in self.forms:
        # The path must be relative ('.//'): an expression starting with
        # '//' is evaluated against the whole document, which would rescan
        # the full tree for every form and touch buttons outside of it.
        for button in form.xpath(".//button[@type='submit']"):
            button.tag = 'input'
def trunc(s: Optional[str], length: int) -> str:
    """Truncate a string to a given maximum length.

    A string longer than ``length`` is cut down to its first
    ``length - 4`` characters followed by ' ...', so that the result is
    exactly ``length`` characters long. If ``length`` is too small to
    hold the ' ...' marker, the string is simply cut to ``length``
    characters, so the result is never longer than requested.

    None and the empty string both give an empty string; strings that
    already fit are returned unchanged.
    """
    if not s:
        return ''
    if len(s) <= length:
        return s
    marker = ' ...'
    if length < len(marker):
        # no room for the marker; a negative slice end would otherwise
        # keep almost the whole string and make the result even longer
        return s[:max(length, 0)]
    return s[:length - len(marker)] + marker
def print_form(form: FormElement, n: int) -> None:
    """Log a tabular overview of the given form.

    The heading shows the name of the form (if it has one) and the
    number n. One line is logged for each input of the form, with its
    position, name, type, id and value. For fields with a
    ``value_options`` attribute, the available options are listed after
    the current value.
    """
    info = log.info

    form_name = form.get('name')
    if form_name:
        info('\nForm name=%s (#%d)', form_name, n)
    else:
        info('\nForm #%d', n)

    if form.inputs is not None:
        info('## __Name__________________'
             ' __Type___ __ID________ __Value__________________')

        for index, field in enumerate(form.inputs, 1):
            value = field.value
            choices = getattr(field, 'value_options', None)
            if choices:
                # options may be plain strings or objects with a name
                quoted = [
                    f"'{getattr(choice, 'name', choice)}'"
                    for choice in choices]
                shown = f'{value} of {", ".join(quoted)}'
            else:
                shown = f'{value}'
            # fields without a type attribute are displayed as 'select'
            columns = [
                f'{index:2}',
                f'{trunc(field.name, 24):24}',
                f'{trunc(getattr(field, "type", "select"), 9):9}',
                f'{trunc(field.get("id"), 12):12}',
                trunc(shown, 40)]
            info(' '.join(columns))

    info('')
def make_boolean(value: Any) -> bool:
    """Convert the input value into a boolean.

    The value is converted to a string, lower-cased and stripped. The
    words true/false, the signs +/- and the words on/off are recognized,
    as well as integers, where zero is False and everything else is True.

    Raises a TwillException for any other value.
    """
    text = str(value).lower().strip()

    keywords = {
        'true': True, 'false': False,
        '+': True, '-': False,
        'on': True, 'off': False}
    if text in keywords:
        return keywords[text]

    # none of the keywords above can be parsed as an integer,
    # so the order of these two checks does not matter
    try:
        number = int(text)
    except ValueError:
        number = None
    if number is not None:
        return bool(number)

    raise TwillException(f"unable to convert '{text}' into true/false")
def make_int(value: Any) -> int:
    """Convert the input value into an int.

    Anything accepted by int() is converted. If int() fails for whatever
    reason, a TwillException is raised instead.
    """
    try:
        return int(value)
    except Exception:
        # reported below as a TwillException
        pass
    raise TwillException(f"unable to convert '{value}' into an int")
def set_form_control_value(control: FieldElement, value: str) -> None:
    """Set the given form control to the given value.

    The handling depends on the kind of control:

    - checkable inputs are checked or unchecked if the value can be
      converted to a boolean, otherwise they are left alone
    - other inputs get the value, except for submit and image inputs
    - text areas and radio groups get the value
    - for checkbox groups, a value with a leading '-' is removed from the
      set of values, anything else (optionally prefixed with '+') is added
    - for select elements, the value is compared with the stripped texts
      and values of the options; a leading '-' deselects the option found,
      anything else (optionally prefixed with '+') selects it

    Raises a TwillException if a select element has no matching option
    or if the control is of any other kind.
    """
    if isinstance(control, InputElement):
        if not control.checkable:
            if control.type not in ('submit', 'image'):
                control.value = value
            return
        try:
            checked = make_boolean(value)
        except TwillException:
            # if there's more than one checkbox,
            # it should be a CheckboxGroup, see below.
            return
        control.checked = checked
        return

    if isinstance(control, (TextareaElement, RadioGroup)):
        control.value = value
        return

    if isinstance(control, CheckboxGroup):
        if value.startswith('-'):
            try:
                control.value.remove(value[1:])
            except KeyError:
                pass  # the value was not set anyway
        else:
            control.value.add(value[1:] if value.startswith('+') else value)
        return

    if not isinstance(control, SelectElement):
        raise TwillException('Attempt to set value on invalid control')

    # for select elements we need to find the right *value*,
    # and figure out if we want to *select* or *deselect*
    select = not value.startswith('-')
    if value.startswith(('-', '+')):
        value = value[1:]

    option_values = [item.strip() for item in control.value_options]
    option_labels = [
        (child.text or '').strip()
        for child in control.getchildren()]  # type: ignore

    # locate the first option whose label or value equals the given value
    for label, option in zip(option_labels, option_values):
        if value in (label, option):
            break
    else:
        raise TwillException('Attempt to set an invalid value')

    if not isinstance(control.value, MultipleSelectOptions):
        control.value = option if select else ""
    elif select:
        control.value.add(option)
    elif option in control.value:
        control.value.remove(option)
def _all_the_same_submit(matches: Sequence[FieldElement]) -> bool:
    """Check if a list of controls all belong to the same control.

    This is the case if all of them are input elements of type submit
    or hidden that share the same name and value. An empty list counts
    as belonging to the same control.
    """
    shared_name = shared_value = None
    for control in matches:
        if not (isinstance(control, InputElement)
                and control.type in ('submit', 'hidden')):
            return False
        if shared_name is None:
            # nothing to compare with so far, remember this control
            shared_name, shared_value = control.name, control.value
        elif (control.name, control.value) != (shared_name, shared_value):
            return False
    return True
def _all_the_same_checkbox(matches: Sequence[FieldElement]) -> bool:
    """Check if a list of controls all belong to the same checkbox.

    Hidden controls can combine with checkboxes, to allow form
    processors to ensure a False value is returned even if user
    does not check the checkbox. Without the hidden control, no
    value would be returned.

    Therefore all controls must be input elements of type checkbox
    or hidden that share the same name.
    """
    shared_name = None
    for control in matches:
        if not (isinstance(control, InputElement)
                and control.type in ('checkbox', 'hidden')):
            return False
        if shared_name is None:
            # nothing to compare with so far, remember this name
            shared_name = control.name
        elif control.name != shared_name:
            return False
    return True
def unique_match(matches: Sequence[FieldElement]) -> bool:
    """Check whether a match is unique.

    A match is unique if there is exactly one matching control, or if
    all matching controls belong to the same checkbox or to the same
    submit control.
    """
    if len(matches) == 1:
        return True
    if _all_the_same_checkbox(matches):
        return True
    return _all_the_same_submit(matches)
def run_tidy(html: str) -> Tuple[Optional[str], Optional[str]]:
    """Run HTML Tidy on the given HTML string.

    Return a 2-tuple (output, errors). (None, None) will be returned if
    PyTidyLib (or the required shared library for tidy) isn't installed.
    If the option require_tidy is set in that case, a TwillException is
    raised instead.

    All twill options starting with 'tidy_' are passed on to tidy, with
    that prefix removed and underscores replaced by hyphens.
    """
    from .commands import options

    require_tidy = options.get('require_tidy')

    if tidylib:
        prefix = 'tidy_'
        tidy_options = {}
        for key, value in options.items():
            if key.startswith(prefix):
                tidy_options[key[len(prefix):].replace('_', '-')] = value
        output, errors = tidylib.tidy_document(html, tidy_options)
        return output, errors

    if require_tidy:
        raise TwillException(
            'Option require_tidy is set, but PyTidyLib is not installed')
    return None, None
def get_equiv_refresh_interval() -> Optional[int]:
    """Get the smallest interval for which the browser should follow redirects.

    Redirection happens if the given interval is smaller than this.
    The value is taken from the twill option 'equiv_refresh_interval'
    and is None if that option is not set.
    """
    from .commands import options

    interval = options.get('equiv_refresh_interval')
    return interval
def is_hidden_filename(filename: str) -> bool:
    """Check if this is a hidden file (starting with a dot).

    Only the last path component is examined. The special names
    '.' and '..' are not regarded as hidden.
    """
    if filename in ('.', '..'):
        return False
    base_name = os.path.basename(filename)
    return base_name.startswith('.')
def is_twill_filename(filename: str) -> bool:
    """Check if the given filename has the twill file extension.

    Hidden files are never regarded as twill files.
    """
    if is_hidden_filename(filename):
        return False
    return filename.endswith(twill_ext)
def make_twill_filename(name: str) -> str:
    """Add the twill extension to the name of a script if necessary.

    The extension is only added if the name has no extension yet and a
    file with the extended name actually exists. In all other cases,
    and for the special names '.' and '..', the name is returned as is.
    """
    if name in ('.', '..'):
        return name
    base, ext = os.path.splitext(name)
    if ext:
        return name
    candidate = base + twill_ext
    return candidate if os.path.exists(candidate) else name
def gather_filenames(args: Sequence[str]) -> List[str]:
    """Collect script files from within directories.

    Each argument is first passed through make_twill_filename(). If the
    result is a directory, it is searched recursively for twill scripts,
    skipping hidden directories and hidden files. Otherwise, the result
    is taken as the name of a script without checking it any further.

    Returns the list of all script file names found in this way.
    """
    names: List[str] = []
    for arg in args:
        name = make_twill_filename(arg)
        if not os.path.isdir(name):
            names.append(name)
            continue
        # Walk the same path that was just checked to be a directory;
        # it can differ from arg when the twill extension was added.
        for dir_path, dir_names, filenames in os.walk(name):
            # prune hidden directories in place so that they are not visited
            dir_names[:] = [
                d for d in dir_names if not is_hidden_filename(d)]
            names.extend(
                os.path.join(dir_path, filename)
                for filename in filenames if is_twill_filename(filename))
    return names

Sorry, the diff of this file is not supported yet