
Security News
AGENTS.md Gains Traction as an Open Format for AI Coding Agents
AGENTS.md is a fast-growing open format giving AI coding agents a shared, predictable way to understand project setup, style, and workflows.
Lightweight OCR MCP Server for AI Models - Privacy-focused image recognition with MCP protocol support
一个基于Python的轻量级OCR MCP服务器,专门为AI模型提供验证码识别和图像预处理功能。系统采用ddddocr引擎,符合MCP 2024-11-05协议标准,完全免费使用。
# 安装OCR MCP包
pip install ocr-mcp
# 验证安装
ocr-mcp-server --help
# 全局安装 (推荐使用最新版本)
npm install -g ocr-mcp@1.0.3
# 或使用npx直接运行
npx ocr-mcp
# 克隆项目
git clone <repository-url>
cd free_ocr
# 安装依赖
pip install -r requirements.txt
# 验证安装
python verify_mcp_installation.py
# 使用命令行工具启动
ocr-mcp-server
# 或使用Python模块启动
python -m ocr_mcp
# 使用npx启动
npx --package=ocr-mcp python -m ocr_mcp
# 或者直接使用Python模块(推荐)
python -m ocr_mcp
# 启动MCP服务器
python -m ocr_mcp.server
服务器启动成功后会显示:
INFO - ddddocr引擎初始化成功
INFO - 已注册 2 个工具: ['captcha_recognize', 'image_preprocess']
INFO - MCP服务器已启动,等待客户端连接...
重要: 由于ddddocr与新版Pillow的兼容性问题,本项目已配置使用兼容的依赖版本。
如果遇到 PIL.Image.ANTIALIAS 错误,请运行:
pip install "Pillow>=10.1.0,<11.0.0"
pip uninstall ddddocr -y
pip install ddddocr
import asyncio
import base64
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
async def main():
    """Run an end-to-end demo against the OCR MCP server.

    Spawns the server over stdio, prints the names of the tools it exposes,
    then submits ``captcha.png`` to the ``captcha_recognize`` tool and prints
    the text of the first content item in the reply.
    """
    # Pick the launch command that matches how the package was installed.
    launch = StdioServerParameters(
        # Console script available after a PyPI install
        command="ocr-mcp-server",
        args=[]
        # Alternative: run the Python module instead
        # command="python",
        # args=["-m", "ocr_mcp"]
    )
    async with stdio_client(launch) as (reader, writer):
        async with ClientSession(reader, writer) as session:
            # Initialize the connection before issuing any requests.
            await session.initialize()

            # Show which tools the server exposes.
            listing = await session.list_tools()
            tool_names = [tool.name for tool in listing.tools]
            print(f"可用工具: {tool_names}")

            # Load the captcha image and base64-encode it.
            with open("captcha.png", "rb") as image_file:
                raw_bytes = image_file.read()
            encoded_image = base64.b64encode(raw_bytes).decode()

            # Invoke the captcha recognition tool.
            arguments = {
                "image_data": encoded_image,
                "options": {
                    "preprocess": True,
                    "confidence_threshold": 0.8
                }
            }
            reply = await session.call_tool("captcha_recognize", arguments)
            print(f"识别结果: {reply.content[0].text}")


if __name__ == "__main__":
    asyncio.run(main())
# 直接识别验证码 (独立模式)
python -c "
import base64
from ocr_mcp.tools.captcha_tool import CaptchaTool
# 读取图片
with open('captcha.png', 'rb') as f:
image_data = base64.b64encode(f.read()).decode()
# 识别验证码
tool = CaptchaTool()
result = tool.execute(image_data=image_data)
print(result)
"
识别各种类型的验证码图片,基于ddddocr引擎提供高精度识别。
参数:
- image_data (string): Base64编码的图像数据
- options (object, 可选):
- preprocess (boolean): 是否进行图像预处理,默认true
- confidence_threshold (float): 置信度阈值(0-1),默认0.8

返回值:
{
"type": "text",
"text": "识别结果: ABCD\n置信度: 0.95\n处理时间: 0.12秒"
}
对图像进行预处理优化,包括去噪、对比度增强、尺寸调整等操作。
参数:
- image_data (string): Base64编码的图像数据
- operations (array): 预处理操作列表,可选值: ["denoise", "enhance", "resize"]
- options (object, 可选):
- contrast (number): 对比度增强因子,默认1.5
- sharpness (number): 锐化增强因子,默认1.2
- max_width (integer): 最大宽度,默认800
- max_height (integer): 最大高度,默认600
- return_processed_image (boolean): 是否返回处理后的图像数据,默认false

返回值:
{
"type": "text",
"text": "预处理完成\n应用操作: 去噪, 增强\n处理时间: 0.08秒"
}
适用于AI模型集成,如Claude Desktop、ChatGPT等。
# 启动MCP服务器
python -m ocr_mcp.server
适用于直接命令行使用,无需MCP框架。
# 使用安装的命令行工具
ocr-mcp-server --standalone captcha.png
# 或使用Python模块
python -m ocr_mcp --standalone captcha.png
# 单文件识别
python standalone_ocr.py captcha.png
# 批量识别
python standalone_ocr.py *.png --batch
# 跳过预处理(更快但可能精度较低)
python standalone_ocr.py image.jpg --no-preprocess
# 查看帮助
python standalone_ocr.py --help
独立模式特性:
在Claude Desktop配置文件中添加OCR服务器。详细配置选项请参考服务器配置部分。
{
"mcpServers": {
"ocr-server": {
"command": "ocr-mcp-server",
"args": []
}
}
}
{
"mcpServers": {
"ocr-server": {
"command": "npx",
"args": ["--package=ocr-mcp@1.0.3", "python", "-m", "ocr_mcp"]
}
}
}
{
"mcpServers": {
"ocr-server": {
"command": "python",
"args": ["-m", "ocr_mcp"]
}
}
}
class OCRAssistant:
    """Small async client wrapper around an OCR MCP server session.

    Call ``connect_ocr_server()`` before ``recognize_captcha()``, and
    ``close()`` when finished so the stdio transport and the client session
    are exited instead of being left open.
    """

    def __init__(self):
        self.mcp_session = None
        # Holds the entered transport/session context managers so that
        # close() can exit them in reverse order.
        self._exit_stack = None

    async def connect_ocr_server(self):
        """Connect to the OCR MCP server.

        Starts ``python -m ocr_mcp`` over stdio, opens a client session on it
        and initializes that session. Any previous connection held by this
        instance is closed first. If any step fails, everything entered so far
        is exited before the exception propagates.
        """
        # Local import keeps this snippet self-contained.
        from contextlib import AsyncExitStack

        await self.close()

        server_params = StdioServerParameters(
            command="python",
            args=["-m", "ocr_mcp"]
        )
        stack = AsyncExitStack()
        try:
            self.read, self.write = await stack.enter_async_context(
                stdio_client(server_params)
            )
            session = await stack.enter_async_context(
                ClientSession(self.read, self.write)
            )
            await session.initialize()
        except BaseException:
            # Do not leak a half-opened transport or session.
            await stack.aclose()
            raise
        self._exit_stack = stack
        self.mcp_session = session

    async def close(self):
        """Exit the session and transport opened by ``connect_ocr_server()``.

        Safe to call when not connected and safe to call more than once.
        """
        # NOTE(review): presumably this must run in the same task that called
        # connect_ocr_server(), as is usual for task-group based transports;
        # confirm against the MCP SDK documentation.
        stack, self._exit_stack = self._exit_stack, None
        self.mcp_session = None
        if stack is not None:
            await stack.aclose()

    async def recognize_captcha(self, image_path: str) -> str:
        """Recognize the captcha stored at ``image_path``.

        Args:
            image_path: Path of the image file to read and send, base64-encoded,
                to the ``captcha_recognize`` tool.

        Returns:
            The text of the first content item in the tool result.

        Raises:
            RuntimeError: If ``connect_ocr_server()`` has not been called.
        """
        if self.mcp_session is None:
            raise RuntimeError(
                "OCR MCP server is not connected; call connect_ocr_server() first"
            )
        with open(image_path, "rb") as f:
            image_data = base64.b64encode(f.read()).decode()
        result = await self.mcp_session.call_tool(
            "captcha_recognize",
            {"image_data": image_data}
        )
        return result.content[0].text
使用npx可以快速启动OCR MCP服务器:
# 使用npx启动OCR服务器(自动安装并运行)
npx --package=ocr-mcp python -m ocr_mcp
# 带环境变量启动
OCR_LOG_LEVEL=DEBUG npx --package=ocr-mcp python -m ocr_mcp
# 或者直接使用Python模块
python -m ocr_mcp
Claude Desktop配置(NPX方式):
{
"mcpServers": {
"ocr-server": {
"command": "npx",
"args": ["--package=ocr-mcp", "python", "-m", "ocr_mcp"],
"env": {
"OCR_LOG_LEVEL": "INFO"
}
}
}
}
使用uvx(uv的执行器)可以在隔离环境中运行OCR服务器:
# 使用uvx启动OCR服务器
uvx ocr-mcp
# 指定Python版本
uvx --python 3.11 ocr-mcp
# 带环境变量启动
OCR_LOG_LEVEL=DEBUG uvx ocr-mcp
# 使用uv运行(替代方案)
uv run --with ocr-mcp ocr-mcp-server
Claude Desktop配置(UVX方式):
{
"mcpServers": {
"ocr-server": {
"command": "uvx",
"args": ["ocr-mcp"],
"env": {
"OCR_LOG_LEVEL": "INFO",
"UV_PYTHON": "3.11"
}
}
}
}
UVX优势:
# 运行所有测试
pytest
# 运行测试并显示覆盖率
pytest --cov=ocr_mcp
# 运行特定测试文件
pytest tests/test_captcha_tool.py
# 运行测试并生成详细报告
pytest -v --cov=ocr_mcp --cov-report=html
平台 | 包名 | 安装命令 | 运行命令 |
---|---|---|---|
PyPI | ocr-mcp | pip install ocr-mcp | ocr-mcp-server |
NPM | ocr-mcp | npm install -g ocr-mcp | ocr-mcp 或 npx ocr-mcp |
free_ocr/
├── ocr_mcp/ # Python主包
│ ├── __init__.py
│ ├── __main__.py # 入口模块
│ ├── server.py # MCP服务器核心
│ ├── tools/ # 工具模块
│ │ ├── __init__.py
│ │ ├── base_tool.py # 工具基类
│ │ ├── captcha_tool.py # 验证码识别工具
│ │ └── preprocess_tool.py # 图像预处理工具
│ └── utils/ # 工具函数
│ ├── __init__.py
│ ├── logger.py # 日志工具
│ └── image_utils.py # 图像处理工具
├── src/ # TypeScript源码
│ └── index.ts # NPM包入口
├── bin/ # 命令行脚本
│ └── ocr-mcp.js # NPM命令行工具
├── dist/ # 构建产物
│ ├── *.whl # Python wheel包
│ ├── *.tar.gz # Python源码包
│ ├── index.js # 编译后的JS文件
│ └── index.d.ts # TypeScript声明文件
├── tests/ # 测试文件
│ ├── __init__.py
│ ├── conftest.py # 测试配置
│ ├── test_image_utils.py # 图像工具测试
│ ├── test_captcha_tool.py # 验证码工具测试
│ └── test_preprocess_tool.py # 预处理工具测试
├── examples/ # 使用示例
├── requirements.txt # Python依赖列表
├── pyproject.toml # Python项目配置
├── package.json # NPM项目配置
├── tsconfig.json # TypeScript配置
├── MANIFEST.in # Python包文件清单
└── README.md # 项目文档
# 基本启动
python -m ocr_mcp
# 指定端口和主机
python -m ocr_mcp --host 0.0.0.0 --port 8080
# 启用调试模式
python -m ocr_mcp --debug
# 设置最大并发连接数
python -m ocr_mcp --max-connections 10
在Claude Desktop的配置文件中添加OCR服务器:
PyPI安装后的配置 (推荐):
{
"mcpServers": {
"ocr-server": {
"command": "ocr-mcp-server",
"args": [],
"env": {
"OCR_LOG_LEVEL": "INFO",
"OCR_MAX_IMAGE_SIZE": "5"
}
}
}
}
NPM安装后的配置:
{
"mcpServers": {
"ocr-mcp": {
"command": "npx",
"args": ["--package=ocr-mcp", "python", "-m", "ocr_mcp"]
}
}
}
源码安装后的配置:
{
"mcpServers": {
"ocr-server": {
"command": "python",
"args": ["-m", "ocr_mcp"],
"env": {
"OCR_LOG_LEVEL": "INFO",
"OCR_MAX_IMAGE_SIZE": "5"
}
}
}
}
from mcp import StdioServerParameters
# Configuration after a PyPI install (recommended)
server_params = StdioServerParameters(
    env=dict(OCR_LOG_LEVEL="DEBUG", OCR_MAX_IMAGE_SIZE="10"),
    args=[],
    command="ocr-mcp-server",
)
# Alternatively, launch through the Python module
# server_params = StdioServerParameters(
#     command="python",
#     args=["-m", "ocr_mcp"],
#     env={
#         "OCR_LOG_LEVEL": "DEBUG",
#         "OCR_MAX_IMAGE_SIZE": "10"
#     }
# )
# 日志级别
export OCR_LOG_LEVEL=INFO
# 最大图像大小(MB)
export OCR_MAX_IMAGE_SIZE=5
# 代码格式化
black ocr_mcp/
# 代码检查
flake8 ocr_mcp/
# 类型检查
mypy ocr_mcp/
BaseTool 类
from ocr_mcp.tools.base_tool import BaseTool
from typing import Any, Dict


class CustomTool(BaseTool):
    """Example custom tool built by subclassing ``BaseTool``."""

    @property
    def name(self) -> str:
        """Return the tool's name."""
        return "custom_tool"

    @property
    def description(self) -> str:
        """Return the tool's description text."""
        return "自定义工具描述"

    @property
    def input_schema(self) -> Dict[str, Any]:
        """Return the schema for the tool's input.

        The schema describes an object with a single required string
        property, ``param``.
        """
        return {
            "type": "object",
            "properties": {
                "param": {"type": "string"}
            },
            "required": ["param"]
        }

    async def execute(self, **kwargs) -> Any:
        """Run the tool with the given keyword arguments.

        This placeholder ignores ``kwargs`` and returns a fixed success payload.
        """
        # Implement the tool logic here
        return {"result": "success"}
Q: 连接MCP服务器失败 A: 检查Python环境和依赖安装,确保服务器进程正常启动
Q: 识别准确率低 A: 尝试启用图像预处理,或检查图像质量
Q: 处理速度慢 A: 减少图像尺寸,确保图像清晰度
错误码 | 错误类型 | 描述 |
---|---|---|
-32600 | Invalid Request | 无效的JSON-RPC请求 |
-32601 | Method not found | 工具不存在 |
-32602 | Invalid params | 参数格式错误 |
-32603 | Internal error | 服务器内部错误 |
-1001 | Image decode error | 图像解码失败 |
-1002 | OCR processing error | OCR处理失败 |
本项目采用MIT许可证。详见LICENSE文件。
欢迎提交Issue和Pull Request!
git checkout -b feature/AmazingFeature
git commit -m 'Add some AmazingFeature'
git push origin feature/AmazingFeature
如果您在使用过程中遇到问题,请:
# 安装构建工具
pip install build twine
# 构建包
python -m build
# 检查包
twine check dist/*
# 安装依赖
npm install
# 编译TypeScript
npm run build
# 打包
npm pack
# 发布到测试环境
twine upload --repository testpypi dist/*
# 发布到正式环境
twine upload dist/*
# 登录NPM
npm login
# 发布包
npm publish
# 或发布到测试环境
npm publish --tag beta
# 更新版本号
npm version patch # 补丁版本
npm version minor # 次要版本
npm version major # 主要版本
# 同步更新pyproject.toml中的版本号
# 然后重新构建和发布
OCR MCP系统 - 为AI模型提供强大的验证码识别能力 🚀
FAQs
Lightweight OCR MCP Server for AI Models - Privacy-focused image recognition with MCP protocol support
The npm package ocr-mcp receives a total of 0 weekly downloads. As such, ocr-mcp popularity was classified as not popular.
We found that ocr-mcp demonstrated a healthy version release cadence and project activity because the last version was released less than a year ago. It has 1 open source maintainer collaborating on the project.
Did you know?
Socket for GitHub automatically highlights issues in each pull request and monitors the health of all your open source dependencies. Discover the contents of your packages and block harmful activity before you install or update your dependencies.
Security News
AGENTS.md is a fast-growing open format giving AI coding agents a shared, predictable way to understand project setup, style, and workflows.
Security News
/Research
Malicious npm package impersonates Nodemailer and drains wallets by hijacking crypto transactions across multiple blockchains.
Security News
This episode explores the hard problem of reachability analysis, from static analysis limits to handling dynamic languages and massive dependency trees.