[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"extension-skill-aidenwu0209-paddleocr-doc-parsing-it":3,"guides-for-aidenwu0209-paddleocr-doc-parsing":223,"similar-k170fjdnm4zmjtz1rgs8zwq4418663pv":224},{"_creationTime":4,"_id":5,"children":6,"community":7,"display":9,"evaluation":21,"identity":190,"isFallback":195,"parentExtension":196,"providers":197,"relations":201,"repo":203,"workflow":220},1778053339109.673,"k170fjdnm4zmjtz1rgs8zwq4418663pv",[],{"reviewCount":8},0,{"description":10,"installMethods":11,"name":12,"sourceUrl":13,"tags":14},"Use this skill to extract structured Markdown/JSON from PDFs and document images—tables with cell-level precision, formulas as LaTeX, figures, seals, charts, headers/footers, multi-column layout and correct reading order. Trigger terms: 文档解析, 版面分析, 版面还原, 表格提取, 公式识别, 多栏排版, 扫描件结构化, 发票, 财报, 复杂 PDF, PDF转Markdown, 图表, 阅读顺序; reading order, formula, LaTeX, layout parsing, structure extraction, PP-StructureV3, PaddleOCR-VL.",{},"PaddleOCR Document Parsing","https://github.com/aidenwu0209/paddleocr-skills/tree/HEAD/skills/paddleocr-doc-parsing",[15,16,17,18,19,20],"pdf","document-parsing","ocr","layout-analysis","paddleocr","python",{"_creationTime":22,"_id":23,"extensionId":5,"locale":24,"result":25,"trustSignals":178,"workflow":188},1778053352237.296,"kn729a3bez5232c14npca7f5q9866mrb","en",{"checks":26,"evaluatedAt":168,"extensionSummary":169,"promptVersionExtension":170,"promptVersionScoring":171,"rationale":172,"score":173,"summary":174,"tags":175,"targetMarket":176,"tier":177},[27,32,35,38,42,45,49,53,56,59,63,68,71,75,78,81,84,87,90,93,97,101,105,110,114,117,120,123,127,130,133,136,139,142,146,149,152,155,158,161,165],{"category":28,"check":29,"severity":30,"summary":31},"Practical Utility","Problem relevance","pass","The description clearly identifies a concrete user problem: extracting structured data (tables, formulas, layout) from PDFs and document images.",{"category":28,"check":33,"severity":30,"summary":34},"Unique selling proposition","The skill offers value beyond a simple prompt by providing a dedicated toolset for complex document parsing, integrating PaddleOCR's capabilities for structured extraction.",{"category":28,"check":36,"severity":30,"summary":37},"Production readiness","The skill is production-ready, providing a complete lifecycle for document parsing, including input handling, API interaction, error reporting, and output management.",{"category":39,"check":40,"severity":30,"summary":41},"Scope","Single responsibility principle","The skill focuses solely on document parsing using PaddleOCR, adhering to a single responsibility.",{"category":39,"check":43,"severity":30,"summary":44},"Description quality","The description is accurate, concise, readable, and accurately reflects the skill's behavior.",{"category":46,"check":47,"severity":30,"summary":48},"Invocation","Scoped tools","The skill uses a single, well-defined tool (`layout_caller.py`) with specific parameters, adhering to the principle of narrow verb-noun specialists.",{"category":50,"check":51,"severity":30,"summary":52},"Documentation","Configuration & parameter reference","All parameters and configuration requirements (environment variables) are documented, including fallback behavior for file types and output paths.",{"category":39,"check":54,"severity":30,"summary":55},"Tool naming","The primary tool, `layout_caller.py`, is descriptively named.",{"category":39,"check":57,"severity":30,"summary":58},"Minimal I/O surface","Input parameters are well-defined (file URL/path, type, output options), and the output structure is documented, avoiding unnecessary data dumps.",{"category":60,"check":61,"severity":30,"summary":62},"License","License usability","The extension is licensed under Apache-2.0, a permissive open-source license, clearly stated in the LICENSE file and manifest.",{"category":64,"check":65,"severity":66,"summary":67},"Maintenance","Commit recency","not_applicable","No commit history available for evaluation.",{"category":64,"check":69,"severity":30,"summary":70},"Dependency Management","Dependencies are managed using PEP 723 inline metadata, which uv resolves automatically, providing a robust update mechanism.",{"category":72,"check":73,"severity":30,"summary":74},"Security","Secret Management","Secrets are handled via environment variables, with clear instructions not to paste them into chat, and the script does not echo resolved secrets.",{"category":72,"check":76,"severity":30,"summary":77},"Injection","The skill processes external data (files, URLs) as untrusted input and does not execute instructions contained within.",{"category":72,"check":79,"severity":30,"summary":80},"Transitive Supply-Chain Grenades","The skill relies on bundled Python scripts and environment variables for configuration; it does not fetch or execute external code at runtime.",{"category":72,"check":82,"severity":30,"summary":83},"Sandbox Isolation","The skill operates within its execution environment and does not attempt to modify files outside its designated scope.",{"category":72,"check":85,"severity":30,"summary":86},"Sandbox escape primitives","No detached process spawns or retry loops around denied tool calls were detected in the bundled scripts.",{"category":72,"check":88,"severity":30,"summary":89},"Data Exfiltration","The skill's documented purpose and implementation do not involve reading or submitting confidential data to third parties.",{"category":72,"check":91,"severity":30,"summary":92},"Hidden Text Tricks","Bundled files appear to be free of hidden steering tricks, control characters, or other obfuscation methods.",{"category":94,"check":95,"severity":30,"summary":96},"Hooks","Opaque code execution","The bundled Python scripts are plain, readable source code, not obfuscated or dynamically executed.",{"category":98,"check":99,"severity":30,"summary":100},"Portability","Structural Assumption","The skill makes no structural assumptions about user project organization and handles file paths relative to the execution context.",{"category":102,"check":103,"severity":66,"summary":104},"Trust","Issues Attention","No GitHub issues data available for evaluation.",{"category":106,"check":107,"severity":108,"summary":109},"Versioning","Release Management","warning","No manifest version (SKILL.md, package.json, etc.) or GitHub release tags are present, and installation instructions reference HEAD.",{"category":111,"check":112,"severity":30,"summary":113},"Code Execution","Validation","Input arguments are validated through argparse and file type detection. Output structure is documented.",{"category":72,"check":115,"severity":30,"summary":116},"Unguarded Destructive Operations","The skill is read-only and does not perform any destructive operations.",{"category":111,"check":118,"severity":30,"summary":119},"Error Handling","Errors are caught, categorized (INPUT_ERROR, CONFIG_ERROR, API_ERROR), and reported meaningfully with a `result.ok: false` structure.",{"category":111,"check":121,"severity":30,"summary":122},"Logging","The script logs to stderr for informational messages (e.g., saved file path) and includes structured error reporting, fulfilling audit requirements.",{"category":124,"check":125,"severity":30,"summary":126},"Compliance","GDPR","The skill processes document content but does not explicitly operate on personal data without sanitization, and its primary function is data extraction, not personal data processing.",{"category":124,"check":128,"severity":30,"summary":129},"Target market","The skill is globally applicable as it processes document content without regional biases in its logic or output.",{"category":98,"check":131,"severity":30,"summary":132},"Runtime stability","The skill declares Python 3.9+ and relies on `uv` for dependency resolution, ensuring cross-platform compatibility for standard Python environments.",{"category":46,"check":134,"severity":30,"summary":135},"Precise Purpose","The description clearly states the skill's purpose (extracting structured data from PDFs/images) and its non-goals (simple OCR, speed-critical tasks).",{"category":46,"check":137,"severity":30,"summary":138},"Concise Frontmatter","The frontmatter is dense and self-contained, accurately summarizing the core capability and providing relevant trigger terms.",{"category":50,"check":140,"severity":30,"summary":141},"Concise Body","The SKILL.md body is concise, detailing usage and configuration while deferring schema details to a separate reference file.",{"category":143,"check":144,"severity":30,"summary":145},"Context","Progressive Disclosure","Detailed output schema information is provided in a separate `references/output_schema.md` file, adhering to progressive disclosure.",{"category":143,"check":147,"severity":66,"summary":148},"Forked exploration","This skill is not an exploration or audit-style skill; it performs a direct task and returns results, thus `context: fork` is not applicable.",{"category":28,"check":150,"severity":30,"summary":151},"Usage examples","Sufficient and clear examples are provided for common use cases (URL, file path, stdout, custom output) demonstrating inputs and expected usage.",{"category":28,"check":153,"severity":30,"summary":154},"Edge cases","The skill handles various edge cases including authentication errors, rate limits, unsupported formats, empty files, and provides guidance on large files and retries.",{"category":111,"check":156,"severity":66,"summary":157},"Tool Fallback","This skill directly calls an external API and does not rely on optional tools like an MCP server.",{"category":98,"check":159,"severity":30,"summary":160},"Stack assumptions","The skill clearly states its stack assumptions: Python 3.9+, uv for dependency management, and internet access for API calls.",{"category":162,"check":163,"severity":30,"summary":164},"Safety","Halt on unexpected state","The skill halts execution and reports errors via a structured JSON output (`ok: false`) upon encountering unexpected states like configuration errors or API failures.",{"category":98,"check":166,"severity":30,"summary":167},"Cross-skill coupling","The skill is self-contained and does not implicitly rely on other skills; its functionality is standalone.",1778053351926,"This skill leverages the PaddleOCR API to parse complex documents, extracting text, tables, formulas, and layout information into structured Markdown or JSON. It supports both local files and URLs, with options for output customization and error handling.","2.0.0","3.4.0","The PaddleOCR Document Parsing skill is exceptionally well-implemented and documented. It adheres to all best practices for security, usability, and portability. The only minor deficiency is the lack of a manifest version, which is a common oversight in standalone skills.",98,"A robust and well-documented skill for extracting structured data from PDFs and document images using PaddleOCR.",[15,16,17,18,19,20],"global","verified",{"codeQuality":179,"collectedAt":180,"documentation":181,"maintenance":183,"security":184,"testCoverage":187},{},1778053340552,{"descriptionLength":182,"readmeSize":8},419,{},{"hasNpmPackage":185,"license":186,"smitheryVerified":185},false,"Apache-2.0",{"hasCi":185,"hasTests":185},{"updatedAt":189},1778053352237,{"githubOwner":191,"githubRepo":192,"locale":24,"slug":193,"type":194},"aidenwu0209","paddleocr-skills","paddleocr-doc-parsing","skill",true,null,{"extract":198,"llm":200},{"commitSha":199,"license":186},"ca41406b66e5a475f43b073a5b731dfd1b9c50b1",{"promptVersionExtension":170,"promptVersionScoring":171,"score":173,"targetMarket":176,"tier":177},{"repoId":202},"kd7b1t00prnctc7258swvw0hs5865sjq",{"_creationTime":204,"_id":202,"identity":205,"providers":207,"workflow":217},1777995558409.8687,{"githubOwner":191,"githubRepo":192,"sourceUrl":206},"https://github.com/aidenwu0209/paddleocr-skills",{"discover":208,"github":211},{"sources":209},[210],"skills-sh",{"closedIssues90d":8,"forks":212,"license":186,"openIssues90d":8,"pushedAt":213,"readmeSize":214,"stars":215,"topics":216},2,1778047331000,2405,20,[],{"discoverAt":218,"extractAt":219,"githubAt":219,"updatedAt":219},1777995558409,1778053339816,{"anyEnrichmentAt":221,"extractAt":222,"githubAt":221,"llmAt":189,"updatedAt":189},1778053339393,1778053339109,[],[225,255,274,294,312,337],{"_creationTime":226,"_id":227,"community":228,"display":229,"identity":239,"providers":243,"relations":249,"workflow":251},1778053148350.4373,"k1776t2fdx4h35mkwpc5h201dd866zms",{"reviewCount":8},{"description":230,"installMethods":231,"name":232,"sourceUrl":233,"tags":234},">",{},"Document Parser Skill","https://github.com/claude-office-skills/skills/tree/HEAD/doc-parser",[235,236,15,20,17,237,18,238],"parsing","document-processing","extraction","docling",{"githubOwner":240,"githubRepo":241,"locale":24,"slug":242,"type":194},"claude-office-skills","skills","doc-parser",{"extract":244,"llm":247},{"commitSha":245,"license":246},"9c4c7d5cd2813a8936bf2c9fdb174ea883b85a11","MIT",{"promptVersionExtension":170,"promptVersionScoring":171,"score":248,"targetMarket":176,"tier":177},92,{"repoId":250},"kd7fw7xbj58qc2z8whrrjptbed8659db",{"anyEnrichmentAt":252,"extractAt":253,"githubAt":252,"llmAt":254,"updatedAt":254},1778053151766,1778053148350,1778053561145,{"_creationTime":256,"_id":257,"community":258,"display":259,"identity":267,"providers":269,"relations":272,"workflow":273},1778053148350.4656,"k171nxqak0bb4qq89mkfwf02s5867cf6",{"reviewCount":8},{"description":260,"installMethods":261,"name":262,"sourceUrl":263,"tags":264},"Convert PDF files to editable Word documents using pdf2docx",{},"PDF to DOCX Converter","https://github.com/claude-office-skills/skills/tree/HEAD/pdf-to-docx",[15,265,266,236,20],"docx","conversion",{"githubOwner":240,"githubRepo":241,"locale":24,"slug":268,"type":194},"pdf-to-docx",{"extract":270,"llm":271},{"commitSha":245,"license":246},{"promptVersionExtension":170,"promptVersionScoring":171,"score":173,"targetMarket":176,"tier":177},{"repoId":250},{"anyEnrichmentAt":252,"extractAt":253,"githubAt":252,"llmAt":254,"updatedAt":254},{"_creationTime":275,"_id":276,"community":277,"display":278,"identity":286,"providers":288,"relations":292,"workflow":293},1778053148350.465,"k17207fprgpyp6yp2b2a4csry1866v1c",{"reviewCount":8},{"description":279,"installMethods":280,"name":281,"sourceUrl":282,"tags":283},"Extract text from scanned PDFs using optical character recognition",{},"PDF OCR Extraction","https://github.com/claude-office-skills/skills/tree/HEAD/pdf-ocr",[15,17,284,285],"text-extraction","mcp",{"githubOwner":240,"githubRepo":241,"locale":24,"slug":287,"type":194},"pdf-ocr-extraction",{"extract":289,"llm":290},{"commitSha":245,"license":246},{"promptVersionExtension":170,"promptVersionScoring":171,"score":291,"targetMarket":176,"tier":177},95,{"repoId":250},{"anyEnrichmentAt":252,"extractAt":253,"githubAt":252,"llmAt":254,"updatedAt":254},{"_creationTime":295,"_id":296,"community":297,"display":298,"identity":305,"providers":307,"relations":310,"workflow":311},1778053339109.6736,"k17b9480w9tdwjwad0rkr80e11867wpb",{"reviewCount":8},{"description":299,"installMethods":300,"name":301,"sourceUrl":302,"tags":303},"Use this skill whenever the user wants text extracted from images, photos, scans, screenshots, or scanned PDFs. Returns exact machine-readable strings with line-level text and optional bbox coordinates. Strong accuracy for CJK, small print, and handwritten text. Trigger terms: OCR, 文字识别, 图片转文字, 截图识字, 提取图中文字, 扫描识字, 识字, 纯文字, plain text extraction, 坐标, 检测框, bbox, bounding box, image to text, screenshot, photo scan, recognize text.",{},"PaddleOCR Text Recognition","https://github.com/aidenwu0209/paddleocr-skills/tree/HEAD/skills/paddleocr-text-recognition",[17,284,304,15,20],"image-to-text",{"githubOwner":191,"githubRepo":192,"locale":24,"slug":306,"type":194},"paddleocr-text-recognition",{"extract":308,"llm":309},{"commitSha":199,"license":186},{"promptVersionExtension":170,"promptVersionScoring":171,"score":291,"targetMarket":176,"tier":177},{"repoId":202},{"anyEnrichmentAt":221,"extractAt":222,"githubAt":221,"llmAt":189,"updatedAt":189},{"_creationTime":313,"_id":314,"community":315,"display":316,"identity":329,"providers":331,"relations":335,"workflow":336},1778053148350.4617,"k1758w1kjmv1kaems0xes3h8m9866dtz",{"reviewCount":8},{"description":317,"installMethods":318,"name":319,"sourceUrl":320,"tags":321},"MCP server with 39 tools for Word, Excel, PowerPoint, PDF, OCR operations",{},"Office MCP Server","https://github.com/claude-office-skills/skills/tree/HEAD/office-mcp",[322,285,15,323,324,325,17,326,327,328],"office","excel","word","powerpoint","automation","typescript","nodejs",{"githubOwner":240,"githubRepo":241,"locale":24,"slug":330,"type":194},"office-mcp",{"extract":332,"llm":333},{"commitSha":245,"license":246},{"promptVersionExtension":170,"promptVersionScoring":171,"score":334,"targetMarket":176,"tier":177},94,{"repoId":250},{"anyEnrichmentAt":252,"extractAt":253,"githubAt":252,"llmAt":254,"updatedAt":254},{"_creationTime":338,"_id":339,"community":340,"display":341,"identity":348,"providers":350,"relations":353,"workflow":354},1778053148350.4734,"k1782aqmjfqy0qysysgq76w9z1867e3x",{"reviewCount":8},{"description":230,"installMethods":342,"name":343,"sourceUrl":344,"tags":345},{},"Smart OCR Skill","https://github.com/claude-office-skills/skills/tree/HEAD/smart-ocr",[17,235,346,19,237,347,236],"multilingual","image-processing",{"githubOwner":240,"githubRepo":241,"locale":24,"slug":349,"type":194},"smart-ocr",{"extract":351,"llm":352},{"commitSha":245,"license":246},{"promptVersionExtension":170,"promptVersionScoring":171,"score":248,"targetMarket":176,"tier":177},{"repoId":250},{"anyEnrichmentAt":252,"extractAt":253,"githubAt":252,"llmAt":254,"updatedAt":254}]