[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"extension-skill-claude-office-skills-pdf-ocr-extraction-de":3,"guides-for-claude-office-skills-pdf-ocr-extraction":222,"similar-k17207fprgpyp6yp2b2a4csry1866v1c":223},{"_creationTime":4,"_id":5,"children":6,"community":7,"display":9,"evaluation":19,"identity":188,"isFallback":193,"parentExtension":194,"providers":195,"relations":199,"repo":201,"workflow":219},1778053148350.465,"k17207fprgpyp6yp2b2a4csry1866v1c",[],{"reviewCount":8},0,{"description":10,"installMethods":11,"name":12,"sourceUrl":13,"tags":14},"Extract text from scanned PDFs using optical character recognition",{},"PDF OCR Extraction","https://github.com/claude-office-skills/skills/tree/HEAD/pdf-ocr",[15,16,17,18],"pdf","ocr","text-extraction","mcp",{"_creationTime":20,"_id":21,"extensionId":5,"locale":22,"result":23,"trustSignals":176,"workflow":186},1778053561145.7449,"kn7bw8kd3msvf66ppfs2g0fvwx86640b","en",{"checks":24,"evaluatedAt":166,"extensionSummary":167,"promptVersionExtension":168,"promptVersionScoring":169,"rationale":170,"score":171,"summary":172,"tags":173,"targetMarket":174,"tier":175},[25,30,33,36,40,43,47,51,54,57,61,66,70,74,77,80,83,86,89,92,96,100,104,108,112,115,118,121,125,128,131,134,137,140,144,147,150,153,156,159,163],{"category":26,"check":27,"severity":28,"summary":29},"Practical Utility","Problem relevance","pass","The description clearly states the problem of extracting text from scanned PDFs using OCR.",{"category":26,"check":31,"severity":28,"summary":32},"Unique selling proposition","The skill offers a specialized OCR capability for scanned PDFs, going beyond generic text extraction by handling various document types and output formats, which provides value over a simple prompt.",{"category":26,"check":34,"severity":28,"summary":35},"Production readiness","The skill provides a comprehensive set of features for OCR, including handling different document types, multiple output formats, and batch processing, covering the full lifecycle for its stated use case.",{"category":37,"check":38,"severity":28,"summary":39},"Scope","Single responsibility principle","The extension focuses solely on OCR and text extraction from PDFs, adhering to a single responsibility.",{"category":37,"check":41,"severity":28,"summary":42},"Description quality","The description accurately reflects the skill's functionality of extracting text from scanned PDFs using OCR.",{"category":44,"check":45,"severity":28,"summary":46},"Invocation","Scoped tools","The skill uses a single, narrow tool `extract_text_from_pdf` which is specific to its task.",{"category":48,"check":49,"severity":28,"summary":50},"Documentation","Configuration & parameter reference","The documentation clearly outlines the basic and advanced usage with examples, including language specification and layout preservation options.",{"category":37,"check":52,"severity":28,"summary":53},"Tool naming","The tool name `extract_text_from_pdf` is descriptive and relevant to the skill's domain.",{"category":37,"check":55,"severity":28,"summary":56},"Minimal I/O surface","The skill's primary input is a document, and its output is structured text or a searchable PDF, which is appropriate for the task without excessive data.",{"category":58,"check":59,"severity":28,"summary":60},"License","License usability","The extension is licensed under the MIT License, a standard permissive open-source license.",{"category":62,"check":63,"severity":64,"summary":65},"Maintenance","Commit recency","warning","No commits have been made to the repository in the last 12 months (last commit date is not available, but the repository was last updated in 2026).",{"category":62,"check":67,"severity":68,"summary":69},"Dependency Management","not_applicable","No third-party dependencies are explicitly managed within this skill's direct files.",{"category":71,"check":72,"severity":68,"summary":73},"Security","Secret Management","The skill does not appear to use or handle any secrets.",{"category":71,"check":75,"severity":28,"summary":76},"Injection","The skill's operations are confined to processing PDF files and do not involve loading or executing untrusted third-party data as instructions.",{"category":71,"check":78,"severity":28,"summary":79},"Transitive Supply-Chain Grenades","The skill operates on provided PDF files and does not fetch external code or data at runtime, nor does it include remote execution primitives.",{"category":71,"check":81,"severity":28,"summary":82},"Sandbox Isolation","The skill's operation is contained to processing input PDF files and does not appear to modify any files outside of its designated scope.",{"category":71,"check":84,"severity":28,"summary":85},"Sandbox escape primitives","No detached process spawns or deny-retry loops were detected in the skill's instructions.",{"category":71,"check":87,"severity":28,"summary":88},"Data Exfiltration","The skill processes local PDF files and does not appear to make any undocumented outbound calls or exfiltrate data.",{"category":71,"check":90,"severity":28,"summary":91},"Hidden Text Tricks","The bundled content is free of hidden-steering tricks, and all descriptions are clean printable ASCII.",{"category":93,"check":94,"severity":28,"summary":95},"Hooks","Opaque code execution","The skill's instructions are plain Markdown and do not involve obfuscated code execution.",{"category":97,"check":98,"severity":28,"summary":99},"Portability","Structural Assumption","The skill operates on provided files and does not make assumptions about user-specific project organization.",{"category":101,"check":102,"severity":68,"summary":103},"Trust","Issues Attention","No open or closed issues are available for the repository.",{"category":105,"check":106,"severity":28,"summary":107},"Versioning","Release Management","A version number (1.0) is declared in the SKILL.md frontmatter.",{"category":109,"check":110,"severity":28,"summary":111},"Code Execution","Validation","The skill's primary input is a file, and the documentation implies structured options (language, layout) which would be validated by the underlying MCP tool.",{"category":71,"check":113,"severity":68,"summary":114},"Unguarded Destructive Operations","The skill is read-only and does not perform any destructive operations.",{"category":109,"check":116,"severity":28,"summary":117},"Error Handling","The documentation provides details on output formats and includes notes on potential issues, implying that errors would be reported.",{"category":109,"check":119,"severity":68,"summary":120},"Logging","The skill is read-only and does not perform actions that require local audit logging.",{"category":122,"check":123,"severity":68,"summary":124},"Compliance","GDPR","The skill processes PDF files and does not operate on data that would typically include personal information without explicit user action.",{"category":122,"check":126,"severity":28,"summary":127},"Target market","The extension does not contain any regional signals and is globally applicable.",{"category":97,"check":129,"severity":28,"summary":130},"Runtime stability","The skill is documented to work with various models and does not appear to have OS-specific or shell-specific assumptions beyond standard MCP execution.",{"category":44,"check":132,"severity":28,"summary":133},"Precise Purpose","The description clearly states the artifact (scanned PDFs) and the task (extract text using OCR), with specific usage examples provided.",{"category":44,"check":135,"severity":28,"summary":136},"Concise Frontmatter","The frontmatter is concise and effectively summarizes the skill's core capability and usage.",{"category":48,"check":138,"severity":28,"summary":139},"Concise Body","The skill body is well-organized and uses progressive disclosure for detailed information, keeping the main instruction concise.",{"category":141,"check":142,"severity":28,"summary":143},"Context","Progressive Disclosure","Detailed information such as document type quality, output formats, and pre-processing tips are presented in tables and separate sections within the SKILL.md, rather than being embedded in lengthy prose.",{"category":141,"check":145,"severity":68,"summary":146},"Forked exploration","This skill is a direct tool execution and does not involve deep exploration or multi-file inspection that would require forked context.",{"category":26,"check":148,"severity":28,"summary":149},"Usage examples","Sufficient examples are provided for basic OCR, with options, and different output formats, plausibly demonstrating the claimed functionality.",{"category":26,"check":151,"severity":28,"summary":152},"Edge cases","The skill documents various document types with expected quality, provides pre-processing tips, and lists limitations such as accuracy with handwriting and small text.",{"category":109,"check":154,"severity":28,"summary":155},"Tool Fallback","The MCP server `office-mcp` is listed as a dependency, and it's implied that this is the primary tool for the `extract_text_from_pdf` operation, with no explicit fallback mentioned but a clear dependency stated.",{"category":97,"check":157,"severity":28,"summary":158},"Stack assumptions","The skill specifies compatibility with various LLMs and its reliance on the MCP server, and the included examples are in common formats, indicating broad portability.",{"category":160,"check":161,"severity":28,"summary":162},"Safety","Halt on unexpected state","The documentation implies that the underlying tool would handle errors and provide feedback, aligning with the principle of halting on unexpected states.",{"category":97,"check":164,"severity":28,"summary":165},"Cross-skill coupling","The skill is self-contained and focuses on PDF OCR, without implicit reliance on other skills.",1778053454615,"This skill leverages OCR technology to extract text from scanned PDF documents, supporting various document types, languages, and output formats including plain text, structured data, and searchable PDFs. It also provides guidance on image quality and pre-processing steps for optimal results.","2.0.0","3.4.0","The skill is well-documented, focuses on a single responsibility, and has clear usage examples. The only minor concern is the lack of recent commits, but the MIT license and clear documentation make it a trustworthy extension.",95,"A high-quality skill for extracting text from scanned PDFs using OCR, with comprehensive documentation and clear usage instructions.",[15,16,17,18],"global","verified",{"codeQuality":177,"collectedAt":178,"documentation":179,"maintenance":181,"security":182,"testCoverage":185},{},1778053445340,{"descriptionLength":180,"readmeSize":8},66,{},{"hasNpmPackage":183,"license":184,"smitheryVerified":183},false,"MIT",{"hasCi":183,"hasTests":183},{"updatedAt":187},1778053561145,{"githubOwner":189,"githubRepo":190,"locale":22,"slug":191,"type":192},"claude-office-skills","skills","pdf-ocr-extraction","skill",true,null,{"extract":196,"llm":198},{"commitSha":197,"license":184},"9c4c7d5cd2813a8936bf2c9fdb174ea883b85a11",{"promptVersionExtension":168,"promptVersionScoring":169,"score":171,"targetMarket":174,"tier":175},{"repoId":200},"kd7fw7xbj58qc2z8whrrjptbed8659db",{"_creationTime":202,"_id":200,"identity":203,"providers":205,"workflow":216},1777995558409.8474,{"githubOwner":189,"githubRepo":190,"sourceUrl":204},"https://github.com/claude-office-skills/skills",{"discover":206,"github":209},{"sources":207},[208],"skills-sh",{"closedIssues90d":8,"forks":210,"license":184,"openIssues90d":211,"pushedAt":212,"readmeSize":213,"stars":214,"topics":215},27,2,1769868236000,29630,98,[],{"discoverAt":217,"extractAt":218,"githubAt":218,"updatedAt":218},1777995558409,1778053155657,{"anyEnrichmentAt":220,"extractAt":221,"githubAt":220,"llmAt":187,"updatedAt":187},1778053151766,1778053148350,[],[224,251,272,292,317,339],{"_creationTime":225,"_id":226,"community":227,"display":228,"identity":236,"providers":240,"relations":245,"workflow":247},1778053339109.6736,"k17b9480w9tdwjwad0rkr80e11867wpb",{"reviewCount":8},{"description":229,"installMethods":230,"name":231,"sourceUrl":232,"tags":233},"Use this skill whenever the user wants text extracted from images, photos, scans, screenshots, or scanned PDFs. Returns exact machine-readable strings with line-level text and optional bbox coordinates. Strong accuracy for CJK, small print, and handwritten text. Trigger terms: OCR, 文字识别, 图片转文字, 截图识字, 提取图中文字, 扫描识字, 识字, 纯文字, plain text extraction, 坐标, 检测框, bbox, bounding box, image to text, screenshot, photo scan, recognize text.",{},"PaddleOCR Text Recognition","https://github.com/aidenwu0209/paddleocr-skills/tree/HEAD/skills/paddleocr-text-recognition",[16,17,234,15,235],"image-to-text","python",{"githubOwner":237,"githubRepo":238,"locale":22,"slug":239,"type":192},"aidenwu0209","paddleocr-skills","paddleocr-text-recognition",{"extract":241,"llm":244},{"commitSha":242,"license":243},"ca41406b66e5a475f43b073a5b731dfd1b9c50b1","Apache-2.0",{"promptVersionExtension":168,"promptVersionScoring":169,"score":171,"targetMarket":174,"tier":175},{"repoId":246},"kd7b1t00prnctc7258swvw0hs5865sjq",{"anyEnrichmentAt":248,"extractAt":249,"githubAt":248,"llmAt":250,"updatedAt":250},1778053339393,1778053339109,1778053352237,{"_creationTime":252,"_id":253,"community":254,"display":255,"identity":265,"providers":267,"relations":270,"workflow":271},1778053148350.4626,"k17c6qc39xd1s6rqh8y0vdxxv1867mjb",{"reviewCount":8},{"description":256,"installMethods":257,"name":258,"sourceUrl":259,"tags":260},"Reduce PDF file size while maintaining acceptable quality",{},"PDF Compress","https://github.com/claude-office-skills/skills/tree/HEAD/pdf-compress",[15,261,262,263,18,264],"compress","optimize","size","documentation",{"githubOwner":189,"githubRepo":190,"locale":22,"slug":266,"type":192},"pdf-compress",{"extract":268,"llm":269},{"commitSha":197,"license":184},{"promptVersionExtension":168,"promptVersionScoring":169,"score":214,"targetMarket":174,"tier":175},{"repoId":200},{"anyEnrichmentAt":220,"extractAt":221,"githubAt":220,"llmAt":187,"updatedAt":187},{"_creationTime":273,"_id":274,"community":275,"display":276,"identity":285,"providers":287,"relations":290,"workflow":291},1778053339109.673,"k170fjdnm4zmjtz1rgs8zwq4418663pv",{"reviewCount":8},{"description":277,"installMethods":278,"name":279,"sourceUrl":280,"tags":281},"Use this skill to extract structured Markdown/JSON from PDFs and document images—tables with cell-level precision, formulas as LaTeX, figures, seals, charts, headers/footers, multi-column layout and correct reading order. Trigger terms: 文档解析, 版面分析, 版面还原, 表格提取, 公式识别, 多栏排版, 扫描件结构化, 发票, 财报, 复杂 PDF, PDF转Markdown, 图表, 阅读顺序; reading order, formula, LaTeX, layout parsing, structure extraction, PP-StructureV3, PaddleOCR-VL.",{},"PaddleOCR Document Parsing","https://github.com/aidenwu0209/paddleocr-skills/tree/HEAD/skills/paddleocr-doc-parsing",[15,282,16,283,284,235],"document-parsing","layout-analysis","paddleocr",{"githubOwner":237,"githubRepo":238,"locale":22,"slug":286,"type":192},"paddleocr-doc-parsing",{"extract":288,"llm":289},{"commitSha":242,"license":243},{"promptVersionExtension":168,"promptVersionScoring":169,"score":214,"targetMarket":174,"tier":175},{"repoId":246},{"anyEnrichmentAt":248,"extractAt":249,"githubAt":248,"llmAt":250,"updatedAt":250},{"_creationTime":293,"_id":294,"community":295,"display":296,"identity":309,"providers":311,"relations":315,"workflow":316},1778053148350.4617,"k1758w1kjmv1kaems0xes3h8m9866dtz",{"reviewCount":8},{"description":297,"installMethods":298,"name":299,"sourceUrl":300,"tags":301},"MCP server with 39 tools for Word, Excel, PowerPoint, PDF, OCR operations",{},"Office MCP Server","https://github.com/claude-office-skills/skills/tree/HEAD/office-mcp",[302,18,15,303,304,305,16,306,307,308],"office","excel","word","powerpoint","automation","typescript","nodejs",{"githubOwner":189,"githubRepo":190,"locale":22,"slug":310,"type":192},"office-mcp",{"extract":312,"llm":313},{"commitSha":197,"license":184},{"promptVersionExtension":168,"promptVersionScoring":169,"score":314,"targetMarket":174,"tier":175},94,{"repoId":200},{"anyEnrichmentAt":220,"extractAt":221,"githubAt":220,"llmAt":187,"updatedAt":187},{"_creationTime":318,"_id":319,"community":320,"display":321,"identity":331,"providers":333,"relations":337,"workflow":338},1778053148350.4373,"k1776t2fdx4h35mkwpc5h201dd866zms",{"reviewCount":8},{"description":322,"installMethods":323,"name":324,"sourceUrl":325,"tags":326},">",{},"Document Parser Skill","https://github.com/claude-office-skills/skills/tree/HEAD/doc-parser",[327,328,15,235,16,329,283,330],"parsing","document-processing","extraction","docling",{"githubOwner":189,"githubRepo":190,"locale":22,"slug":332,"type":192},"doc-parser",{"extract":334,"llm":335},{"commitSha":197,"license":184},{"promptVersionExtension":168,"promptVersionScoring":169,"score":336,"targetMarket":174,"tier":175},92,{"repoId":200},{"anyEnrichmentAt":220,"extractAt":221,"githubAt":220,"llmAt":187,"updatedAt":187},{"_creationTime":340,"_id":341,"community":342,"display":343,"identity":352,"providers":354,"relations":362,"workflow":365},1778003232571.9153,"k17259226wsvv093qff3xnjqe5865r06",{"reviewCount":8},{"description":344,"installMethods":345,"name":346,"sourceUrl":347,"tags":348},"Use this skill whenever the user wants to do anything with PDF files. This includes reading or extracting text/tables from PDFs, combining or merging multiple PDFs into one, splitting PDFs apart, rotating pages, adding watermarks, creating new PDFs, filling PDF forms, encrypting/decrypting PDFs, extracting images, and OCR on scanned PDFs to make them searchable. If the user mentions a .pdf file or asks to produce one, use this skill.",{},"PDF Processing Guide","https://github.com/anthropics/skills/tree/HEAD/skills/pdf",[15,328,235,17,16,349,350,351],"form-filling","productivity","data-analytics",{"githubOwner":353,"githubRepo":190,"locale":22,"slug":15,"type":192},"anthropics",{"extract":355,"smithery":358},{"commitSha":356,"license":357},"d230a6dd6eb1a0dbee9fec55e2f00a96e28dff81","Proprietary",{"qualityScore":359,"totalActivations":360,"uniqueUsers":361,"useCount":8,"verified":183},0.996426,920,410,{"parentExtensionId":363,"repoId":364},"k173j5mjcps56pe131t0b18eg18658ay","kd72m31vxr2nd4hahhzvp0cyrn864eyx",{"anyEnrichmentAt":366,"extractAt":367,"githubAt":368,"invalidatedAt":366,"llmAt":369,"smitheryAt":366,"updatedAt":366},1778008076651,1778003232571,1778003234861,1778007125066]