AgentPM™

@zack/markdown-chunk

Split markdown or text into deterministic chunks with heading context, overlap, and fallback subdivision for oversized content.

Install
agentpm install @zack/markdown-chunk@0.1.1
Load
Weekly downloads
1
0%
Last publish
1d ago
v0.1.1
agent.json
{
  "name": "markdown-chunk",
  "version": "0.1.1",
  "description": "Split markdown or text into deterministic chunks with heading context, overlap, and fallback subdivision for oversized content.",
  "files": [
    "markdown_chunk/"
  ],
  "entrypoint": {
    "args": [
      "-u",
      "markdown_chunk/__main__.py"
    ],
    "command": "python",
    "timeout_ms": 30000
  },
  "inputs": {
    "type": "object",
    "required": [
      "text"
    ],
    "properties": {
      "text": {
        "type": "string",
        "description": "Markdown or plain text to chunk."
      },
      "overlap": {
        "type": "integer",
        "default": 150,
        "maximum": 5000,
        "minimum": 0,
        "description": "Number of trailing characters to carry into the next chunk for continuity."
      },
      "strategy": {
        "enum": [
          "heading",
          "paragraph",
          "hybrid"
        ],
        "type": "string",
        "default": "hybrid",
        "description": "Chunking strategy: 'heading' preserves section boundaries when possible, 'paragraph' prefers paragraph breaks, and 'hybrid' tries both before falling back to sentence and window splitting."
      },
      "max_chars": {
        "type": "integer",
        "default": 1200,
        "maximum": 50000,
        "minimum": 100,
        "description": "Target maximum number of characters per chunk. Oversized sections fall back through paragraph, sentence, and window splitting."
      },
      "source_id": {
        "type": "string",
        "description": "Optional stable identifier for the source document; copied into each chunk."
      }
    },
    "additionalProperties": false
  },
  "outputs": {
    "oneOf": [
      {
        "type": "object",
        "required": [
          "ok",
          "chunks",
          "metadata"
        ],
        "properties": {
          "ok": {
            "const": true,
            "description": "True when chunk generation succeeded."
          },
          "chunks": {
            "type": "array",
            "items": {
              "type": "object",
              "required": [
                "id",
                "text",
                "heading_path",
                "start_offset",
                "end_offset",
                "char_count"
              ],
              "properties": {
                "id": {
                  "type": "string",
                  "description": "Deterministic chunk identifier derived from offsets and content."
                },
                "text": {
                  "type": "string",
                  "description": "Chunk text content."
                },
                "source_id": {
                  "type": "string",
                  "description": "Optional source identifier copied from the input."
                },
                "char_count": {
                  "type": "integer",
                  "description": "Character count of the chunk text."
                },
                "end_offset": {
                  "type": "integer",
                  "description": "Character offset in the source text where this chunk ends."
                },
                "heading_path": {
                  "type": "array",
                  "items": {
                    "type": "string",
                    "description": "One heading segment in the heading path."
                  },
                  "description": "Heading ancestry active at the point this chunk was generated."
                },
                "start_offset": {
                  "type": "integer",
                  "description": "Character offset in the source text where this chunk begins."
                }
              },
              "additionalProperties": false
            },
            "description": "Ordered list of generated chunks."
          },
          "metadata": {
            "type": "object",
            "description": "Summary metadata about the chunking run, including fallback behavior used to respect max_chars.",
            "additionalProperties": true
          }
        },
        "additionalProperties": false
      },
      {
        "type": "object",
        "required": [
          "ok",
          "error"
        ],
        "properties": {
          "ok": {
            "const": false,
            "description": "False when input validation or chunk generation failed."
          },
          "error": {
            "type": "object",
            "required": [
              "code",
              "message"
            ],
            "properties": {
              "code": {
                "type": "string",
                "description": "Stable machine-readable error code."
              },
              "details": {
                "type": "object",
                "description": "Optional structured context about the failure.",
                "additionalProperties": true
              },
              "message": {
                "type": "string",
                "description": "Human-readable explanation of the failure."
              }
            },
            "description": "Structured error returned by the tool.",
            "additionalProperties": true
          }
        },
        "additionalProperties": false
      }
    ]
  },
  "readme": "README.md",
  "license": {
    "spdx": "MIT"
  },
  "runtime": {
    "type": "python",
    "version": "3.11"
  }
}
Compatibility
Node, Python