AgentPM™

@zack/html-to-markdown

Convert raw HTML into Markdown and plain text, with optional main-content extraction.

Install
agentpm install @zack/html-to-markdown@0.1.1
Load
Weekly downloads
3
0%
Last publish
Today
v0.1.1
agent.json
{
  "name": "html-to-markdown",
  "version": "0.1.1",
  "description": "Convert raw HTML into Markdown and plain text, with optional main-content extraction.",
  "files": [
    "dist/",
    "node_modules/"
  ],
  "entrypoint": {
    "args": [
      "dist/index.js"
    ],
    "command": "node",
    "timeout_ms": 60000
  },
  "environment": {
    "vars": {
      "HTML2MD_DEFAULT_BASE_URL": {
        "required": false,
        "description": "Optional default base URL used to resolve relative links when base_url is not provided in inputs."
      }
    }
  },
  "inputs": {
    "type": "object",
    "required": [
      "html"
    ],
    "properties": {
      "html": {
        "type": "string",
        "description": "Raw HTML to convert into Markdown and plain text."
      },
      "base_url": {
        "type": "string",
        "format": "uri",
        "description": "Optional base URL for resolving relative links in the HTML."
      },
      "preserve_links": {
        "type": "boolean",
        "default": true,
        "description": "If true, keep hyperlinks as Markdown links instead of stripping URLs."
      },
      "preserve_images": {
        "type": "boolean",
        "default": false,
        "description": "If true, keep images as Markdown image syntax where possible."
      },
      "main_content_only": {
        "type": "boolean",
        "default": false,
        "description": "If true, attempt to extract the main article content (Readability-style) and drop navigation and other boilerplate."
      }
    },
    "additionalProperties": false
  },
  "outputs": {
    "oneOf": [
      {
        "type": "object",
        "required": [
          "ok",
          "markdown",
          "text"
        ],
        "properties": {
          "ok": {
            "const": true
          },
          "text": {
            "type": "string",
            "description": "Plain-text representation of the input HTML."
          },
          "markdown": {
            "type": "string",
            "description": "Markdown representation of the input HTML."
          },
          "metadata": {
            "type": "object",
            "properties": {
              "content_length_text": {
                "type": "integer",
                "description": "Length of the text string in characters."
              },
              "main_content_extracted": {
                "type": "boolean",
                "description": "True if main-content extraction was applied."
              },
              "content_length_markdown": {
                "type": "integer",
                "description": "Length of the markdown string in characters."
              }
            },
            "description": "Optional metadata about the conversion process (e.g. character counts, heuristics used).",
            "additionalProperties": true
          }
        },
        "additionalProperties": false
      },
      {
        "type": "object",
        "required": [
          "ok",
          "error"
        ],
        "properties": {
          "ok": {
            "const": false
          },
          "error": {
            "type": "object",
            "required": [
              "message"
            ],
            "properties": {
              "code": {
                "type": "string",
                "description": "Stable machine-readable error code (e.g. INPUT_INVALID, PARSE_FAILED)."
              },
              "details": {
                "type": "object",
                "description": "Optional structured context (e.g. truncated input, parse error info).",
                "additionalProperties": true
              },
              "message": {
                "type": "string",
                "description": "Human-readable error message."
              }
            },
            "additionalProperties": true
          }
        },
        "additionalProperties": false
      }
    ]
  },
  "license": {
    "file": "LICENSE",
    "spdx": "MIT"
  },
  "runtime": {
    "type": "node",
    "version": "20"
  }
}
Environment variables
Optional
HTML2MD_DEFAULT_BASE_URL
Optional default base URL used to resolve relative links when base_url is not provided in inputs.
Compatibility
Node, Python