import { parseObjectEntries } from "../../common/utils.mjs";
import firecrawl from "../../firecrawl.app.mjs";
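// parseObjectEntries parses each entry's value as JSON where applicable (see
// the `additionalOptions` prop description below), so nested objects such as
// a `webhook` config can be supplied as stringified JSON.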
export default {
  key: "firecrawl-crawl-url",
  name: "Crawl URL",
  description: "Crawls a given URL and returns the contents of sub-pages. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post)",
  version: "1.1.1",
  annotations: {
    destructiveHint: false,
    openWorldHint: true,
    readOnlyHint: false,
  },
  type: "action",
  props: {
    firecrawl,
    url: {
      propDefinition: [
        firecrawl,
        "url",
      ],
    },
    prompt: {
      type: "string",
      label: "Prompt",
      description: "A prompt to use to generate the crawler options (all the parameters below) from natural language. Explicitly set parameters will override the generated equivalents.",
      optional: true,
    },
    excludePaths: {
      type: "string[]",
      label: "Exclude Paths",
      description: "URL pathname regex patterns that exclude matching URLs from the crawl. For example, a value of `blog/.*` for the URL `firecrawl.dev` will exclude any results matching that pattern, such as `https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap`",
      optional: true,
    },
    includePaths: {
      type: "string[]",
      label: "Include Paths",
      description: "Similar to `Exclude Paths`, but if set, only the paths matching the specified patterns will be included",
      optional: true,
    },
    maxDiscoveryDepth: {
      type: "integer",
      label: "Max Discovery Depth",
      description: "Maximum depth to crawl based on discovery order. The root site and sitemapped pages has a discovery depth of 0. For example, if you set it to 1, and you set sitemap: 'skip', you will only crawl the entered URL and all URLs that are linked on that page.",
      optional: true,
    },
    sitemap: {
      type: "string",
      label: "Sitemap",
      description: "Sitemap mode when crawling. If you set it to 'skip', the crawler will ignore the website sitemap and only crawl the entered URL and discover pages from there onwards.",
      options: [
        "skip",
        "include",
      ],
      optional: true,
    },
    ignoreQueryParameters: {
      type: "boolean",
      label: "Ignore Query Parameters",
      description: "Do not re-scrape the same path with different (or none) query parameters",
      optional: true,
    },
    limit: {
      type: "integer",
      label: "Limit",
      description: "Maximum number of pages to crawl",
      optional: true,
    },
    crawlEntireDomain: {
      type: "boolean",
      label: "Crawl Entire Domain",
      description: "Allows the crawler to follow internal links to sibling or parent URLs, not just child paths.",
      optional: true,
    },
    allowExternalLinks: {
      type: "boolean",
      label: "Allow External Links",
      description: "Allows the crawler to follow links to external websites",
      optional: true,
    },
    additionalOptions: {
      propDefinition: [
        firecrawl,
        "additionalOptions",
      ],
      description: "Additional parameters to send in the request. [See the documentation](https://docs.firecrawl.dev/api-reference/endpoint/crawl-post) for available parameters. Values will be parsed as JSON where applicable. For example, to add the `webhook` param, use the value `{\"webhook\": {\"url\": \"https://your-server-webhook-api.com\",\"headers\": {},\"metadata\": {},\"events\": [\"completed\"]}}`",
    },
  },
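  // Firecrawl's crawl endpoint is asynchronous: this action submits the job
  // and returns immediately with a job ID (see the $summary below); page
  // contents are retrieved separately once the job completes.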
  async run({ $ }) {
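    // `this` holds all prop values; pull off the app instance and the
    // freeform additionalOptions so the remaining props map directly onto
    // the request body.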
    const {
      firecrawl, additionalOptions, ...data
    } = this;
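    // Merge in any extra parameters, with stringified JSON values (e.g. a
    // `webhook` object) parsed into real objects by parseObjectEntries.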
    const response = await firecrawl.crawl({
      $,
      data: {
        ...data,
        ...(additionalOptions && parseObjectEntries(additionalOptions)),
      },
    });
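    // Per the linked documentation, the response carries the new job's `id`
    // (used in the summary below) and, typically, a URL for polling its status.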
    $.export("$summary", `Crawl job started (ID: ${response.id})`);
    return response;
  },
};