Type Alias CrawlOptions

CrawlOptions: {
    add_chunks_to_dataset?: boolean | null;
    allow_external_links?: boolean | null;
    body_remove_strings?: string[] | null;
    boost_titles?: boolean | null;
    exclude_paths?: string[] | null;
    exclude_tags?: string[] | null;
    heading_remove_strings?: string[] | null;
    ignore_sitemap?: boolean | null;
    include_paths?: string[] | null;
    include_tags?: string[] | null;
    interval?: CrawlInterval | null;
    limit?: number | null;
    scrape_options?: ScrapeOptions | null;
    site_url?: string | null;
    webhook_metadata?: unknown;
    webhook_urls?: string[] | null;
}

Options for setting up the crawl that will populate the dataset.

Type declaration

`Optional` add_chunks_to_dataset?: boolean | null
Add chunks to the dataset that the crawl is created for. Defaults to true.
`Optional` allow_external_links?: boolean | null
Allow the crawl to follow links to external websites.
`Optional` body_remove_strings?: string[] | null
Text strings to remove from the body when creating chunks for each page.
`Optional` boost_titles?: boolean | null
Boost titles so that keyword matches in titles are prioritized in search results. Strongly recommended to leave this on. Defaults to true.
`Optional` exclude_paths?: string[] | null
URL patterns to exclude from the crawl.
`Optional` exclude_tags?: string[] | null
Specify the HTML tags, classes, and IDs to exclude from the response.
`Optional` heading_remove_strings?: string[] | null
Text strings to remove from headings when creating chunks for each page.
`Optional` ignore_sitemap?: boolean | null
Ignore the website sitemap when crawling. Defaults to true.
`Optional` include_paths?: string[] | null
URL patterns to include in the crawl.
`Optional` include_tags?: string[] | null
Specify the HTML tags, classes, and IDs to include in the response.
`Optional` interval?: CrawlInterval | null
`Optional` limit?: number | null
Number of pages to crawl. Defaults to 1000.
`Optional` scrape_options?: ScrapeOptions | null
`Optional` site_url?: string | null
The URL to crawl.
`Optional` webhook_metadata?: unknown
Metadata to send back with the webhook call for each successful page scrape.
`Optional` webhook_urls?: string[] | null
URLs to call back via webhook for each successful page scrape.

Type Alias CrawlOptions

Type declaration

`Optional`add_chunks_to_dataset?: boolean | null

`Optional`allow_external_links?: boolean | null

`Optional`body_remove_strings?: string[] | null

`Optional`boost_titles?: boolean | null

`Optional`exclude_paths?: string[] | null

`Optional`exclude_tags?: string[] | null

`Optional`heading_remove_strings?: string[] | null

`Optional`ignore_sitemap?: boolean | null

`Optional`include_paths?: string[] | null

`Optional`include_tags?: string[] | null

`Optional`interval?: CrawlInterval | null

`Optional`limit?: number | null

`Optional`scrape_options?: ScrapeOptions | null

`Optional`site_url?: string | null

`Optional`webhook_metadata?: unknown

`Optional`webhook_urls?: string[] | null

Settings

Type Alias CrawlOptions

Type declaration

`Optional` add_chunks_to_dataset?: boolean | null

`Optional` allow_external_links?: boolean | null

`Optional` body_remove_strings?: string[] | null

`Optional` boost_titles?: boolean | null

`Optional` exclude_paths?: string[] | null

`Optional` exclude_tags?: string[] | null

`Optional` heading_remove_strings?: string[] | null

`Optional` ignore_sitemap?: boolean | null

`Optional` include_paths?: string[] | null

`Optional` include_tags?: string[] | null

`Optional` interval?: CrawlInterval | null

`Optional` limit?: number | null

`Optional` scrape_options?: ScrapeOptions | null

`Optional` site_url?: string | null

`Optional` webhook_metadata?: unknown

`Optional` webhook_urls?: string[] | null

Settings

`Optional`add_chunks_to_dataset?: boolean | null

`Optional`allow_external_links?: boolean | null

`Optional`body_remove_strings?: string[] | null

`Optional`boost_titles?: boolean | null

`Optional`exclude_paths?: string[] | null

`Optional`exclude_tags?: string[] | null

`Optional`heading_remove_strings?: string[] | null

`Optional`ignore_sitemap?: boolean | null

`Optional`include_paths?: string[] | null

`Optional`include_tags?: string[] | null

`Optional`interval?: CrawlInterval | null

`Optional`limit?: number | null

`Optional`scrape_options?: ScrapeOptions | null

`Optional`site_url?: string | null

`Optional`webhook_metadata?: unknown

`Optional`webhook_urls?: string[] | null