Extract File Endpoint

# Upload a document (PDF, PPTX, or DOCX) to the Extract File endpoint.
# Replace <api-key> with your API key and /path/to/document.pdf with your file.
# The '@' prefix tells curl to attach the file as a multipart upload; a leading
# '<' would instead send the contents of a local file as a plain text field.
curl --request POST \
  --url https://api.extract.page/v1/extract/file \
  --header 'Content-Type: multipart/form-data' \
  --header 'X-API-KEY: <api-key>' \
  --form 'file=@/path/to/document.pdf' \
  --form extract_text=true \
  --form extract_images=true \
  --form ocr=auto

{
  "chunks": [
    {
      "page_no": 123,
      "page_content": "",
      "bbox": [
        123
      ],
      "chunk_type": "text",
      "confidence": 123,
      "image_url": "<string>",
      "image_b64": "<string>",
      "image_mime": "<string>",
      "image_width": 123,
      "image_height": 123,
      "cells": [
        {
          "row": 123,
          "col": 123,
          "text": "",
          "row_span": 1,
          "col_span": 1,
          "bbox": [
            123
          ],
          "confidence": 123,
          "page_no": 123
        }
      ],
      "n_rows": 123,
      "n_cols": 123,
      "merged_from_pages": [
        123
      ],
      "selected": true,
      "selection_label": "<string>",
      "is_handwritten": true
    }
  ]
}

POST /v1/extract/file

Extract File Endpoint

# Upload a document (PDF, PPTX, or DOCX) to the Extract File endpoint.
# Replace <api-key> with your API key and /path/to/document.pdf with your file.
# The '@' prefix tells curl to attach the file as a multipart upload; a leading
# '<' would instead send the contents of a local file as a plain text field.
curl --request POST \
  --url https://api.extract.page/v1/extract/file \
  --header 'Content-Type: multipart/form-data' \
  --header 'X-API-KEY: <api-key>' \
  --form 'file=@/path/to/document.pdf' \
  --form extract_text=true \
  --form extract_images=true \
  --form ocr=auto

{
  "chunks": [
    {
      "page_no": 123,
      "page_content": "",
      "bbox": [
        123
      ],
      "chunk_type": "text",
      "confidence": 123,
      "image_url": "<string>",
      "image_b64": "<string>",
      "image_mime": "<string>",
      "image_width": 123,
      "image_height": 123,
      "cells": [
        {
          "row": 123,
          "col": 123,
          "text": "",
          "row_span": 1,
          "col_span": 1,
          "bbox": [
            123
          ],
          "confidence": 123,
          "page_no": 123
        }
      ],
      "n_rows": 123,
      "n_cols": 123,
      "merged_from_pages": [
        123
      ],
      "selected": true,
      "selection_label": "<string>",
      "is_handwritten": true
    }
  ]
}

Authorizations

X-API-KEY

string

header

required

Body

multipart/form-data

file

string

required

PDF, PPTX, or DOCX document.

extract_text

boolean

default: true

extract_images

boolean

default: true

ocr

enum<string>

default: auto

Deprecated. Accepted for backward compatibility but currently has no effect.

Available options: auto, never, force

Response

Successful Response

chunks

Chunk · object[]

required

Show child attributes

Extract Endpoint · Extract Schema Endpoint