> ## Documentation Index
> Fetch the complete documentation index at: https://docs.extract.page/llms.txt
> Use this file to discover all available pages before exploring further.

# Extract Endpoint



## OpenAPI

````yaml /openapi.json post /v1/extract
# Document header: spec version, API metadata, base URL and default auth.
# Version-like scalars are quoted so no parser can ever retype them.
openapi: '3.1.0'
info:
  title: extract
  version: '0.1.0'
  summary: Parse documents into structured data. Text, tables, and figures in one call.
servers:
  # Single server entry, labelled "production".
  - description: production
    url: https://api.extract.page
# Document-wide default: operations require the APIKeyHeader scheme unless
# they declare their own `security`.
security:
  - APIKeyHeader: []
# Route table. This document declares one operation: POST /v1/extract.
paths:
  /v1/extract:
    post:
      operationId: extract_v1
      summary: Extract Endpoint
      tags:
        - v1
      # Restates the document-level default (APIKeyHeader) for this operation.
      security:
        - APIKeyHeader: []
      # A JSON body is mandatory and must match ExtractRequest.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ExtractRequest'
      responses:
        # Success: JSON body described by ExtractResponse.
        '200':
          description: Successful Response
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ExtractResponse'
        # Validation failure: JSON body described by HTTPValidationError.
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
components:
  schemas:
    # Request body of POST /v1/extract. No `required` list is declared, so
    # every property is optional at the schema level.
    ExtractRequest:
      properties:
        # String or null; no default is declared in this schema.
        url:
          anyOf:
            - type: string
            - type: 'null'
          title: Url
        extract_text:
          type: boolean
          title: Extract Text
          default: true
        extract_images:
          type: boolean
          title: Extract Images
          default: true
        # Still accepted so existing clients keep validating. Flagged with the
        # standard `deprecated` annotation so doc renderers and code
        # generators can surface the deprecation, not just the prose.
        ocr:
          type: string
          enum:
            - auto
            - never
            - force
          title: Ocr
          description: >-
            Deprecated. Accepted for backward compatibility but currently has no
            effect.
          default: auto
          deprecated: true
      type: object
      title: ExtractRequest
      # Folded scalar: the two blank lines after the first sentence produce
      # the single paragraph break; all other line breaks fold to spaces.
      description: >-
        Input to the extraction pipeline.


        Server-side guardrails (page limit, max size, image/OCR thresholds) are
        intentionally not user-configurable. Unknown fields are accepted and
        ignored for backward compatibility (e.g. a legacy ``granularity``). The
        ``ocr`` knob is deprecated and currently has no effect.
    # Body of the 200 response of POST /v1/extract.
    ExtractResponse:
      title: ExtractResponse
      type: object
      required:
        - chunks
      properties:
        # Always present; each element follows the Chunk schema.
        chunks:
          title: Chunks
          type: array
          items:
            $ref: '#/components/schemas/Chunk'
    # Body of the 422 response. `detail` is not listed as required.
    HTTPValidationError:
      title: HTTPValidationError
      type: object
      properties:
        # Each element follows the ValidationError schema.
        detail:
          title: Detail
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
    # One element of ExtractResponse.chunks. Only page_no is required; the
    # selection-related fields are documented as set only on selection chunks.
    # Descriptions use JSON literals (null/true/false), since that is what
    # API clients see on the wire.
    Chunk:
      properties:
        page_content:
          type: string
          title: Page Content
          default: ''
        page_no:
          type: integer
          title: Page No
        # NOTE(review): the description implies exactly four numbers, but the
        # schema does not constrain the array length — confirm before adding
        # minItems/maxItems.
        bbox:
          anyOf:
            - items:
                type: number
              type: array
            - type: 'null'
          title: Bbox
          description: '[x0, y0, x1, y1] in PDF user-space points.'
        # `default` sits next to `$ref`; sibling keywords are permitted in
        # OpenAPI 3.1.
        chunk_type:
          $ref: '#/components/schemas/ChunkType'
          default: text
        confidence:
          anyOf:
            - type: number
            - type: 'null'
          title: Confidence
        # image_* fields: presumably populated for image chunks — TODO confirm.
        image_url:
          anyOf:
            - type: string
            - type: 'null'
          title: Image Url
        image_b64:
          anyOf:
            - type: string
            - type: 'null'
          title: Image B64
        image_mime:
          anyOf:
            - type: string
            - type: 'null'
          title: Image Mime
        image_width:
          anyOf:
            - type: integer
            - type: 'null'
          title: Image Width
        image_height:
          anyOf:
            - type: integer
            - type: 'null'
          title: Image Height
        # cells / n_rows / n_cols: presumably populated for table chunks —
        # TODO confirm.
        cells:
          anyOf:
            - items:
                $ref: '#/components/schemas/TableCell'
              type: array
            - type: 'null'
          title: Cells
        n_rows:
          anyOf:
            - type: integer
            - type: 'null'
          title: N Rows
        n_cols:
          anyOf:
            - type: integer
            - type: 'null'
          title: N Cols
        merged_from_pages:
          anyOf:
            - items:
                type: integer
              type: array
            - type: 'null'
          title: Merged From Pages
          description: >-
            Pages this chunk was assembled from when document-level assembly
            merged a table spanning a page boundary (1-based, ascending).
            page_no/bbox refer to the first fragment; each cell's page_no
            carries its own source page. null for unmerged chunks.
        selected:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Selected
          description: >-
            Checkbox / selection state: true (checked/ticked/filled), false
            (unchecked/empty), or null (control present but state illegible —
            NOT guessed). Only set on `selection` chunks.
        selection_label:
          anyOf:
            - type: string
            - type: 'null'
          title: Selection Label
          description: >-
            The option text next to a selection control (e.g. 'Yes', 'I
            consent'). Empty string when the control has no adjacent label. Only
            set on `selection` chunks.
        is_handwritten:
          anyOf:
            - type: boolean
            - type: 'null'
          title: Is Handwritten
          description: >-
            true when the mark/value rode the handwriting path (a pen tick or
            hand-written entry) rather than typed glyphs. Only set on
            `selection` chunks.
      type: object
      required:
        - page_no
      title: Chunk
      # Lists all four ChunkType values; the previous text named only text
      # and image.
      description: >-
        A single extracted element — a text span, an image figure, a table, or
        a selection control (see chunk_type).
    # One entry of HTTPValidationError.detail.
    ValidationError:
      title: ValidationError
      type: object
      required:
        - loc
        - msg
        - type
      properties:
        # Array whose items are each a string or an integer.
        loc:
          title: Location
          type: array
          items:
            anyOf:
              - type: string
              - type: integer
        msg:
          title: Message
          type: string
        # A property literally named "type", distinct from the schema keyword.
        type:
          title: Error Type
          type: string
        # No `type` keyword here, so any JSON value validates.
        input:
          title: Input
        ctx:
          title: Context
          type: object
    # Closed set of chunk kinds; referenced by Chunk.chunk_type.
    ChunkType:
      title: ChunkType
      type: string
      enum:
        - text
        - image
        - table
        - selection
    # Element type of Chunk.cells. Only row and col are required; the spans
    # default to 1 and text defaults to the empty string.
    TableCell:
      properties:
        text:
          type: string
          title: Text
          default: ''
        row:
          type: integer
          title: Row
        col:
          type: integer
          title: Col
        row_span:
          type: integer
          title: Row Span
          default: 1
        col_span:
          type: integer
          title: Col Span
          default: 1
        # NOTE(review): no description here; presumably the same
        # [x0, y0, x1, y1] convention as Chunk.bbox — confirm.
        bbox:
          anyOf:
            - items:
                type: number
              type: array
            - type: 'null'
          title: Bbox
        confidence:
          anyOf:
            - type: number
            - type: 'null'
          title: Confidence
        # Description uses the JSON literal `null` (what clients receive)
        # rather than Python's None.
        page_no:
          anyOf:
            - type: integer
            - type: 'null'
          title: Page No
          description: >-
            Source page of this cell when the table was assembled across pages
            (see Chunk.merged_from_pages). null for single-page tables.
      type: object
      required:
        - row
        - col
      title: TableCell
      description: One cell of a table chunk. Indices are 0-based.
  # Auth scheme referenced by the `security` requirements: an API key sent
  # in the X-API-KEY request header.
  securitySchemes:
    APIKeyHeader:
      type: apiKey
      name: X-API-KEY
      in: header

````