Skip to content

Database Models API Reference

Auto-generated from SQLModel/Pydantic models

Document Models

green_gov_rag.models.document

Normalized document models: sources → files → chunks.

DocumentSource

Bases: SQLModel

Document source from config (e.g., one config entry with multiple PDFs).

Represents a logical document source that may contain multiple files. Maps 1:1 with entries in documents_config.yml.

Example: "National Construction Code (NCC)" is one source with 4 PDF files.

Source code in green_gov_rag/models/document.py
class DocumentSource(SQLModel, table=True):
    """Document source from config (e.g., one config entry with multiple PDFs).

    Represents a logical document source that may contain multiple files.
    Maps 1:1 with entries in documents_config.yml.

    Example: "National Construction Code (NCC)" is one source with 4 PDF files.
    """

    __tablename__ = "document_sources"

    # Primary key
    id: str = Field(primary_key=True, description="Unique source identifier")

    # Basic metadata
    title: str = Field(index=True, description="Source title")
    source_url: str = Field(description="Source website URL (homepage)")

    # Classification fields (all indexed; region and category are optional)
    jurisdiction: str = Field(index=True, description="Federal/State/Local")
    topic: str = Field(index=True, description="Document topic/category")
    region: Optional[str] = Field(
        default=None, index=True, description="Geographic region"
    )
    category: Optional[str] = Field(
        default=None, index=True, description="Document category"
    )

    # Additional metadata (stored as JSON)
    metadata_: Optional[dict] = Field(
        default=None,
        sa_column=Column(JSON),
        description="Additional metadata as JSON",
    )

    # ESG-specific metadata
    esg_metadata: Optional[dict] = Field(
        default=None,
        sa_column=Column(JSON),
        description="ESG/emissions metadata (frameworks, scopes, gases, etc.)",
    )

    # Spatial/geographic metadata
    spatial_metadata: Optional[dict] = Field(
        default=None,
        sa_column=Column(JSON),
        description="Spatial metadata (LGA codes, state, spatial scope, etc.)",
    )

    # Processing status. Plain string column: the allowed values are listed in
    # the description only and are not enforced by this model.
    status: str = Field(
        default="pending",
        index=True,
        description="Processing status: pending/processing/completed/failed",
    )
    error_message: Optional[str] = Field(
        default=None, description="Error message if failed"
    )

    # Timestamps (timezone-aware UTC, generated in Python)
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Creation timestamp",
    )
    updated_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        # Without an ``onupdate`` hook this column would keep its creation
        # value forever. SQLAlchemy only applies ``onupdate`` when the column
        # is absent from the UPDATE's SET clause, so a value assigned
        # explicitly by a caller still takes precedence.
        sa_column_kwargs={"onupdate": lambda: datetime.now(timezone.utc)},
        description="Last update timestamp",
    )
    processed_at: Optional[datetime] = Field(
        default=None,
        description="Processing completion timestamp",
    )

    # Aggregated stats (across all files)
    file_count: int = Field(default=0, description="Number of files in this source")
    chunk_count: int = Field(default=0, description="Total chunks across all files")
    embedding_model: Optional[str] = Field(
        default=None,
        description="Embedding model used",
    )

    class Config:
        """Model configuration."""

        # Sample payload attached to the generated JSON schema; documentation
        # only, never used as a field default.
        json_schema_extra = {
            "example": {
                "id": "ncc_2022",
                "title": "National Construction Code (NCC) 2022",
                "source_url": "https://ncc.abcb.gov.au/",
                "jurisdiction": "federal",
                "category": "building",
                "topic": "standards",
                "region": "Australia",
                "status": "completed",
                "file_count": 4,
                "chunk_count": 36170,
            }
        }
Config

Model configuration.

Source code in green_gov_rag/models/document.py
class Config:
    """Model configuration."""

    # Sample payload attached to the generated JSON schema under "example";
    # documentation only, never used as a field default.
    json_schema_extra = {
        "example": {
            "id": "ncc_2022",
            "title": "National Construction Code (NCC) 2022",
            "source_url": "https://ncc.abcb.gov.au/",
            "jurisdiction": "federal",
            "category": "building",
            "topic": "standards",
            "region": "Australia",
            "status": "completed",
            "file_count": 4,
            "chunk_count": 36170,
        }
    }

DocumentFile

Bases: SQLModel

Individual document file (e.g., one PDF from a source).

Represents a single physical file that was downloaded. Many-to-one relationship with DocumentSource.

Example: "ncc2022-volume-one.pdf" is one file of the NCC source.

Source code in green_gov_rag/models/document.py
class DocumentFile(SQLModel, table=True):
    """Individual document file (e.g., one PDF from a source).

    Represents a single physical file that was downloaded.
    Many-to-one relationship with DocumentSource.

    Example: "ncc2022-volume-one.pdf" is one file of the NCC source.
    """

    __tablename__ = "document_files"

    # Primary key
    id: str = Field(primary_key=True, description="Unique file identifier")

    # Foreign key to source (references the "document_sources" table's id)
    source_id: str = Field(
        foreign_key="document_sources.id",
        index=True,
        description="Parent source ID",
    )

    # File information
    filename: str = Field(index=True, description="Original filename")
    file_url: str = Field(description="Direct download URL for this file")
    # Required and indexed; the hash format is stated in the description only
    # and is not validated by this model.
    content_hash: str = Field(
        index=True,
        description="SHA256 hash of file content for change detection",
    )
    file_size_bytes: Optional[int] = Field(
        default=None,
        description="File size in bytes",
    )

    # File-specific metadata (free-form dict persisted in a JSON column)
    file_metadata: Optional[dict] = Field(
        default=None,
        sa_column=Column(JSON),
        description="File-specific metadata (page count, format, etc.)",
    )

    # Processing status. Plain string column: the allowed values are listed in
    # the description only and are not enforced by this model.
    status: str = Field(
        default="pending",
        index=True,
        description="Status: pending/downloading/processing/completed/failed",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if failed",
    )

    # Timestamps. discovered_at is filled in Python at instantiation with a
    # timezone-aware UTC value; the other two stay None until set by callers.
    discovered_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="When this file was discovered",
    )
    downloaded_at: Optional[datetime] = Field(
        default=None,
        description="When file was downloaded",
    )
    processed_at: Optional[datetime] = Field(
        default=None,
        description="When file was processed into chunks",
    )

    # Processing stats
    chunk_count: int = Field(default=0, description="Number of chunks from this file")

    class Config:
        """Model configuration."""

        # Sample payload attached to the generated JSON schema; documentation
        # only, never used as a field default.
        json_schema_extra = {
            "example": {
                "id": "ncc_2022_vol1",
                "source_id": "ncc_2022",
                "filename": "ncc2022-volume-one.pdf",
                "file_url": "https://ncc.abcb.gov.au/system/files/ncc/ncc2022-volume-one.pdf",
                "content_hash": "a1b2c3d4e5f6...",
                "file_size_bytes": 15728640,
                "status": "completed",
                "chunk_count": 8543,
            }
        }
Config

Model configuration.

Source code in green_gov_rag/models/document.py
class Config:
    """Model configuration."""

    # Sample payload attached to the generated JSON schema under "example";
    # documentation only, never used as a field default.
    json_schema_extra = {
        "example": {
            "id": "ncc_2022_vol1",
            "source_id": "ncc_2022",
            "filename": "ncc2022-volume-one.pdf",
            "file_url": "https://ncc.abcb.gov.au/system/files/ncc/ncc2022-volume-one.pdf",
            "content_hash": "a1b2c3d4e5f6...",
            "file_size_bytes": 15728640,
            "status": "completed",
            "chunk_count": 8543,
        }
    }

Schema Models

green_gov_rag.api.schemas.query

Query request/response schemas.

SourceDocument

Bases: BaseModel

Source document reference with legal-grade citation metadata.

Enhanced schema following legal RAG best practices (2025) for hierarchical document structure and deep linking.

Source code in green_gov_rag/api/schemas/query.py
class SourceDocument(BaseModel):
    """Source document reference with legal-grade citation metadata.

    Enhanced schema following legal RAG best practices (2025) for
    hierarchical document structure and deep linking.
    """

    # --- Identification: title and source_url are the only required fields ---
    title: str = Field(description="Document title")
    source_url: str = Field(description="Document source URL")
    excerpt: Optional[str] = Field(
        default=None,
        description="Relevant excerpt from document",
    )
    # NOTE(review): the description says 0-1 but no ge/le bound is declared
    # here — confirm scores are normalised before they reach this schema.
    relevance_score: Optional[float] = Field(
        default=None,
        description="Similarity score (0-1)",
    )
    file_id: Optional[str] = Field(
        default=None,
        description="Unique file identifier for citation verification and version tracking",
    )

    # --- Citation metadata (from hierarchical PDF parsing) ---
    page_number: Optional[int] = Field(
        default=None,
        description="Page number where this excerpt appears",
    )
    # The two-element [start, end] shape is a convention from the description;
    # the list length is not constrained by this field.
    page_range: Optional[list[int]] = Field(
        default=None,
        description="Page range if excerpt spans multiple pages [start, end]",
    )
    section_title: Optional[str] = Field(
        default=None,
        description="Current section title (e.g., 'Market-Based Accounting Methods')",
    )
    section_hierarchy: Optional[list[str]] = Field(
        default=None,
        description="Full section hierarchy from top-level to current section",
        examples=[
            [
                "Part 3: Scope 2 Emissions",
                "Section 3.2: Calculation Methods",
                "3.2.1 Market-Based Accounting",
            ]
        ],
    )
    clause_reference: Optional[str] = Field(
        default=None,
        description="Clause or section reference (e.g., 's.3.2.1', 'cl.42')",
    )

    # --- Deep linking ---
    deep_link: Optional[str] = Field(
        default=None,
        description="Deep link to specific section/page in PDF",
        examples=["https://cer.gov.au/document/guideline.pdf#page=42"],
    )

    # --- Formatted citation ---
    citation: Optional[str] = Field(
        default=None,
        description="Formatted citation string for display",
        examples=[
            "Clean Energy Regulator (2024), Scope 2 Guideline, Page 42, Section 3.2.1"
        ],
    )

    # --- Document metadata for context ---
    jurisdiction: Optional[str] = Field(
        default=None,
        description="Jurisdiction level: federal, state, or local",
    )
    category: Optional[str] = Field(
        default=None,
        description="Document category: environment, planning, legislation, etc.",
    )
    topic: Optional[str] = Field(
        default=None,
        description="Specific topic: emissions_reporting, biodiversity, etc.",
    )
    region: Optional[str] = Field(
        default=None,
        description="Geographic region: Australia, New South Wales, City of Adelaide, etc.",
    )

    # --- ESG metadata (for ESG-specific queries) ---
    esg_metadata: Optional[dict[str, Any]] = Field(
        default=None,
        description="ESG-specific metadata (frameworks, scopes, gases, etc.)",
        examples=[
            {
                "frameworks": ["NGER", "ISSB"],
                "emission_scopes": ["scope_2"],
                "greenhouse_gases": ["CO2", "CH4", "N2O"],
                "consolidation_method": "operational_control",
                "regulator": "Clean Energy Regulator",
            }
        ],
    )

    # --- Spatial metadata (for location-based queries) ---
    spatial_metadata: Optional[dict[str, Any]] = Field(
        default=None,
        description="Spatial metadata (LGA codes, state, spatial scope)",
        examples=[
            {
                "spatial_scope": "local",
                "state": "SA",
                "lga_codes": ["40070"],
                "lga_names": ["City of Adelaide"],
            }
        ],
    )

QueryRequest

Bases: BaseModel

Query request schema.

Source code in green_gov_rag/api/schemas/query.py
class QueryRequest(BaseModel):
    """Query request schema."""

    # The only required field; an empty string is rejected by min_length.
    query: str = Field(min_length=1, description="User query")

    # Optional filters; each is None when the caller does not supply it.
    region: Optional[str] = Field(
        default=None,
        description="Region filter (state/territory name or abbreviation)",
    )
    lgas: Optional[list[str]] = Field(
        default=None,
        description="Local Government Area names (takes priority over region)",
    )
    jurisdiction: Optional[str] = Field(
        default=None,
        description="Jurisdiction filter",
    )
    topics: Optional[list[str]] = Field(
        default=None,
        description="Topic filters",
    )

    # Bounded to 1..20 inclusive, defaulting to 5.
    max_sources: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Max source documents",
    )
    # Opt-in because, per the description, scoring is expensive.
    include_trust_score: bool = Field(
        default=False,
        description="Calculate trust score (expensive - requires multiple LLM calls)",
    )
    session_id: Optional[str] = Field(
        default=None,
        description="Browser session ID for user-specific query history",
    )

    class Config:
        """Schema config."""

        json_schema_extra = {
            "example": {
                "query": "What are the emissions targets for NSW?",
                "region": "NSW",
                "jurisdiction": "State",
                "topics": ["Climate", "Emissions"],
                "max_sources": 5,
            }
        }
Config

Schema config.

Source code in green_gov_rag/api/schemas/query.py
class Config:
    """Schema config."""

    # Sample request attached to the generated JSON schema under "example";
    # documentation only, never used as a field default.
    json_schema_extra = {
        "example": {
            "query": "What are the emissions targets for NSW?",
            "region": "NSW",
            "jurisdiction": "State",
            "topics": ["Climate", "Emissions"],
            "max_sources": 5,
        }
    }

FeedbackRequest

Bases: BaseModel

Feedback submission for a query.

Source code in green_gov_rag/api/schemas/query.py
class FeedbackRequest(BaseModel):
    """Feedback submission for a query."""

    # Required; validated to the inclusive range 1..5.
    rating: int = Field(
        ge=1,
        le=5,
        description="Rating from 1 (poor) to 5 (excellent)",
    )
    # Optional free text, capped at 1000 characters.
    feedback_text: Optional[str] = Field(
        default=None,
        max_length=1000,
        description="Optional text feedback",
    )

    class Config:
        """Schema config."""

        json_schema_extra = {
            "example": {
                "rating": 5,
                "feedback_text": "Very helpful answer with accurate citations!",
            }
        }
Config

Schema config.

Source code in green_gov_rag/api/schemas/query.py
class Config:
    """Schema config."""

    # Sample payload attached to the generated JSON schema under "example";
    # documentation only, never used as a field default.
    json_schema_extra = {
        "example": {
            "rating": 5,
            "feedback_text": "Very helpful answer with accurate citations!",
        }
    }

FeedbackResponse

Bases: BaseModel

Response after submitting feedback.

Source code in green_gov_rag/api/schemas/query.py
class FeedbackResponse(BaseModel):
    """Response after submitting feedback."""

    # All three fields are required: none declares a default.
    success: bool
    message: str
    # presumably the ID of the query the feedback was attached to — confirm
    # against the feedback endpoint.
    query_id: int

    class Config:
        """Schema config."""

        # Sample payload attached to the generated JSON schema; documentation
        # only, never used as a field default.
        json_schema_extra = {
            "example": {
                "success": True,
                "message": "Feedback submitted successfully",
                "query_id": 42,
            }
        }
Config

Schema config.

Source code in green_gov_rag/api/schemas/query.py
class Config:
    """Schema config."""

    # Sample payload attached to the generated JSON schema under "example";
    # documentation only, never used as a field default.
    json_schema_extra = {
        "example": {
            "success": True,
            "message": "Feedback submitted successfully",
            "query_id": 42,
        }
    }

CoverageInfo

Bases: BaseModel

LGA document coverage information.

Source code in green_gov_rag/api/schemas/query.py
class CoverageInfo(BaseModel):
    """LGA document coverage information."""

    # Both LGA identifiers are optional and default to None.
    selected_lga: Optional[str] = Field(
        default=None,
        description="Selected LGA name (e.g., 'City of Adelaide')",
    )
    lga_code: Optional[str] = Field(
        default=None,
        description="LGA code (e.g., '40070')",
    )

    # Required flag; the count next to it falls back to 0 when omitted.
    has_local_coverage: bool = Field(
        description="Whether local documents exist for this LGA",
    )
    local_doc_count: int = Field(
        default=0,
        description="Number of local documents available for this LGA",
    )

    # Required plain string: the four levels named in the description are not
    # enforced by this field.
    coverage_level: str = Field(
        description="Coverage level: 'high', 'medium', 'low', or 'none'",
    )
    contribution_url: str = Field(
        description="GitHub URL to contribute new document sources",
    )

    class Config:
        """Schema config."""

        json_schema_extra = {
            "example": {
                "selected_lga": "City of Adelaide",
                "lga_code": "40070",
                "has_local_coverage": True,
                "local_doc_count": 15,
                "coverage_level": "high",
                "contribution_url": "https://github.com/sdp5/green-gov-rag/issues/new?template=add-document-source.md",
            }
        }
Config

Schema config.

Source code in green_gov_rag/api/schemas/query.py
class Config:
    """Schema config."""

    # Sample payload attached to the generated JSON schema under "example";
    # documentation only, never used as a field default.
    json_schema_extra = {
        "example": {
            "selected_lga": "City of Adelaide",
            "lga_code": "40070",
            "has_local_coverage": True,
            "local_doc_count": 15,
            "coverage_level": "high",
            "contribution_url": "https://github.com/sdp5/green-gov-rag/issues/new?template=add-document-source.md",
        }
    }

QueryResponse

Bases: BaseModel

Query response schema.

Source code in green_gov_rag/api/schemas/query.py
class QueryResponse(BaseModel):
    """Query response schema."""

    # Core payload: these four fields are required (no defaults).
    query: str
    answer: str
    sources: list[SourceDocument]
    filters_applied: dict
    response_time_ms: Optional[float] = None
    query_id: Optional[int] = Field(
        default=None,
        description="Query history ID for feedback submission",
    )

    # Document coverage information
    coverage_info: Optional[CoverageInfo] = Field(
        default=None,
        description="LGA document coverage information and contribution link",
    )

    # Phase 3: Trust & Compliance Features (every field optional, None when absent)
    # trust_score is the only one with a validated range: 0.0..1.0 inclusive.
    trust_score: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
        description="Overall trust score (0-1) for this response",
    )
    trust_confidence: Optional[str] = Field(
        default=None,
        description="Trust confidence level: high, medium, or low",
    )
    trust_breakdown: Optional[dict[str, Any]] = Field(
        default=None,
        description="Detailed trust score breakdown: source_relevance (40%), document_currency (25%), source_authority (15%), conflict_check (10%), quote_accuracy (10%)",
    )
    conflicts_detected: Optional[list[dict[str, Any]]] = Field(
        default=None,
        description="Regulatory conflicts detected between sources",
    )
    hierarchy_explanation: Optional[str] = Field(
        default=None,
        description="Explanation of regulatory hierarchy (Federal > State > Local)",
    )
    citation_warnings: Optional[list[str]] = Field(
        default=None,
        description="Warnings about citation quality or currency",
    )

    class Config:
        """Schema config."""

        json_schema_extra = {
            "example": {
                "query": "What are the Scope 2 market-based accounting methods under NGER?",
                "answer": "Under NGER, Scope 2 emissions can be calculated using market-based accounting methods...",
                "sources": [
                    {
                        "title": "Clean Energy Regulator - Scope 2 Emissions Guideline",
                        "source_url": "https://cer.gov.au/document/voluntary-market-based-scope-2-emissions-guideline",
                        "excerpt": "Market-based accounting requires documentation of contractual instruments...",
                        "relevance_score": 0.92,
                        "page_number": 42,
                        "page_range": [42, 43],
                        "section_title": "Market-Based Accounting Methods",
                        "section_hierarchy": [
                            "Part 3: Scope 2 Emissions Accounting",
                            "Section 3.2: Calculation Methods",
                            "3.2.1 Market-Based Accounting",
                        ],
                        "clause_reference": "s.3.2.1",
                        "deep_link": "https://cer.gov.au/document/voluntary-market-based-scope-2-emissions-guideline#page=42",
                        "citation": "Clean Energy Regulator (2024), Scope 2 Emissions Guideline, Page 42, Section 3.2.1",
                        "jurisdiction": "federal",
                        "category": "environment",
                        "topic": "emissions_reporting",
                        "region": "Australia",
                        "esg_metadata": {
                            "frameworks": ["NGER", "ISSB", "GHG_Protocol"],
                            "emission_scopes": ["scope_2"],
                            "greenhouse_gases": [
                                "CO2",
                                "CH4",
                                "N2O",
                                "SF6",
                                "HFCs",
                                "PFCs",
                                "NF3",
                            ],
                            "consolidation_method": "operational_control",
                            "methodology_type": "calculation",
                            "regulator": "Clean Energy Regulator",
                            "reportable_under_nger": True,
                            "accounting_methods": ["location_based", "market_based"],
                        },
                        "spatial_metadata": {
                            "spatial_scope": "federal",
                            "state": None,
                            "lga_codes": [],
                            "applies_to_all_lgas": True,
                        },
                    }
                ],
                "filters_applied": {
                    "frameworks": ["NGER"],
                    "emission_scopes": ["scope_2"],
                },
                "response_time_ms": 1234.56,
            }
        }
Config

Schema config.

Source code in green_gov_rag/api/schemas/query.py
class Config:
    """Schema config."""

    # Sample response attached to the generated JSON schema under "example";
    # documentation only, never used as a field default. It shows one fully
    # populated source entry plus the filters echoed back to the caller.
    json_schema_extra = {
        "example": {
            "query": "What are the Scope 2 market-based accounting methods under NGER?",
            "answer": "Under NGER, Scope 2 emissions can be calculated using market-based accounting methods...",
            "sources": [
                {
                    "title": "Clean Energy Regulator - Scope 2 Emissions Guideline",
                    "source_url": "https://cer.gov.au/document/voluntary-market-based-scope-2-emissions-guideline",
                    "excerpt": "Market-based accounting requires documentation of contractual instruments...",
                    "relevance_score": 0.92,
                    "page_number": 42,
                    "page_range": [42, 43],
                    "section_title": "Market-Based Accounting Methods",
                    "section_hierarchy": [
                        "Part 3: Scope 2 Emissions Accounting",
                        "Section 3.2: Calculation Methods",
                        "3.2.1 Market-Based Accounting",
                    ],
                    "clause_reference": "s.3.2.1",
                    "deep_link": "https://cer.gov.au/document/voluntary-market-based-scope-2-emissions-guideline#page=42",
                    "citation": "Clean Energy Regulator (2024), Scope 2 Emissions Guideline, Page 42, Section 3.2.1",
                    "jurisdiction": "federal",
                    "category": "environment",
                    "topic": "emissions_reporting",
                    "region": "Australia",
                    "esg_metadata": {
                        "frameworks": ["NGER", "ISSB", "GHG_Protocol"],
                        "emission_scopes": ["scope_2"],
                        "greenhouse_gases": [
                            "CO2",
                            "CH4",
                            "N2O",
                            "SF6",
                            "HFCs",
                            "PFCs",
                            "NF3",
                        ],
                        "consolidation_method": "operational_control",
                        "methodology_type": "calculation",
                        "regulator": "Clean Energy Regulator",
                        "reportable_under_nger": True,
                        "accounting_methods": ["location_based", "market_based"],
                    },
                    "spatial_metadata": {
                        "spatial_scope": "federal",
                        "state": None,
                        "lga_codes": [],
                        "applies_to_all_lgas": True,
                    },
                }
            ],
            "filters_applied": {
                "frameworks": ["NGER"],
                "emission_scopes": ["scope_2"],
            },
            "response_time_ms": 1234.56,
        }
    }

green_gov_rag.api.schemas.document

Document schemas.

DocumentResponse

Bases: BaseModel

Document response schema.

Source code in green_gov_rag/api/schemas/document.py
class DocumentResponse(BaseModel):
    """Document response schema."""

    # Required identification and classification fields (no defaults).
    id: str
    title: str
    source_url: str
    jurisdiction: str
    topic: str
    # Optional classification fields; None when not set.
    region: Optional[str] = None
    category: Optional[str] = None
    # Required processing state and stats.
    status: str
    chunk_count: int
    created_at: datetime
    # None until a processing timestamp is supplied.
    processed_at: Optional[datetime] = None

DocumentsFilter

Bases: BaseModel

Document filter parameters.

Source code in green_gov_rag/api/schemas/document.py
class DocumentsFilter(BaseModel):
    """Document filter parameters."""

    # Optional filters; each stays None when the caller does not supply it.
    jurisdiction: Optional[str] = None
    topic: Optional[str] = None
    region: Optional[str] = None
    status: Optional[str] = None

    # Pagination: limit is bounded to 1..500 (default 50), offset must be >= 0.
    limit: int = Field(
        default=50,
        ge=1,
        le=500,
    )
    offset: int = Field(
        default=0,
        ge=0,
    )

DocumentListResponse

Bases: BaseModel

Document list response.

Source code in green_gov_rag/api/schemas/document.py
class DocumentListResponse(BaseModel):
    """Document list response."""

    # All four fields are required: none declares a default.
    documents: list[DocumentResponse]
    # presumably the total number of matching documents, not just this page —
    # confirm against the list endpoint.
    total: int
    # presumably echo the pagination values used for this page — confirm
    # against the list endpoint.
    limit: int
    offset: int

Types

green_gov_rag.types

Type definitions and Enums for GreenGovRAG.

This module centralizes all type definitions, enums, and constants used throughout the application, replacing hardcoded strings and static dictionaries.

ESGFramework

Bases: str, Enum

ESG reporting frameworks and standards.

Source code in green_gov_rag/types.py
class ESGFramework(str, Enum):
    """Identifiers for the ESG reporting frameworks and standards in use.

    Members are ``str`` instances, so each compares equal to its raw value.
    """

    # National Greenhouse and Energy Reporting
    NGER = "NGER"

    # International Sustainability Standards Board
    ISSB = "ISSB"

    # GHG Protocol Corporate Standard
    GHG_PROTOCOL = "GHG_Protocol"

    # Global Reporting Initiative
    GRI = "GRI"

    # Task Force on Climate-related Financial Disclosures
    TCFD = "TCFD"

    # Carbon Disclosure Project
    CDP = "CDP"

    # Australian Safeguard Mechanism
    SAFEGUARD_MECHANISM = "Safeguard_Mechanism"

DocumentCategory

Bases: str, Enum

Document categories.

Source code in green_gov_rag/types.py
class DocumentCategory(str, Enum):
    """Closed set of category labels a document can carry.

    Members are ``str`` instances whose values are the lowercase labels.
    """

    # Subject-matter categories
    ENVIRONMENT = "environment"
    PLANNING = "planning"
    BUILDING = "building"
    HERITAGE = "heritage"

    # Instrument-type categories
    LEGISLATION = "legislation"
    REGULATION = "regulation"
    GUIDELINES = "guidelines"
    POLICY = "policy"

LLMProvider

Bases: str, Enum

Supported LLM providers.

Source code in green_gov_rag/types.py
class LLMProvider(str, Enum):
    """LLM back-ends the application can be pointed at.

    Members are ``str`` instances whose values are the lowercase provider keys.
    """

    # Lowercase value is what a member compares equal to as a plain string.
    OPENAI = "openai"
    AZURE = "azure"
    BEDROCK = "bedrock"
    ANTHROPIC = "anthropic"

ChunkType

Bases: str, Enum

Standardized chunk types for document elements.

Source code in green_gov_rag/types.py
class ChunkType(str, Enum):
    """Standardized chunk types for document elements."""

    HEADER = "header"  # Section heading/title
    PARAGRAPH = "paragraph"  # Regular text paragraph
    TABLE = "table"  # Table content
    LIST = "list"  # List items
    FOOTER = "footer"  # Footer content