Skip to content

PandasAI Integration

Integration with PandasAI

With the help of the LangChain integration, we can connect PandasAI to our LLM and ask questions grounded in a dataset, such as questions about the data in a CSV file.

Below is a simple example of how to integrate PandasAI with our LLM.

Note: To install PandasAI, refer to the PandasAI documentation.

Replace <api_key> with the actual key.

# Import necessary libraries
from typing import Any, Optional

import pandas as pd
import requests
from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from pandasai import SmartDataframe
from pandasai.connectors.pandas import PandasConnector

# Set API key
api_key = "<api_key>"

# Create a custom LLM class, inherited from LLM
class CustomSDSCLLM(LLM):
    """LangChain-compatible LLM wrapper for the SDSC OpenWebUI chat-completions API.

    Each prompt is sent as a single-turn chat request (with a fixed system
    message) and the assistant's reply text is returned.
    """

    # Bearer token for the OpenWebUI endpoint.
    api_key: str
    # Model name understood by the endpoint.
    model: str = "llama3-sdsc"
    # Chat-completions endpoint URL.
    base_url: str = "https://sdsc-llm-openwebui.nrp-nautilus.io/api/chat/completions"

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for logging/serialization."""
        return "custom-sdsc-llm"

    def _call(
        self,
        prompt: str,
        stop: Optional[list[str]] = None,
        run_manager: Optional["CallbackManagerForLLMRun"] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the chat-completions endpoint and return the reply.

        Args:
            prompt: User message forwarded to the model.
            stop: Accepted for LangChain compatibility; not forwarded to the API.
            run_manager: Accepted for LangChain compatibility; unused here.
            **kwargs: Ignored extra arguments.

        Returns:
            The assistant's reply with surrounding whitespace stripped.

        Raises:
            Exception: If the API responds with a non-200 status code.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model,
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": False
        }

        # Bounded timeout so a stalled endpoint cannot hang the caller forever.
        response = requests.post(self.base_url, headers=headers, json=payload, timeout=60)

        if response.status_code == 200:
            response_data = response.json()
            return response_data['choices'][0]['message']['content'].strip()
        else:
            raise Exception(f"Request failed with status {response.status_code}: {response.text}")

# Create dummy data
records = {
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
    'Age': [28, 24, 35, 32],
    'Country': ['USA', 'UK', 'Australia', 'Germany']
}

# Build the DataFrame.
# frame = pd.read_csv('data.csv')  # Uncomment this line to load data from a CSV file
frame = pd.DataFrame(records)

# Instantiate the custom LLM defined above.
sdsc_llm = CustomSDSCLLM(api_key=api_key)

# Column-level descriptions help the LLM reason about the dataset.
# with open('description.json') as f:  # Uncomment this line to load a description from a JSON file
#     column_docs = json.load(f)
column_docs = {
    'Name': 'The name of the person.',
    'Age': 'The age of the person.',
    'Country': 'The country of the person.'
}
data_connector = PandasConnector(
    config={'original_df': frame},
    field_descriptions=column_docs,
)

# Wrap the connector in a SmartDataframe driven by the custom LLM.
smart_frame = SmartDataframe(data_connector, config={"llm": sdsc_llm})

# Ask natural-language questions about the data via the chat method.
for question in (
    'How many rows are there?',
    'What are the unique countries?',
    'How many people are from the USA?',
):
    print(smart_frame.chat(question))