Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
FROM mcr.microsoft.com/devcontainers/python:1-3.9-bookworm

ARG SPARK_VERSION=3.5.5
ARG HADOOP_VERSION=3.4.1
ARG OPENJDK_VERSION=17

ENV TZ=Etc/UTC
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8

# Fail pipelines (curl | gpg below) on the first broken command, not the last.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install dependencies. `update` and `install` share one layer so the package
# index is never stale, and the apt lists are removed in the same layer to
# keep the image small. DEBIAN_FRONTEND is set per-command rather than via
# ENV so it does not leak into the running container. gnupg and wget are
# added explicitly because later RUN steps depend on them.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    alien \
    build-essential \
    curl \
    git \
    gnupg \
    libffi-dev \
    libpq-dev \
    libssl-dev \
    lsb-release \
    odbcinst \
    openjdk-${OPENJDK_VERSION}-jre-headless \
    procps \
    unixodbc-dev \
    wget && \
    rm -rf /var/lib/apt/lists/*

# Install Microsoft ODBC driver 18 for SQL Server
# https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server
# apt-key is deprecated on Debian 12: store the key in a dedicated keyring and
# reference it with signed-by instead of the global trusted keystore.
RUN curl -fsSL https://packages.microsoft.com/keys/microsoft.asc \
    | gpg --dearmor -o /usr/share/keyrings/microsoft-prod.gpg && \
    echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/microsoft-prod.gpg] https://packages.microsoft.com/debian/12/prod $(lsb_release -cs) main" \
    > /etc/apt/sources.list.d/microsoft-prod.list && \
    apt-get update && \
    ACCEPT_EULA=Y DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
    libsasl2-dev \
    msodbcsql18 \
    mssql-tools18 \
    odbcinst \
    unixodbc-dev && \
    rm -rf /var/lib/apt/lists/*

# Dremio support: the driver ships as an RPM, so convert and install it with
# alien, then delete the downloaded package in the same layer.
RUN curl -fsSL https://download.dremio.com/arrow-flight-sql-odbc-driver/arrow-flight-sql-odbc-driver-LATEST.x86_64.rpm \
    -o arrow-driver.rpm && \
    alien -iv --scripts arrow-driver.rpm && \
    rm arrow-driver.rpm

# Spark support
ENV SPARK_HOME=/usr/local/spark

# Chain with && (the original `;` after wget masked download failures, letting
# tar run against a missing/partial archive).
RUN HADOOP_MAJOR_VERSION=$(echo "${HADOOP_VERSION}" | cut -d. -f1) && \
    wget -qO "spark.tgz" "https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}.tgz" && \
    tar xzf "spark.tgz" -C /usr/local --owner root --group root --no-same-owner && \
    rm "spark.tgz" && \
    ln -s "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_MAJOR_VERSION}" "${SPARK_HOME}"

# Install native Hadoop libraries
RUN wget -qO "hadoop.tgz" "https://www.apache.org/dist/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz" && \
    tar xzf "hadoop.tgz" -C /opt/ --owner root --group root --no-same-owner && \
    rm "hadoop.tgz" && \
    ln -s "/opt/hadoop-${HADOOP_VERSION}/etc/hadoop" /etc/hadoop && \
    mkdir "/opt/hadoop-${HADOOP_VERSION}/logs"

# Configure Spark and Hadoop
ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
ENV HADOOP_HOME="/opt/hadoop-${HADOOP_VERSION}"
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV MULTIHOMED_NETWORK=1
ENV HADOOP_COMMON_LIB_NATIVE_DIR="${HADOOP_HOME}/lib/native"
# HADOOP_OPTS is unset at this point; the original "${HADOOP_OPTS} ..." form
# only prepended a stray space, so set the value directly.
ENV HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib/native"
ENV LD_LIBRARY_PATH="${HADOOP_HOME}/lib/native"

ENV PATH="${PATH}:${SPARK_HOME}/bin:${HADOOP_HOME}/bin"

# Keep the dev container alive so VS Code can attach to a long-lived process.
CMD [ "sleep", "infinity" ]
52 changes: 52 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{
"name": "soda-core",
"dockerComposeFile": [
"docker-compose.yml",
"../soda/postgres/docker-compose.yml",
"../soda/spark/tests/docker/docker-compose.yml",
"../soda/vertica/docker-compose.yml"
],
"service": "devcontainer",
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:latest": {
"version": "latest"
}
},
"postStartCommand": "./.devcontainer/initialize.sh",
"workspaceFolder": "/workspaces/${localWorkspaceFolderBasename}",
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.mypy-type-checker",
"ms-python.black-formatter",
"ms-python.isort",
"ms-azuretools.vscode-docker"
],
"settings": {
"editor.formatOnSave": true,
"editor.formatOnType": true,
"files.exclude": {
".mypy_cache": true,
".venv": true,
".pytest_cache": true,
"**/*.egg-info": true,
"**/__pycache__": true,
"soda/*/build": true
},
"python.analysis.enablePytestSupport": true,
"python.defaultInterpreterPath": ".venv/bin/python",
"python.testing.pytestEnabled": true,
"python.testing.pytestPath": "soda/core/tests/",
"python.languageServer": "Pylance",
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.codeActionsOnSave": {
"source.organizeImports": "always",
"source.fixAll": "always"
}
}
}
}
}
}
8 changes: 8 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Compose definition for the dev container itself; the database services it
# talks to come from the other compose files listed in devcontainer.json.
services:
  devcontainer:
    build:
      context: .
      dockerfile: Dockerfile
    environment:
      # Hostname of the postgres service from the soda/postgres compose file
      # (read by postgres_data_source_fixture.py via os.getenv).
      POSTGRES_HOST: soda-sql-postgres
      # Default data source the test suite runs against.
      test_data_source: postgres
13 changes: 13 additions & 0 deletions .devcontainer/initialize.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Post-start setup for the dev container: ensure the Python virtual
# environment exists, activate it, and install the git pre-commit hooks.
#
# -e: abort on the first failure (the original only had -x, so a failed venv
#     creation was silently ignored and later steps ran against a broken env)
# -u: treat unset variables as errors
# -x: trace commands for visibility in the container creation log
set -eux -o pipefail

# Create the virtual environment if it does not already exist
if [ ! -d .venv ]; then
    scripts/recreate_venv.sh
fi

# Activate the virtual environment so pre-commit resolves from .venv
# shellcheck disable=SC1091
source .venv/bin/activate

# Setup pre-commit hooks for this clone
pre-commit install
2 changes: 1 addition & 1 deletion soda/postgres/tests/postgres_data_source_fixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def _build_configuration_dict(self, schema_name: str | None = None) -> dict:
return {
"data_source postgres": {
"type": "postgres",
"host": "localhost",
"host": os.getenv("POSTGRES_HOST", "localhost"),
"username": os.getenv("POSTGRES_USERNAME", "sodasql"),
"password": os.getenv("POSTGRES_PASSWORD"),
"database": os.getenv("POSTGRES_DATABASE", "sodasql"),
Expand Down
Loading