StatCan · Jose-Matsuda · Jan 26, 2024 · Jan 26, 2024
@@ -0,0 +1,36 @@
+# Include any files or directories that you don't want to be copied to your
+# container here (e.g., local build artifacts, temporary files, etc.).
+#
+# For more help, visit the .dockerignore file reference guide at
+# https://docs.docker.com/go/build-context-dockerignore/
+
+**/.DS_Store
+**/__pycache__
+**/.venv
+**/.classpath
+**/.dockerignore
+**/.env
+**/.git
+**/.gitignore
+**/.project
+**/.settings
+**/.toolstarget
+**/.vs
+**/.vscode
+**/*.*proj.user
+**/*.dbmdl
+**/*.jfm
+**/bin
+**/charts
+**/docker-compose*
+**/compose*
+**/Dockerfile*
+**/node_modules
+**/npm-debug.log
+**/obj
+**/secrets.dev.yaml
+**/values.dev.yaml
+**/sp2jiracron
+LICENSE
+README.md
+**/sp2jira-cronjob
@@ -0,0 +1,3 @@
+__pycache__/JiraUtils.cpython-311.pyc
+__pycache__/SharepointUtils.cpython-311.pyc
+sp2jiracron/*
@@ -0,0 +1,49 @@
+# syntax=docker/dockerfile:1
+
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Dockerfile reference guide at
+# https://docs.docker.com/go/dockerfile-reference/
+
+ARG PYTHON_VERSION=3.12
+FROM python:${PYTHON_VERSION}-slim AS base
+
+# Prevents Python from writing pyc files.
+ENV PYTHONDONTWRITEBYTECODE=1
+
+# Keeps Python from buffering stdout and stderr to avoid situations where
+# the application crashes without emitting any logs due to buffering.
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# Create a non-privileged user that the app will run under.
+# See https://docs.docker.com/go/dockerfile-user-best-practices/
+ARG UID=10001
+RUN adduser \
+    --disabled-password \
+    --gecos "" \
+    --home "/nonexistent" \
+    --shell "/sbin/nologin" \
+    --no-create-home \
+    --uid "${UID}" \
+    appuser
+
+# Download dependencies as a separate step to take advantage of Docker's caching.
+# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
+# Leverage a bind mount to requirements.txt to avoid having to copy it into
+# this layer.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=bind,source=requirements.txt,target=requirements.txt \
+    python -m pip install -r requirements.txt
+
+# Switch to the non-privileged user to run the application.
+USER appuser
+
+# Copy the source code into the container.
+COPY . .
+
+# Expose the port that the application listens on.
+# NOTE(review): Sp2jira.py does not appear to open a listening socket; this
+# line is kept from the service template - confirm whether it is still needed.
+EXPOSE 8000
+
+# Run the application.
+# Exec form (JSON array) starts python directly instead of through
+# `/bin/sh -c`, so the process receives stop signals from the container runtime.
+CMD ["python", "Sp2jira.py"]
@@ -0,0 +1,31 @@
+from jira import JIRA
+
+class JiraUtils:
+
+    # parameterized constructor
+    def __init__(self, jira_server, jira_auth_token, jira_project, jira_issue_type, jira_assignee, jira_watchers):
+
+        self.jira = JIRA(server=jira_server, token_auth=jira_auth_token)
+        self.jira_project = jira_project
+        self.jira_issue_type = jira_issue_type
+        self.jira_assignee = jira_assignee
+        self.jira_watchers = jira_watchers
+
+    def create_jira_issue_from_form_data(self, issue_summary, issue_desc):
+
+        issue_dict = {
+            'project': {'key': self.jira_project},
+            'summary': issue_summary,
+            # epic name field
+            'customfield_10704': issue_summary,
+            'description': issue_desc,
+            'issuetype': {'name': self.jira_issue_type},
+            'assignee': {'name': self.jira_assignee}
+        }
+
+        new_issue = self.jira.create_issue(fields=issue_dict)
+        # this can't be done as part of issue creation, unfortunately
+        for watcher in self.jira_watchers:
+            self.jira.add_watcher(new_issue, watcher)
+
+        return new_issue
@@ -0,0 +1,22 @@
+### Building and running your application
+
+When you're ready, start your application by running:
+`docker compose up --build`.
+
+Your application will be available at http://localhost:8000.
+
+### Deploying your application to the cloud
+
+First, build your image, e.g.: `docker build -t myapp .`.
+If your cloud uses a different CPU architecture than your development
+machine (e.g., you are on a Mac M1 and your cloud provider is amd64),
+you'll want to build the image for that platform, e.g.:
+`docker build --platform=linux/amd64 -t myapp .`.
+
+Then, push it to your registry, e.g. `docker push myregistry.com/myapp`.
+
+Consult Docker's [getting started](https://docs.docker.com/go/get-started-sharing/)
+docs for more detail on building and pushing.
+
+### References
+* [Docker's Python guide](https://docs.docker.com/language/python/)
@@ -0,0 +1,39 @@
+# sp2jira-cron-container
+- intended to be run as a k8s cronjob.
+- retrieves ms forms submissions from a sharepoint excel file, and creates a jira issue for each new submission
+- keeps track of already-processed submissions via a Sharepoint List
+
+# TODO
+- set up dev form / excel, dev list, dev jira project, test running both prod and dev with same container
+- dockerfile / compose.yaml currently set up with defaults for a service, need to research what to change here for a cronjob but it's working
+
+## environment variables
+The following environment variables are used by the script.
+
+### Jira
+| Variable | Mandatory | Description |
+| ----------- | ----------- | ----------- |
+| `JIRA_SERVER` | * | URL of the jira server. e.g. https://jirab.statcan.ca |
+| `JIRA_TOKEN` | * | the token used to authorize the script with the jira server |
+| `JIRA_PROJECT` | * | the Jira project that tickets will be created in |
+| `JIRA_ASSIGNEE` | * | the Jira user that Jira issues will be assigned to |
+| `JIRA_WATCHERS` | * | json list of Jira users that will be added to new Jira issues as watchers. See https://stackoverflow.com/questions/31352317/how-to-pass-a-list-as-an-environment-variable |
+| `JIRA_ISSUE_TYPE`| | the Jira issue type that tickets will be created as. Default is 'Epic' |
+| `JIRA_ISSUE_SUMMARY`| | text for the issue summary. Defaults to 'DAS Intake Form submission by {0} {1}', where 0 is FNAME and 1 is LNAME |
+| `JIRA_ISSUE_DESC_NO_RESPONSE`| | what to put when an answer hasn't been provided in a submission. Default is 'No Response' |
+
+### Sharepoint
+| Variable | Mandatory | Description |
+| ----------- | ----------- | ----------- |
+| `SHAREPOINT_CLIENT_ID` | * | the id used to authorize the script with the sharepoint site |
+| `SHAREPOINT_CLIENT_SECRET` | * | the secret used to authorize the script with the sharepoint site |
+| `SHAREPOINT_SITE_URL` | * | URL of the sharepoint site |
+| `SHAREPOINT_FILE_URL` | * | Path to the .xlsx file in sharepoint. |
+| `SHAREPOINT_LIST_TITLE`| * | Name of the processed id list in sharepoint. |
+| `SHAREPOINT_SHEET_NAME` |  | Name of the excel sheet used. Default is 'Form1' |
+| `SHAREPOINT_ID_COLUMN` |  | Column of the excel sheet containing the submission ID. Default is '0' |
+| `SHAREPOINT_FNAME_COLUMN` |  | Name of the column containing First Name. Default is 'First name' |
+| `SHAREPOINT_LNAME_COLUMN` |  | Name of the column containing Last Name. Default is 'Last name' |
+| `SHAREPOINT_LIST_COLUMN`|  | Name of the list column in sharepoint containing processed ID data. Default is 'Title' |
+| `SHAREPOINT_LIST_MAX_RETURN`|  | Maximum number of list items to fetch. Default is '5000' |
+
@@ -0,0 +1,63 @@
+from office365.runtime.auth.client_credential import ClientCredential
+from office365.sharepoint.client_context import ClientContext
+from office365.sharepoint.files.file import File
+import io
+import pandas as pd
+
+class SharepointUtils:    
+
+    # creates the sharepoint connection
+    def __init__(self, client_id, client_secret, site_url, file_url, sheet_name, list_title, list_column, list_max_return):
+
+        client_creds = ClientCredential(client_id, client_secret)
+        self.ctx = ClientContext(site_url).with_credentials(client_creds)
+        self.file_url = file_url
+        self.sheet_name = sheet_name
+        self.list_title = list_title
+        self.list_column = list_column
+        self.list_max_return = list_max_return
+
+
+    def get_intake_form_data_as_dataframe(self):
+
+        # connect to sharepoint and get the xslx file
+        response = File.open_binary(self.ctx, self.file_url)
+
+        # save data to BytesIO stream
+        bytes_file_obj = io.BytesIO()
+        bytes_file_obj.write(response.content)
+        bytes_file_obj.seek(0) #set file object to start
+
+        # read excel file and each sheet into pandas dataframe 
+        df = pd.read_excel(bytes_file_obj, self.sheet_name)
+        # drop empty rows. inplace=True modifies the existing dataframe instead of returning a new one.
+        df.dropna(inplace=True, subset=['ID'])
+        # remove the already processed ids from the dataframe
+        processed_ids = self.get_processed_id_list()
+        df = df[df.ID.isin(processed_ids) == False]
+
+        return df
+
+    def get_processed_id_list(self):
+
+        # connect to sharepoint and get the list of IDs
+        raw_list = self.ctx.web.lists.get_by_title(self.list_title)
+        id_list = raw_list.items.get().select([self.list_column]).top(self.list_max_return).execute_query()
+
+        print("Total number of processed applications before this run: {0}".format(len(id_list)))
+        processed_id_list = []
+
+        for index, item in enumerate(id_list):  # type: int, ListItem
+            application_id = float(item.properties[self.list_column]) #convert to float to match dataframe
+            processed_id_list.append(application_id)
+
+        return processed_id_list
+
+    def add_processed_id_to_list(self, new_id):
+
+        raw_list = self.ctx.web.lists.get_by_title(self.list_title)
+        new_list_item_properties = {
+            self.list_column: str(new_id) #need to convert back to string because sharepoint wants it that way
+        }
+        new_item = raw_list.add_item(new_list_item_properties).execute_query()
+        return new_item
@@ -0,0 +1,70 @@
+import os
+import json
+import pandas as pd
+from JiraUtils import JiraUtils
+from SharepointUtils import SharepointUtils
+
+# initialize JIRA variables and helper class. see README for details.
+# mandatories
+jira_server = os.environ["JIRA_SERVER"] #"https://jirab.statcan.ca"
+jira_auth_token = os.environ["JIRA_TOKEN"]
+jira_project = os.environ["JIRA_PROJECT"] #"DASBOP"
+jira_assignee = os.environ["JIRA_ASSIGNEE"] #"luodan"
+jira_watchers = json.loads(os.environ['JIRA_WATCHERS']) #["zimmshe", "bonedan", "coutann"] https://stackoverflow.com/questions/31352317/how-to-pass-a-list-as-an-environment-variable 
+#optionals
+jira_issue_type = os.environ.get('JIRA_ISSUE_TYPE', "Epic")
+jira_issue_summary = os.environ.get('JIRA_ISSUE_SUMMARY', "DAS Intake Form submission by {0} {1}")
+jira_desc_no_response = os.environ.get('JIRA_ISSUE_DESC_NO_RESPONSE', "No Response")
+
+jira = JiraUtils(jira_server, jira_auth_token, jira_project, jira_issue_type, jira_assignee, jira_watchers)
+
+# initialize sharepoint variables and helper class. see README for details.
+#mandatories
+client_id = os.environ['SHAREPOINT_CLIENT_ID']
+client_secret = os.environ['SHAREPOINT_CLIENT_SECRET']
+site_url = os.environ['SHAREPOINT_SITE_URL'] #"https://054gc.sharepoint.com/sites/DAaaSD-AllStaff-DADS-Touslesemployes"
+file_url = os.environ['SHAREPOINT_FILE_URL'] #"/sites/DAaaSD-AllStaff-DADS-Touslesemployes/Shared%20Documents/CSU%20-%20UCS/DAaaS%20Intake%20Form/Data%20Analytics%20Services%20(DAS)%20-%20Get%20started%201.xlsx"
+list_title = os.environ['SHAREPOINT_LIST_TITLE'] #"Intake_form_processed_ids"
+#optionals
+sheet_name = os.environ.get('SHAREPOINT_SHEET_NAME', "Form1")
+ID_COL = os.environ.get('SHAREPOINT_ID_COLUMN', 0)
+FNAME_COL = os.environ.get('SHAREPOINT_FNAME_COLUMN', "First name")
+LNAME_COL = os.environ.get('SHAREPOINT_LNAME_COLUMN', "Last name")
+list_column = os.environ.get('SHAREPOINT_LIST_COLUMN', "Title" )
+list_max_return = os.environ.get('SHAREPOINT_LIST_MAX_RETURN', 5000) #if we ever get more applications than this we'll have to adjust it
+
+sputils = SharepointUtils(client_id, client_secret, site_url, file_url, sheet_name, list_title, list_column, list_max_return)
+
+
+# get the form data from sharepoint
+df = sputils.get_intake_form_data_as_dataframe()
+
+## go through each row and create a JIRA issue, saving processed IDs to the sharepoint list so we don't create them again later
+issue_count = 0
+for index, row in df.iterrows():
+
+    current_id = row[ID_COL]
+    issue_desc = ""
+    issue_summary = jira_issue_summary.format(row[FNAME_COL], row[LNAME_COL])
+
+    for rowindex, rowval in row.items():
+        issue_desc += f"{rowindex} : \n"
+        if pd.isna(rowval):
+            issue_desc += f"*{jira_desc_no_response}*\n\n"
+        else:
+            issue_desc += f"*{rowval}*\n\n"
+
+    print(f"JIRA issue to be created from row id: {current_id}")
+    print(f"Summary: {issue_summary}")
+    #print(issue_desc) #left for debug
+
+    try:
+        new_issue = jira.create_jira_issue_from_form_data(issue_summary, issue_desc)
+    except:
+        print(f"Error creating JIRA issue from ID {current_id}")
+    else:
+        sputils.add_processed_id_to_list(current_id)
+        issue_count += 1
+        print(new_issue)
+
+print(f"Process completed. {issue_count} issues created.")
@@ -0,0 +1,35 @@
+# Comments are provided throughout this file to help you get started.
+# If you need more help, visit the Docker compose reference guide at
+# https://docs.docker.com/go/compose-spec-reference/
+
+# Here the instructions define your application as a service called "server".
+# This service is built from the Dockerfile in the current directory.
+# You can add other services your application may depend on here, such as a
+# database or a cache. For examples, see the Awesome Compose repository:
+# https://github.com/docker/awesome-compose
+services:
+  server:
+    build:
+      context: .
+    ports:
+      - 8000:8000
+    environment:
+      - JIRA_SERVER
+      - JIRA_TOKEN
+      - JIRA_PROJECT
+      - JIRA_ASSIGNEE
+      - JIRA_WATCHERS
+      - JIRA_ISSUE_TYPE
+      - JIRA_ISSUE_SUMMARY
+      - JIRA_ISSUE_DESC_NO_RESPONSE
+      - SHAREPOINT_CLIENT_ID
+      - SHAREPOINT_CLIENT_SECRET
+      - SHAREPOINT_SITE_URL
+      - SHAREPOINT_FILE_URL
+      - SHAREPOINT_LIST_TITLE
+      - SHAREPOINT_SHEET_NAME
+      - SHAREPOINT_ID_COLUMN
+      - SHAREPOINT_FNAME_COLUMN
+      - SHAREPOINT_LNAME_COLUMN
+      - SHAREPOINT_LIST_COLUMN
+      - SHAREPOINT_LIST_MAX_RETURN
@@ -0,0 +1,26 @@
+certifi==2023.11.17
+cffi==1.16.0
+charset-normalizer==3.3.2
+cryptography==41.0.7
+defusedxml==0.7.1
+et-xmlfile==1.1.0
+idna==3.6
+jira==3.5.2
+msal==1.26.0
+numpy==1.26.3
+oauthlib==3.2.2
+Office365-REST-Python-Client==2.5.4
+openpyxl==3.1.2
+packaging==23.2
+pandas==2.1.4
+pycparser==2.21
+PyJWT==2.8.0
+python-dateutil==2.8.2
+pytz==2023.3.post1
+requests==2.31.0
+requests-oauthlib==1.3.1
+requests-toolbelt==1.0.0
+six==1.16.0
+typing_extensions==4.9.0
+tzdata==2023.4
+urllib3==2.1.0