diff --git a/.gitignore b/.gitignore index 274165ed8..1c1b83789 100644 --- a/.gitignore +++ b/.gitignore @@ -189,4 +189,7 @@ cache *.iml # MacOS Finder -**/.DS_Store \ No newline at end of file +**/.DS_Store + +#VS Code files +.vscode/ \ No newline at end of file diff --git a/benchmarks/000.microbenchmarks/010.sleep/config.json b/benchmarks/000.microbenchmarks/010.sleep/config.json index 93ce2f561..53f6349d6 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/config.json +++ b/benchmarks/000.microbenchmarks/010.sleep/config.json @@ -1,6 +1,6 @@ { "timeout": 120, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py new file mode 100644 index 000000000..7dda59a57 --- /dev/null +++ b/benchmarks/000.microbenchmarks/010.sleep/pypy/function.py @@ -0,0 +1,9 @@ + +from time import sleep + +def handler(event): + + # start timing + sleep_time = event.get('sleep') + sleep(sleep_time) + return { 'result': sleep_time } diff --git a/benchmarks/100.webapps/110.dynamic-html/config.json b/benchmarks/100.webapps/110.dynamic-html/config.json index 25254c247..da7765197 100644 --- a/benchmarks/100.webapps/110.dynamic-html/config.json +++ b/benchmarks/100.webapps/110.dynamic-html/config.json @@ -1,6 +1,6 @@ { "timeout": 10, "memory": 128, - "languages": ["python", "nodejs"], + "languages": ["python", "nodejs", "pypy"], "modules": [] } diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/function.py b/benchmarks/100.webapps/110.dynamic-html/pypy/function.py new file mode 100644 index 000000000..7c990f4eb --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/function.py @@ -0,0 +1,22 @@ +from datetime import datetime +from random import sample +from os import path +from time import time +import os + +from jinja2 import Template + +SCRIPT_DIR = 
path.abspath(path.join(path.dirname(__file__))) + +def handler(event): + + # start timing + name = event.get('username') + size = event.get('random_len') + cur_time = datetime.now() + random_numbers = sample(range(0, 1000000), size) + template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read()) + html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers) + # end timing + # dump stats + return {'result': html} diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh b/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh new file mode 100755 index 000000000..7b047bff1 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/templates/" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with static templates ${path}" +fi +cp -r ${SCRIPT_DIR}/templates ${DIR} diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt b/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt new file mode 100644 index 000000000..5ca569440 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/requirements.txt @@ -0,0 +1 @@ +jinja2>=2.10.3 diff --git a/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html b/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html new file mode 100644 index 000000000..d4a11f019 --- /dev/null +++ b/benchmarks/100.webapps/110.dynamic-html/pypy/templates/template.html @@ -0,0 +1,26 @@ + + + + Randomly generated data. + + + + + +
+

Welcome {{username}}!

+

Data generated at: {{cur_time}}!

+

Requested random numbers:

+ +
+ + diff --git a/benchmarks/100.webapps/120.uploader/config.json b/benchmarks/100.webapps/120.uploader/config.json index cbc635670..6ab2c4e94 100644 --- a/benchmarks/100.webapps/120.uploader/config.json +++ b/benchmarks/100.webapps/120.uploader/config.json @@ -1,6 +1,12 @@ { "timeout": 30, "memory": 128, - "languages": ["python", "nodejs"], - "modules": ["storage"] -} + "languages": [ + "python", + "nodejs", + "pypy" + ], + "modules": [ + "storage" + ] +} \ No newline at end of file diff --git a/benchmarks/100.webapps/120.uploader/pypy/function.py b/benchmarks/100.webapps/120.uploader/pypy/function.py new file mode 100755 index 000000000..d032bbdb6 --- /dev/null +++ b/benchmarks/100.webapps/120.uploader/pypy/function.py @@ -0,0 +1,48 @@ + +import datetime +import os + +import urllib.request + +from . import storage +client = storage.storage.get_instance() + +SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" + +def handler(event): + + bucket = event.get('bucket').get('bucket') + output_prefix = event.get('bucket').get('output') + url = event.get('object').get('url') + name = os.path.basename(url) + download_path = '/tmp/{}'.format(name) + + process_begin = datetime.datetime.now() + req = urllib.request.Request(url) + req.add_header('User-Agent', SEBS_USER_AGENT) + with open(download_path, 'wb') as f: + with urllib.request.urlopen(req) as response: + f.write(response.read()) + size = os.path.getsize(download_path) + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload(bucket, os.path.join(output_prefix, name), download_path) + upload_end = datetime.datetime.now() + + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'url': url, + 'key': key_name + }, + 'measurement': { + 'download_time': 0, + 
'download_size': 0, + 'upload_time': upload_time, + 'upload_size': size, + 'compute_time': process_time + } + } diff --git a/benchmarks/100.webapps/120.uploader/pypy/requirements.txt b/benchmarks/100.webapps/120.uploader/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/200.multimedia/210.thumbnailer/config.json b/benchmarks/200.multimedia/210.thumbnailer/config.json index 8edb99e52..676180d49 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/config.json +++ b/benchmarks/200.multimedia/210.thumbnailer/config.json @@ -1,6 +1,12 @@ { "timeout": 60, "memory": 256, - "languages": ["python", "nodejs"], - "modules": ["storage"] -} + "languages": [ + "python", + "nodejs", + "pypy" + ], + "modules": [ + "storage" + ] +} \ No newline at end of file diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md b/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md new file mode 100755 index 000000000..fc6a75265 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/README.md @@ -0,0 +1,12 @@ +# Image Processing + +A simple pipeline performing basic image operations with Pillow. + +[Inspired by AWS Lambda tutorial code.](https://docs.aws.amazon.com/lambda/latest/dg/with-s3-example-deployment-pkg.htm) + +### Instructions + +1. Deploy Docker container with function code and input data. + +2. Example of JSON payload: `{ "dir": "input_data", "id": "1" }'`. + diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py new file mode 100755 index 000000000..20527067b --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/function.py @@ -0,0 +1,70 @@ +import datetime +import io +import os +import sys +import uuid +from urllib.parse import unquote_plus +from PIL import Image + +from . 
import storage +client = storage.storage.get_instance() + +# Disk-based solution +#def resize_image(image_path, resized_path, w, h): +# with Image.open(image_path) as image: +# image.thumbnail((w,h)) +# image.save(resized_path) + +# Memory-based solution +def resize_image(image_bytes, w, h): + with Image.open(io.BytesIO(image_bytes)) as image: + image.thumbnail((w,h)) + out = io.BytesIO() + image.save(out, format='jpeg') + # necessary to rewind to the beginning of the buffer + out.seek(0) + return out + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = unquote_plus(event.get('object').get('key')) + width = event.get('object').get('width') + height = event.get('object').get('height') + # UUID to handle multiple calls + #download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) + #upload_path = '/tmp/resized-{}'.format(key) + #client.download(input_bucket, key, download_path) + #resize_image(download_path, upload_path, width, height) + #client.upload(output_bucket, key, upload_path) + download_begin = datetime.datetime.now() + img = client.download_stream(bucket, os.path.join(input_prefix, key)) + download_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + resized = resize_image(img, width, height) + resized_size = resized.getbuffer().nbytes + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + key_name = client.upload_stream(bucket, os.path.join(output_prefix, key), resized) + upload_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 
'download_size': len(img), + 'upload_time': upload_time, + 'upload_size': resized_size, + 'compute_time': process_time + } + } diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 new file mode 100644 index 000000000..9caa46c8d --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.10 @@ -0,0 +1 @@ +pillow==10.3.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 new file mode 100644 index 000000000..9caa46c8d --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.11 @@ -0,0 +1 @@ +pillow==10.3.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 new file mode 100644 index 000000000..9caa46c8d --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.12 @@ -0,0 +1 @@ +pillow==10.3.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 new file mode 100755 index 000000000..118ca689e --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.6 @@ -0,0 +1 @@ +Pillow==7.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 new file mode 100755 index 000000000..91d1b3192 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.7 @@ -0,0 +1 @@ +Pillow==8.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 
b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 new file mode 100755 index 000000000..8da721c23 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.8 @@ -0,0 +1 @@ +Pillow==9.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 new file mode 100755 index 000000000..8da721c23 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.3.9 @@ -0,0 +1 @@ +Pillow==9.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 new file mode 100644 index 000000000..68ac1eb37 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.8 @@ -0,0 +1 @@ +Pillow==10.0.0 diff --git a/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 new file mode 100644 index 000000000..68ac1eb37 --- /dev/null +++ b/benchmarks/200.multimedia/210.thumbnailer/pypy/requirements.txt.arm.3.9 @@ -0,0 +1 @@ +Pillow==10.0.0 diff --git a/benchmarks/300.utilities/311.compression/config.json b/benchmarks/300.utilities/311.compression/config.json index 8edb99e52..59fb266d5 100644 --- a/benchmarks/300.utilities/311.compression/config.json +++ b/benchmarks/300.utilities/311.compression/config.json @@ -1,6 +1,12 @@ { "timeout": 60, - "memory": 256, - "languages": ["python", "nodejs"], - "modules": ["storage"] -} + "memory": 512, + "languages": [ + "python", + "nodejs", + "pypy" + ], + "modules": [ + "storage" + ] +} \ No newline at end of file diff --git a/benchmarks/300.utilities/311.compression/pypy/README.md b/benchmarks/300.utilities/311.compression/pypy/README.md new file mode 100755 index 000000000..fc6a75265 --- /dev/null +++ b/benchmarks/300.utilities/311.compression/pypy/README.md @@ -0,0 +1,12 @@ 
+# Image Processing + +A simple pipeline performing basic image operations with Pillow. + +[Inspired by AWS Lambda tutorial code.](https://docs.aws.amazon.com/lambda/latest/dg/with-s3-example-deployment-pkg.htm) + +### Instructions + +1. Deploy Docker container with function code and input data. + +2. Example of JSON payload: `{ "dir": "input_data", "id": "1" }'`. + diff --git a/benchmarks/300.utilities/311.compression/pypy/function.py b/benchmarks/300.utilities/311.compression/pypy/function.py new file mode 100755 index 000000000..f758e14e4 --- /dev/null +++ b/benchmarks/300.utilities/311.compression/pypy/function.py @@ -0,0 +1,59 @@ +import datetime +import io +import os +import shutil +import uuid +import zlib + +from . import storage +client = storage.storage.get_instance() + +def parse_directory(directory): + + size = 0 + for root, dirs, files in os.walk(directory): + for file in files: + size += os.path.getsize(os.path.join(root, file)) + return size + +def handler(event): + + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + key = event.get('object').get('key') + download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + os.makedirs(download_path) + + s3_download_begin = datetime.datetime.now() + client.download_directory(bucket, os.path.join(input_prefix, key), download_path) + s3_download_stop = datetime.datetime.now() + size = parse_directory(download_path) + + compress_begin = datetime.datetime.now() + shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path) + compress_end = datetime.datetime.now() + + s3_upload_begin = datetime.datetime.now() + archive_name = '{}.zip'.format(key) + archive_size = os.path.getsize(os.path.join(download_path, archive_name)) + key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), os.path.join(download_path, archive_name)) + s3_upload_stop = datetime.datetime.now() + + download_time 
= (s3_download_stop - s3_download_begin) / datetime.timedelta(microseconds=1) + upload_time = (s3_upload_stop - s3_upload_begin) / datetime.timedelta(microseconds=1) + process_time = (compress_end - compress_begin) / datetime.timedelta(microseconds=1) + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 'download_size': size, + 'upload_time': upload_time, + 'upload_size': archive_size, + 'compute_time': process_time + } + } + diff --git a/benchmarks/300.utilities/311.compression/pypy/requirements.txt b/benchmarks/300.utilities/311.compression/pypy/requirements.txt new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/300.utilities/312.converter/config.json b/benchmarks/300.utilities/312.converter/config.json new file mode 100644 index 000000000..9918b7899 --- /dev/null +++ b/benchmarks/300.utilities/312.converter/config.json @@ -0,0 +1,11 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": [ + "python", + "pypy" + ], + "modules": [ + "storage" + ] +} \ No newline at end of file diff --git a/benchmarks/300.utilities/312.converter/input.py b/benchmarks/300.utilities/312.converter/input.py new file mode 100644 index 000000000..028278820 --- /dev/null +++ b/benchmarks/300.utilities/312.converter/input.py @@ -0,0 +1,33 @@ +import json +import random +import string + +size_generators = { + 'test' : 10, + 'small' : 1000, + 'large': 50000 +} + +def buckets_count(): + return (1, 1) + +def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + data = [] + for _ in range(size_generators[size]): + row = {} + for i in range(1, 21): + row[f'a{i}'] = ''.join(random.choices(string.ascii_letters + string.digits, k=8)) + data.append(row) + with open('data.json', 'w') as f: + json.dump(data, f, indent=4) + filename = f.name + + upload_func(0, filename, filename) + + input_config = {'object': {}, 'bucket': {}} + input_config['object']['key'] = 
filename + input_config['bucket']['bucket'] = benchmarks_bucket + input_config['bucket']['input'] = input_paths[0] + input_config['bucket']['output'] = output_paths[0] + + return input_config \ No newline at end of file diff --git a/benchmarks/300.utilities/312.converter/pypy/function.py b/benchmarks/300.utilities/312.converter/pypy/function.py new file mode 100644 index 000000000..4306c2cb5 --- /dev/null +++ b/benchmarks/300.utilities/312.converter/pypy/function.py @@ -0,0 +1,67 @@ +import openpyxl +import json +import datetime +import os +import io +from urllib.parse import unquote_plus + +from . import storage +client = storage.storage.get_instance() + +def convertJson(json_input): + if isinstance(json_input, str): + data = json.loads(json_input) + else: + data = json_input + + wb = openpyxl.Workbook() + ws = wb.active + + ordered_keys = [f'a{i}' for i in range(1, 21)] + + for entry in data: + row_values = [entry[key] for key in ordered_keys] + ws.append(row_values) + + outputStream = io.BytesIO() + wb.save(outputStream) + outputStream.seek(0) + return outputStream + +def handler(event): + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + + key = unquote_plus(event.get('object').get('key')) + download_begin = datetime.datetime.now() + jsonMemView = client.download_stream(bucket, os.path.join(input_prefix, key)) + download_end = datetime.datetime.now() + jsonObj = json.loads(jsonMemView.tobytes()) + process_begin = datetime.datetime.now() + xlsxFileBytes = convertJson(jsonObj) + xlsxSize = xlsxFileBytes.getbuffer().nbytes + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + outputFilename = 'output.xlsx' + key_name = client.upload_stream(bucket, os.path.join(output_prefix, outputFilename), xlsxFileBytes) + upload_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + 
upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 'download_size': len(jsonMemView), + 'upload_time': upload_time, + 'upload_size': xlsxSize, + 'compute_time': process_time + } + } \ No newline at end of file diff --git a/benchmarks/300.utilities/312.converter/pypy/requirements.txt b/benchmarks/300.utilities/312.converter/pypy/requirements.txt new file mode 100644 index 000000000..794cc3d38 --- /dev/null +++ b/benchmarks/300.utilities/312.converter/pypy/requirements.txt @@ -0,0 +1 @@ +openpyxl diff --git a/benchmarks/300.utilities/312.converter/python/function.py b/benchmarks/300.utilities/312.converter/python/function.py new file mode 100644 index 000000000..4306c2cb5 --- /dev/null +++ b/benchmarks/300.utilities/312.converter/python/function.py @@ -0,0 +1,67 @@ +import openpyxl +import json +import datetime +import os +import io +from urllib.parse import unquote_plus + +from . 
import storage +client = storage.storage.get_instance() + +def convertJson(json_input): + if isinstance(json_input, str): + data = json.loads(json_input) + else: + data = json_input + + wb = openpyxl.Workbook() + ws = wb.active + + ordered_keys = [f'a{i}' for i in range(1, 21)] + + for entry in data: + row_values = [entry[key] for key in ordered_keys] + ws.append(row_values) + + outputStream = io.BytesIO() + wb.save(outputStream) + outputStream.seek(0) + return outputStream + +def handler(event): + bucket = event.get('bucket').get('bucket') + input_prefix = event.get('bucket').get('input') + output_prefix = event.get('bucket').get('output') + + key = unquote_plus(event.get('object').get('key')) + download_begin = datetime.datetime.now() + jsonMemView = client.download_stream(bucket, os.path.join(input_prefix, key)) + download_end = datetime.datetime.now() + jsonObj = json.loads(jsonMemView.tobytes()) + process_begin = datetime.datetime.now() + xlsxFileBytes = convertJson(jsonObj) + xlsxSize = xlsxFileBytes.getbuffer().nbytes + process_end = datetime.datetime.now() + + upload_begin = datetime.datetime.now() + outputFilename = 'output.xlsx' + key_name = client.upload_stream(bucket, os.path.join(output_prefix, outputFilename), xlsxFileBytes) + upload_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': { + 'bucket': bucket, + 'key': key_name + }, + 'measurement': { + 'download_time': download_time, + 'download_size': len(jsonMemView), + 'upload_time': upload_time, + 'upload_size': xlsxSize, + 'compute_time': process_time + } + } \ No newline at end of file diff --git a/benchmarks/300.utilities/312.converter/python/requirements.txt b/benchmarks/300.utilities/312.converter/python/requirements.txt new file mode 100644 
index 000000000..794cc3d38 --- /dev/null +++ b/benchmarks/300.utilities/312.converter/python/requirements.txt @@ -0,0 +1 @@ +openpyxl diff --git a/benchmarks/500.scientific/501.graph-pagerank/config.json b/benchmarks/500.scientific/501.graph-pagerank/config.json index e80fb4351..689694d92 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/config.json +++ b/benchmarks/500.scientific/501.graph-pagerank/config.json @@ -1,6 +1,9 @@ { "timeout": 120, "memory": 512, - "languages": ["python"], + "languages": [ + "python", + "pypy" + ], "modules": [] -} +} \ No newline at end of file diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py b/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py new file mode 100755 index 000000000..0e462e9b4 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/function.py @@ -0,0 +1,29 @@ +import datetime +import igraph + +def handler(event): + + size = event.get('size') + if "seed" in event: + import random + + random.seed(event["seed"]) + + graph_generating_begin = datetime.datetime.now() + graph = igraph.Graph.Barabasi(size, 10) + graph_generating_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + result = graph.pagerank() + process_end = datetime.datetime.now() + + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + + return { + 'result': result[0], + 'measurement': { + 'graph_generating_time': graph_generating_time, + 'compute_time': process_time + } + } diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 new file mode 
100644 index 000000000..e291b7b39 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.10 @@ -0,0 +1 @@ +igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 new file mode 100644 index 000000000..e291b7b39 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.11 @@ -0,0 +1 @@ +igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 new file mode 100644 index 000000000..e291b7b39 --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.12 @@ -0,0 +1 @@ +igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 new file mode 100755 index 000000000..4e4d562fd --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.7 @@ -0,0 +1 @@ +python-igraph==0.8.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 new file mode 100755 index 000000000..4e4d562fd --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.8 @@ -0,0 +1 @@ +python-igraph==0.8.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 new file mode 100755 index 000000000..0918761fe --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.3.9 @@ -0,0 +1 @@ +python-igraph==0.9.0 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 new file mode 100644 
index 000000000..398b70edc --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.8 @@ -0,0 +1 @@ +python-igraph==0.11.4 diff --git a/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 new file mode 100644 index 000000000..398b70edc --- /dev/null +++ b/benchmarks/500.scientific/501.graph-pagerank/pypy/requirements.txt.arm.3.9 @@ -0,0 +1 @@ +python-igraph==0.11.4 diff --git a/benchmarks/wrappers/aws/pypy/handler.py b/benchmarks/wrappers/aws/pypy/handler.py new file mode 100644 index 000000000..907b2c612 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/handler.py @@ -0,0 +1,71 @@ + +import datetime, io, json, os, sys, uuid + +# Add current directory to allow location of packages +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +# TODO: usual trigger +# implement support for S3 and others +def handler(event, context): + + income_timestamp = datetime.datetime.now().timestamp() + + # HTTP trigger with API Gateaway + if 'body' in event: + event = json.loads(event['body']) + req_id = context.aws_request_id + event['request-id'] = req_id + event['income-timestamp'] = income_timestamp + begin = datetime.datetime.now() + from function import function + ret = function.handler(event) + end = datetime.datetime.now() + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + if 'logs' in event: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + from function import storage + storage_inst = storage.storage.get_instance() + b = event.get('logs').get('bucket') + storage_inst.upload_stream(b, '{}.json'.format(req_id), + io.BytesIO(json.dumps(log_data).encode('utf-8'))) + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / 
datetime.timedelta(microseconds=1) + else: + results_time = 0 + + # cold test + is_cold = False + fname = os.path.join('/tmp', 'cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + + cold_start_var = "" + if "cold_start" in os.environ: + cold_start_var = os.environ["cold_start"] + + return { + 'statusCode': 200, + 'body': json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': context.aws_request_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }) + } + diff --git a/benchmarks/wrappers/aws/pypy/nosql.py b/benchmarks/wrappers/aws/pypy/nosql.py new file mode 100644 index 000000000..72bc2d9da --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/nosql.py @@ -0,0 +1,121 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + self.client = boto3.resource("dynamodb") + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + 
f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def 
get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/aws/pypy/setup.py b/benchmarks/wrappers/aws/pypy/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/aws/pypy/storage.py b/benchmarks/wrappers/aws/pypy/storage.py new file mode 100644 index 000000000..4be0025e8 --- /dev/null +++ b/benchmarks/wrappers/aws/pypy/storage.py @@ -0,0 +1,53 @@ +import io +import os +import uuid + +import boto3 + + +class storage: + instance = None + client = None + + def __init__(self): + self.client = boto3.client('s3') + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + self.client.upload_file(filepath, bucket, key_name) + return key_name + + def download(self, bucket, file, filepath): + self.client.download_file(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects_v2(Bucket=bucket, Prefix=prefix) + for obj in objects['Contents']: + file_name = obj['Key'] + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, data): + key_name = storage.unique_name(file) + 
self.client.upload_fileobj(data, bucket, key_name) + return key_name + + def download_stream(self, bucket, file): + data = io.BytesIO() + self.client.download_fileobj(bucket, file, data) + return data.getbuffer() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/benchmarks/wrappers/gcp/pypy/handler.py b/benchmarks/wrappers/gcp/pypy/handler.py new file mode 100644 index 000000000..9b6989611 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/handler.py @@ -0,0 +1,72 @@ +import datetime, io, json, os, uuid, sys + +sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) + +# This variable is defined by SeBS during function creation. +if 'NOSQL_STORAGE_DATABASE' in os.environ: + from function import nosql + + nosql.nosql.get_instance( + os.environ['NOSQL_STORAGE_DATABASE'] + ) + + +def handler(req): + income_timestamp = datetime.datetime.now().timestamp() + req_id = req.headers.get('Function-Execution-Id') + + + req_json = req.get_json() + req_json['request-id'] = req_id + req_json['income-timestamp'] = income_timestamp + begin = datetime.datetime.now() + # We are deployed in the same directorygit status + from function import function + ret = function.handler(req_json) + end = datetime.datetime.now() + + + log_data = { + 'output': ret['result'] + } + if 'measurement' in ret: + log_data['measurement'] = ret['measurement'] + if 'logs' in req_json: + log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + results_begin = datetime.datetime.now() + from function import storage + storage_inst = storage.storage.get_instance() + b = req_json.get('logs').get('bucket') + storage_inst.upload_stream(b, '{}.json'.format(req_id), + io.BytesIO(json.dumps(log_data).encode('utf-8'))) + results_end = datetime.datetime.now() + results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) + else: + results_time = 0 + + # cold test + is_cold = 
False + fname = os.path.join('/tmp', 'cold_run') + if not os.path.exists(fname): + is_cold = True + container_id = str(uuid.uuid4())[0:8] + with open(fname, 'a') as f: + f.write(container_id) + else: + with open(fname, 'r') as f: + container_id = f.read() + + cold_start_var = "" + if "cold_start" in os.environ: + cold_start_var = os.environ["cold_start"] + + return json.dumps({ + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + 'results_time': results_time, + 'is_cold': is_cold, + 'result': log_data, + 'request_id': req_id, + 'cold_start_var': cold_start_var, + 'container_id': container_id, + }), 200, {'ContentType': 'application/json'} diff --git a/benchmarks/wrappers/gcp/pypy/nosql.py b/benchmarks/wrappers/gcp/pypy/nosql.py new file mode 100644 index 000000000..408712857 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/nosql.py @@ -0,0 +1,131 @@ +from typing import List, Optional, Tuple + +from google.cloud import datastore + + +class nosql: + + instance: Optional["nosql"] = None + + """ + Each benchmark supports up to two keys - one for grouping items, + and for unique identification of each item. + + In Google Cloud Datastore, we determine different tables by using + its value for `kind` name. + + The primary key is assigned to the `kind` value. + + To implement sorting semantics, we use the ancestor relation: + the sorting key is used as the parent. + It is the assumption that all related items will have the same parent. 
+ """ + + def __init__(self, database: str): + self._client = datastore.Client(database=database) + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + val = datastore.Entity(key=key) + val.update(data) + self._client.put(val) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + # There is no direct update - we have to fetch the entire entity and manually change fields. + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + res = self._client.get(key) + if res is None: + res = datastore.Entity(key=key) + res.update(data) + self._client.put(res) + + def get( + self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str] + ) -> Optional[dict]: + + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + res = self._client.get(key) + if res is None: + return None + + # Emulate the kind key + res[secondary_key[0]] = secondary_key[1] + + return res + + """ + This query must involve partition key - it does not scan across partitions. 
+ """ + + def query( + self, table_name: str, primary_key: Tuple[str, str], secondary_key_name: str + ) -> List[dict]: + + ancestor = self._client.key(primary_key[0], primary_key[1]) + query = self._client.query(kind=table_name, ancestor=ancestor) + res = list(query.fetch()) + + # Emulate the kind key + for item in res: + item[secondary_key_name] = item.key.name + + return res + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + parent_key = self._client.key(primary_key[0], primary_key[1]) + key = self._client.key( + # kind determines the table + table_name, + # main ID key + secondary_key[1], + # organization key + parent=parent_key, + ) + + return self._client.delete(key) + + @staticmethod + def get_instance(database: Optional[str] = None): + if nosql.instance is None: + assert database is not None + nosql.instance = nosql(database) + return nosql.instance diff --git a/benchmarks/wrappers/gcp/pypy/setup.py b/benchmarks/wrappers/gcp/pypy/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/gcp/pypy/storage.py b/benchmarks/wrappers/gcp/pypy/storage.py new file mode 100644 index 000000000..81163cb34 --- /dev/null +++ b/benchmarks/wrappers/gcp/pypy/storage.py @@ -0,0 +1,61 @@ +import io +import os +import uuid + +from google.cloud import storage as gcp_storage + + +class storage: + instance = None + client = None + + def __init__(self): + self.client = gcp_storage.Client() + + @staticmethod + def unique_name(name): + name, extension = 
os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key_name) + blob.upload_from_filename(filepath) + return key_name + + def download(self, bucket, file, filepath): + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(file) + blob.download_to_filename(filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.bucket(bucket).list_blobs(prefix=prefix) + for obj in objects: + file_name = obj.name + path_to_file = os.path.dirname(file_name) + os.makedirs(os.path.join(path, path_to_file), exist_ok=True) + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, data): + key_name = storage.unique_name(file) + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(key_name) + blob.upload_from_file(data) + return key_name + + def download_stream(self, bucket, file): + data = io.BytesIO() + bucket_instance = self.client.bucket(bucket) + blob = bucket_instance.blob(file) + blob.download_to_file(data) + return data.getbuffer() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance diff --git a/benchmarks/wrappers/gcp/python/setup.py b/benchmarks/wrappers/gcp/python/setup.py new file mode 100644 index 000000000..b3d878351 --- /dev/null +++ b/benchmarks/wrappers/gcp/python/setup.py @@ -0,0 +1,15 @@ +from distutils.core import setup +from glob import glob +from pkg_resources import parse_requirements + +with open('requirements.txt') as f: + requirements = [str(r) for r in parse_requirements(f)] + +setup( + name='function', + install_requires=requirements, + packages=['function'], + package_dir={'function': '.'}, + package_data={'function': 
glob('**', recursive=True)}, +) + diff --git a/benchmarks/wrappers/local/pypy/nosql.py b/benchmarks/wrappers/local/pypy/nosql.py new file mode 100644 index 000000000..0e816954c --- /dev/null +++ b/benchmarks/wrappers/local/pypy/nosql.py @@ -0,0 +1,131 @@ +from decimal import Decimal +from os import environ +from typing import List, Optional, Union, Tuple + +import boto3 + + +class nosql: + + instance: Optional["nosql"] = None + + def __init__(self): + + if environ["NOSQL_STORAGE_TYPE"] != "scylladb": + raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") + + self.client = boto3.resource( + "dynamodb", + region_name="None", + aws_access_key_id="None", + aws_secret_access_key="None", + endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", + ) + self._tables = {} + + # Based on: https://github.com/boto/boto3/issues/369#issuecomment-157205696 + def _remove_decimals(self, data: dict) -> Union[dict, list, int, float]: + + if isinstance(data, list): + return [self._remove_decimals(x) for x in data] + elif isinstance(data, dict): + return {k: self._remove_decimals(v) for k, v in data.items()} + elif isinstance(data, Decimal): + if data.as_integer_ratio()[1] == 1: + return int(data) + else: + return float(data) + else: + return data + + def _get_table(self, table_name: str): + + if table_name not in self._tables: + + env_name = f"NOSQL_STORAGE_TABLE_{table_name}" + + if env_name in environ: + aws_name = environ[env_name] + self._tables[table_name] = self.client.Table(aws_name) + else: + raise RuntimeError( + f"Couldn't find an environment variable {env_name} for table {table_name}" + ) + + return self._tables[table_name] + + def insert( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + data: dict, + ): + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).put_item(Item=data) + + def get( + self, table_name: str, primary_key: Tuple[str, str], 
secondary_key: Tuple[str, str] + ) -> dict: + + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + res = self._get_table(table_name).get_item(Key=data) + return self._remove_decimals(res["Item"]) + + def update( + self, + table_name: str, + primary_key: Tuple[str, str], + secondary_key: Tuple[str, str], + updates: dict, + ): + + key_data = {} + for key in (primary_key, secondary_key): + key_data[key[0]] = key[1] + + update_expression = "SET " + update_values = {} + update_names = {} + + # We use attribute names because DynamoDB reserves some keywords, like 'status' + for key, value in updates.items(): + + update_expression += f" #{key}_name = :{key}_value, " + update_values[f":{key}_value"] = value + update_names[f"#{key}_name"] = key + + update_expression = update_expression[:-2] + + self._get_table(table_name).update_item( + Key=key_data, + UpdateExpression=update_expression, + ExpressionAttributeValues=update_values, + ExpressionAttributeNames=update_names, + ) + + def query(self, table_name: str, primary_key: Tuple[str, str], _: str) -> List[dict]: + + res = self._get_table(table_name).query( + KeyConditionExpression=f"{primary_key[0]} = :keyvalue", + ExpressionAttributeValues={":keyvalue": primary_key[1]}, + )["Items"] + return self._remove_decimals(res) + + def delete(self, table_name: str, primary_key: Tuple[str, str], secondary_key: Tuple[str, str]): + data = {} + for key in (primary_key, secondary_key): + data[key[0]] = key[1] + + self._get_table(table_name).delete_item(Key=data) + + @staticmethod + def get_instance(): + if nosql.instance is None: + nosql.instance = nosql() + return nosql.instance diff --git a/benchmarks/wrappers/local/pypy/storage.py b/benchmarks/wrappers/local/pypy/storage.py new file mode 100644 index 000000000..b44968408 --- /dev/null +++ b/benchmarks/wrappers/local/pypy/storage.py @@ -0,0 +1,58 @@ +import io +import os +import uuid + +import minio + +class storage: + instance = None + client = None + + 
def __init__(self): + if 'MINIO_ADDRESS' in os.environ: + address = os.environ['MINIO_ADDRESS'] + access_key = os.environ['MINIO_ACCESS_KEY'] + secret_key = os.environ['MINIO_SECRET_KEY'] + self.client = minio.Minio( + address, + access_key=access_key, + secret_key=secret_key, + secure=False) + + @staticmethod + def unique_name(name): + name, extension = os.path.splitext(name) + return '{name}.{random}{extension}'.format( + name=name, + extension=extension, + random=str(uuid.uuid4()).split('-')[0] + ) + + def upload(self, bucket, file, filepath): + key_name = storage.unique_name(file) + self.client.fput_object(bucket, key_name, filepath) + return key_name + + def download(self, bucket, file, filepath): + self.client.fget_object(bucket, file, filepath) + + def download_directory(self, bucket, prefix, path): + objects = self.client.list_objects_v2(bucket, prefix, recursive=True) + for obj in objects: + file_name = obj.object_name + self.download(bucket, file_name, os.path.join(path, file_name)) + + def upload_stream(self, bucket, file, bytes_data): + key_name = storage.unique_name(file) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) + return key_name + + def download_stream(self, bucket, file): + data = self.client.get_object(bucket, file) + return data.read() + + def get_instance(): + if storage.instance is None: + storage.instance = storage() + return storage.instance + diff --git a/config/systems.json b/config/systems.json index 5a38b4965..645ee925c 100644 --- a/config/systems.json +++ b/config/systems.json @@ -64,10 +64,41 @@ ], "packages": [] } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:3.11" + }, + "arm64": { + "3.11": "pypy:3.11" + } + }, + "images": [ + "run", + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "storage.py", + "nosql.py" + ], + "packages": [], + "module_packages": { + "nosql": [ + "boto3==1.28.3" + ] + } + } } }, - "architecture": ["x64"], - "deployments": 
["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package" + ] }, "aws": { "languages": { @@ -121,10 +152,39 @@ "uuid": "3.4.0" } } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:3.11-slim" + }, + "arm64": { + "3.11": "pypy:3.11-slim" + } + }, + "images": [ + "build" + ], + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py", + "setup.py" + ], + "packages": [], + "module_packages": {} + } } }, - "architecture": ["x64", "arm64"], - "deployments": ["package", "container"] + "architecture": [ + "x64", + "arm64" + ], + "deployments": [ + "package", + "container" + ] }, "azure": { "languages": { @@ -188,19 +248,26 @@ "username": "docker_user" } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64" + ], + "deployments": [ + "package" + ] }, "gcp": { "languages": { "python": { "base_images": { "x64": { - "3.8": "ubuntu:22.04", - "3.9": "ubuntu:22.04", - "3.10": "ubuntu:22.04", - "3.11": "ubuntu:22.04", - "3.12": "ubuntu:22.04" + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" + }, + "arm64": { + "3.10": "python:3.10-slim", + "3.11": "python:3.11-slim", + "3.12": "python:3.12-slim" } }, "images": [ @@ -211,7 +278,39 @@ "files": [ "handler.py", "storage.py", - "nosql.py" + "nosql.py", + "setup.py" + ], + "packages": [], + "module_packages": { + "storage": [ + "google-cloud-storage" + ], + "nosql": [ + "google-cloud-datastore" + ] + } + } + }, + "pypy": { + "base_images": { + "x64": { + "3.11": "pypy:3.11-slim" + }, + "arm64": { + "3.11": "pypy:3.11-slim" + } + }, + "images": [ + "build" + ], + "username": "docker_user", + "deployment": { + "files": [ + "handler.py", + "storage.py", + "nosql.py", + "setup.py" ], "packages": [], "module_packages": { @@ -227,8 +326,8 @@ "nodejs": { "base_images": { "x64": { - "18": "ubuntu:22.04", - "20": "ubuntu:22.04" + "20": "node:20-slim", + "22": "node:22-slim" } }, "images": [ @@ -252,8 +351,14 @@ "username": 
"docker_user" } }, - "architecture": ["x64"], - "deployments": ["package"] + "architecture": [ + "x64", + "arm64" + ], + "deployments": [ + "package", + "container" + ] }, "openwhisk": { "languages": { @@ -313,7 +418,11 @@ } } }, - "architecture": ["x64"], - "deployments": ["container"] + "architecture": [ + "x64" + ], + "deployments": [ + "container" + ] } -} +} \ No newline at end of file diff --git a/dockerfiles/aws/pypy/Dockerfile.build b/dockerfiles/aws/pypy/Dockerfile.build new file mode 100644 index 000000000..20d50e6ba --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.build @@ -0,0 +1,17 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV PATH=/usr/sbin:$PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/aws/pypy/Dockerfile.function b/dockerfiles/aws/pypy/Dockerfile.function new file mode 100644 index 000000000..0724ad130 --- /dev/null +++ b/dockerfiles/aws/pypy/Dockerfile.function @@ -0,0 +1,58 @@ +ARG BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY . 
function/ +ENV PLATFORM_ARG="" + +# Non-wheel images +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc-dev \ + libjpeg-dev \ + zlib1g-dev \ + libtiff-dev \ + libfreetype6-dev \ + liblcms2-dev \ + libwebp-dev \ + g++ \ + cmake \ + make \ + && rm -rf /var/lib/apt/lists/* + +RUN touch function/__init__.py + # Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi + +RUN ls -la function/ +RUN if [ "${TARGET_ARCHITECTURE}" = "arm64" ] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + +RUN pip install awslambdaric boto3 +ENTRYPOINT ["/opt/pypy/bin/python", "-m", "awslambdaric"] +CMD ["function/handler.handler"] diff --git a/dockerfiles/gcp/nodejs/Dockerfile.build b/dockerfiles/gcp/nodejs/Dockerfile.build index 477f236bc..d98780c50 100755 --- a/dockerfiles/gcp/nodejs/Dockerfile.build +++ b/dockerfiles/gcp/nodejs/Dockerfile.build @@ -1,12 +1,8 @@ ARG BASE_IMAGE FROM ${BASE_IMAGE} ARG VERSION -ENV NVM_DIR=/nvm -#RUN install_node --ignore-verification-failure v${VERSION} -RUN apt-get update && apt-get install -y gosu wget -RUN mkdir -p ${NVM_DIR} && wget -qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash -RUN . 
${NVM_DIR}/nvm.sh && nvm install ${VERSION} && nvm alias default ${VERSION} && nvm use default +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu RUN mkdir -p /sebs/ COPY dockerfiles/nodejs_installer.sh /sebs/installer.sh diff --git a/dockerfiles/gcp/pypy/Dockerfile.build b/dockerfiles/gcp/pypy/Dockerfile.build new file mode 100755 index 000000000..c2c918dcf --- /dev/null +++ b/dockerfiles/gcp/pypy/Dockerfile.build @@ -0,0 +1,26 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} +ENV DEBIAN_FRONTEND="noninteractive" +ENV TZ="Europe/Zurich" + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gosu gcc build-essential libxml2 libxml2-dev zlib1g-dev gpg-agent zip\ + && apt-get update\ + && apt-get purge -y --auto-remove + +#RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH +RUN python -mvenv /sebs/env +ENV PATH /sebs/env/bin:${PATH} +ENV VIRTUAL_ENV /sebs/env + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/gcp/pypy/Dockerfile.function b/dockerfiles/gcp/pypy/Dockerfile.function new file mode 100644 index 000000000..18b53467c --- /dev/null +++ b/dockerfiles/gcp/pypy/Dockerfile.function @@ -0,0 +1,59 @@ +ARG BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY . 
function/ + +ENV PLATFORM_ARG="" + +RUN pip install functions-framework + +# Non-wheel images +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libc-dev \ + libjpeg-dev \ + zlib1g-dev \ + libtiff-dev \ + libfreetype6-dev \ + liblcms2-dev \ + libwebp-dev \ + g++ \ + cmake \ + make \ + && rm -rf /var/lib/apt/lists/* + +RUN touch function/__init__.py \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . 
\ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + + +ENV PYTHONPATH="/" + +CMD ["functions-framework", "--target=handler", "--source=function/handler.py"] \ No newline at end of file diff --git a/dockerfiles/gcp/python/Dockerfile.build b/dockerfiles/gcp/python/Dockerfile.build index 88554d230..ae52345b1 100755 --- a/dockerfiles/gcp/python/Dockerfile.build +++ b/dockerfiles/gcp/python/Dockerfile.build @@ -5,11 +5,10 @@ ENV PYTHON_VERSION=${VERSION} ENV DEBIAN_FRONTEND="noninteractive" ENV TZ="Europe/Zurich" +COPY --from=tianon/gosu:1.19-debian /usr/local/bin/gosu /usr/local/bin/gosu + RUN apt-get update\ - && apt-get install -y --no-install-recommends gosu gcc build-essential libxml2 libxml2-dev zlib1g-dev software-properties-common gpg-agent zip\ - && add-apt-repository -y ppa:deadsnakes/ppa\ - && apt-get update\ - && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-venv python${PYTHON_VERSION}-dev\ + && apt-get install -y --no-install-recommends gcc build-essential libxml2 libxml2-dev zlib1g-dev gpg-agent zip\ && apt-get purge -y --auto-remove #RUN export PATH=/opt/python3.7/bin:/opt/python3.6/bin:/opt/python3.5/bin:/opt/python3.4/bin:$PATH diff --git a/dockerfiles/gcp/python/Dockerfile.function b/dockerfiles/gcp/python/Dockerfile.function new file mode 100644 index 000000000..f165437e6 --- /dev/null +++ b/dockerfiles/gcp/python/Dockerfile.function @@ -0,0 +1,44 @@ +ARG BASE_IMAGE +ARG TARGET_ARCHITECTURE + +FROM --platform=linux/amd64 ${BASE_IMAGE} AS base-x64 +FROM --platform=linux/arm64/v8 ${BASE_IMAGE} AS base-arm64 + +FROM base-${TARGET_ARCHITECTURE} +ARG TARGET_ARCHITECTURE +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +COPY . 
function/ + +ENV PLATFORM_ARG="" + +RUN pip install functions-framework + +RUN touch function/__init__.py \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then \ + export PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:"; \ + fi \ + && if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && test -f "function/requirements.txt.arm.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.arm.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + elif test -f "function/requirements.txt.${PYTHON_VERSION}"; then \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + -r function/requirements.txt.${PYTHON_VERSION} \ + function/ && \ + pip cache purge; \ + else \ + pip install --no-cache-dir ${PLATFORM_ARG} --target . \ + -r function/requirements.txt \ + function/ && \ + pip cache purge; \ + fi + + +ENV PYTHONPATH="/" + +CMD ["functions-framework", "--target=handler", "--source=function/handler.py"] \ No newline at end of file diff --git a/dockerfiles/local/pypy/Dockerfile.build b/dockerfiles/local/pypy/Dockerfile.build new file mode 100755 index 000000000..6edb0bbac --- /dev/null +++ b/dockerfiles/local/pypy/Dockerfile.build @@ -0,0 +1,18 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} +ARG VERSION +ENV PYTHON_VERSION=${VERSION} + +RUN apt-get update\ + && apt-get install -y --no-install-recommends gcc build-essential python3-dev libxml2 libxml2-dev zlib1g-dev gosu\ + && apt-get purge -y --auto-remove + +RUN mkdir -p /sebs/ +COPY dockerfiles/pypy_installer.sh /sebs/installer.sh +COPY dockerfiles/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh + +# useradd and groupmod is installed in /usr/sbin which is not in PATH +ENV SCRIPT_FILE=/mnt/function/package.sh +CMD /bin/bash /sebs/installer.sh +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/local/pypy/Dockerfile.run b/dockerfiles/local/pypy/Dockerfile.run 
new file mode 100755 index 000000000..25fa2ebdb --- /dev/null +++ b/dockerfiles/local/pypy/Dockerfile.run @@ -0,0 +1,25 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +RUN deps=''\ + && apt-get update\ + # for route and sudo + && apt-get install --no-install-recommends -y curl gosu net-tools sudo ${deps}\ + && apt-get purge -y --auto-remove ${deps}\ + && pip3 install cffi minio bottle + +RUN mkdir -p /sebs +COPY dockerfiles/local/run.sh /sebs/ +COPY dockerfiles/local/*.py /sebs/ +COPY dockerfiles/local/pypy/*.py /sebs/ +COPY dockerfiles/local/pypy/run_server.sh /sebs/ +COPY dockerfiles/local/pypy/timeit.sh /sebs/ +COPY dockerfiles/local/pypy/runners.json /sebs/ +ADD third-party/pypapi/pypapi /sebs/pypapi +ENV PYTHONPATH=/sebs/.python_packages/lib/site-packages:$PYTHONPATH + +COPY dockerfiles/local/entrypoint.sh /sebs/entrypoint.sh +RUN chmod +x /sebs/entrypoint.sh +RUN chmod +x /sebs/run.sh + +ENTRYPOINT ["/sebs/entrypoint.sh"] diff --git a/dockerfiles/local/pypy/analyzer-runner.py b/dockerfiles/local/pypy/analyzer-runner.py new file mode 100644 index 000000000..624459795 --- /dev/null +++ b/dockerfiles/local/pypy/analyzer-runner.py @@ -0,0 +1,64 @@ + +import datetime, json, sys, subprocess, os +ip_address = os.environ['DOCKER_HOST_IP'] +cfg = json.load(open(sys.argv[1], 'r')) +ret = subprocess.run(['curl', '-X', 'POST', + '{}:{}/start'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2])], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) +if ret.returncode != 0: + import sys + print('Analyzer initialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(100) + + +from utils import * +from tools import * +# imported function +from function import function + +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = 
datetime.datetime.now() + res = function.handler(input_data) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() + + ret = subprocess.run( + [ + 'curl', '-X', 'POST', + '{}:{}/stop'.format(ip_address, cfg['benchmark']['analyzer']['analyzer_port']), + '-d', + '{{"uuid": "{}" }}'.format(sys.argv[2]) + ], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if ret.returncode != 0: + import sys + print('Analyzer deinitialization failed!') + print(ret.stderr.decode('utf-8')) + sys.exit(101) + experiment_data = {} + experiment_data['repetitions'] = repetitions + experiment_data['timestamps'] = process_timestamps(timedata) + experiment_data['start'] = str(start) + experiment_data['end'] = str(end) + print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) +except Exception as e: + print('Exception caught!') + print(e) + sys.exit(102) +sys.exit(0) diff --git a/dockerfiles/local/pypy/config.py b/dockerfiles/local/pypy/config.py new file mode 100644 index 000000000..e7115cc73 --- /dev/null +++ b/dockerfiles/local/pypy/config.py @@ -0,0 +1,5 @@ +import json + +from tools import * + +print(json.dumps(get_config())) diff --git a/dockerfiles/local/pypy/papi-runner.py b/dockerfiles/local/pypy/papi-runner.py new file mode 100644 index 000000000..0c82d476d --- /dev/null +++ b/dockerfiles/local/pypy/papi-runner.py @@ -0,0 +1,104 @@ + +import datetime, json, sys, traceback, csv + +from utils import * +from tools import * + +# imported function +from function import function + +import pypapi.exceptions + +class papi_benchmarker: + from pypapi import papi_low as papi + from pypapi import events as papi_events + + def __init__(self, papi_cfg): + self.events = [] + self.events_names = [] + self.count = 0 + + self.papi.library_init() + self.events = self.papi.create_eventset() + for event in papi_cfg['events']: + try: + 
self.papi.add_event(self.events, getattr(self.papi_events, event)) + except pypapi.exceptions.PapiInvalidValueError as err: + print('Adding event {event} failed!'.format(event=event)) + sys.exit(100) + + self.events_names = papi_cfg['events'] + self.count = len(papi_cfg['events']) + self.results = [] + + self.ins_granularity = papi_cfg['overflow_instruction_granularity'] + self.buffer_size = papi_cfg['overflow_buffer_size'] + self.start_time = datetime.datetime.now() + + self.papi.overflow_sampling(self.events, self.papi_events.PAPI_TOT_INS, + int(self.ins_granularity), int(self.buffer_size)) + + def start_overflow(self): + self.papi.start(self.events) + + def stop_overflow(self): + self.papi.stop(self.events) + + def get_results(self): + data = self.papi.overflow_sampling_results(self.events) + for vals in data: + for i in range(0, len(vals), self.count + 1): + chunks = vals[i:i+self.count+1] + measurement_time = datetime.datetime.fromtimestamp(chunks[0]/1e6) + time = (measurement_time - self.start_time) / datetime.timedelta(microseconds = 1) + self.results.append([measurement_time.strftime("%s.%f"), time] + list(chunks[1:])) + + def finish(self): + self.papi.cleanup_eventset(self.events) + self.papi.destroy_eventset(self.events) + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] +papi_experiments = papi_benchmarker(cfg['benchmark']['papi']) + +timedata = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + papi_experiments.start_overflow() + res = function.handler(input_data) + papi_experiments.stop_overflow() + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + 
+ +papi_experiments.get_results() +papi_experiments.finish() +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow( + ['Time','RelativeTime'] + papi_experiments.events_names + ) + for val in papi_experiments.results: + csv_writer.writerow(val) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/local/pypy/run_server.sh b/dockerfiles/local/pypy/run_server.sh new file mode 100755 index 000000000..ab09238e3 --- /dev/null +++ b/dockerfiles/local/pypy/run_server.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +pypy /sebs/server.py "$@" diff --git a/dockerfiles/local/pypy/runners.json b/dockerfiles/local/pypy/runners.json new file mode 100644 index 000000000..3ef17d1f4 --- /dev/null +++ b/dockerfiles/local/pypy/runners.json @@ -0,0 +1,7 @@ +{ + "papi": "papi-runner.py", + "time" : {"warm" : "time-in-proc.py", "cold" : "time-out-proc.py"}, + "memory": "analyzer-runner.py", + "disk-io": "analyzer-runner.py", + "config": ["pypy", "config.py"] +} diff --git a/dockerfiles/local/pypy/server.py b/dockerfiles/local/pypy/server.py new file mode 100644 index 000000000..4ed1314f2 --- /dev/null +++ b/dockerfiles/local/pypy/server.py @@ -0,0 +1,38 @@ +import datetime +import os +import sys +import uuid + +import bottle +from bottle import route, run, template, request + +CODE_LOCATION='/function' + +@route('/alive', method='GET') +def alive(): + return { + "result": "ok" + } + +@route('/', method='POST') +def process_request(): + begin = datetime.datetime.now() + from function import function + end = datetime.datetime.now() + # FIXME: measurements? 
+ ret = function.handler(request.json) + + return { + 'begin': begin.strftime('%s.%f'), + 'end': end.strftime('%s.%f'), + "request_id": str(uuid.uuid4()), + "is_cold": False, + "result": { + "output": ret + } + } + +sys.path.append(os.path.join(CODE_LOCATION)) +sys.path.append(os.path.join(CODE_LOCATION, '.python_packages/lib/site-packages/')) +run(host='0.0.0.0', port=int(sys.argv[1]), debug=True) + diff --git a/dockerfiles/local/pypy/time-in-proc.py b/dockerfiles/local/pypy/time-in-proc.py new file mode 100644 index 000000000..962da527a --- /dev/null +++ b/dockerfiles/local/pypy/time-in-proc.py @@ -0,0 +1,59 @@ + +import datetime, json, sys, traceback, csv, resource + +from utils import * +from tools import * + +# imported function +from function import function + + +cfg = json.load(open(sys.argv[1], 'r')) +repetitions = cfg['benchmark']['repetitions'] +disable_gc = cfg['benchmark']['disable_gc'] +input_data = cfg['input'] + +timedata = [0] * repetitions +os_times = [0] * repetitions +try: + start = start_benchmarking(disable_gc) + for i in range(0, repetitions): + begin = datetime.datetime.now() + begin_times = resource.getrusage(resource.RUSAGE_SELF) + res = function.handler(input_data) + end_times = resource.getrusage(resource.RUSAGE_SELF) + stop = datetime.datetime.now() + print(res, file = open( + get_result_prefix(LOGS_DIR, 'output', 'txt'), + 'w' + )) + timedata[i] = [begin, stop] + os_times[i] = [begin_times, end_times] + end = stop_benchmarking() +except Exception as e: + print('Exception caught!') + print(e) + traceback.print_exc() + + +result = get_result_prefix(RESULTS_DIR, cfg['benchmark']['name'], 'csv') +with open(result, 'w') as f: + csv_writer = csv.writer(f) + csv_writer.writerow(['#Seconds from epoch.microseconds; CPU times are in microseconds']) + csv_writer.writerow(['Begin','End','Duration','User','Sys']) + for i in range(0, len(timedata)): + csv_writer.writerow([ + timedata[i][0].strftime('%s.%f'), + timedata[i][1].strftime('%s.%f'), + 
(timedata[i][1] - timedata[i][0]) / + datetime.timedelta(microseconds=1), + (os_times[i][1].ru_utime - os_times[i][0].ru_utime) * 1e6, + (os_times[i][1].ru_stime - os_times[i][0].ru_stime) * 1e6 + ]) + +experiment_data = {} +experiment_data['repetitions'] = repetitions +experiment_data['timestamps'] = process_timestamps(timedata) +experiment_data['start'] = str(start) +experiment_data['end'] = str(end) +print(json.dumps({'experiment': experiment_data, 'runtime': get_config()}, indent=2)) diff --git a/dockerfiles/local/pypy/timeit.sh b/dockerfiles/local/pypy/timeit.sh new file mode 100755 index 000000000..73e6e5eaf --- /dev/null +++ b/dockerfiles/local/pypy/timeit.sh @@ -0,0 +1,5 @@ +#!/bin/bash +#ts=$(date +%s%N); +export TIMEFORMAT='%3R,%3U,%3S' +time pypy -c "from json import load; from function import function; print(function.handler(load(open('input.json', 'r'))))" > $1 +#tt=$((($(date +%s%N) - $ts)/1000)) ; echo $tt diff --git a/dockerfiles/local/pypy/tools.py b/dockerfiles/local/pypy/tools.py new file mode 100644 index 000000000..0413489e3 --- /dev/null +++ b/dockerfiles/local/pypy/tools.py @@ -0,0 +1,21 @@ + +import datetime, gc, platform, os, sys + +def start_benchmarking(disable_gc): + if disable_gc: + gc.disable() + return datetime.datetime.now() + +def stop_benchmarking(): + end = datetime.datetime.now() + gc.enable() + return end + +def get_config(): + # get currently loaded modules + # https://stackoverflow.com/questions/4858100/how-to-list-imported-modules + modulenames = set(sys.modules) & set(globals()) + allmodules = [sys.modules[name] for name in modulenames] + return {'name': 'pypy', + 'version': platform.python_version(), + 'modules': str(allmodules)} diff --git a/dockerfiles/local/runner.py b/dockerfiles/local/runner.py index 96261fc33..b1d0ca423 100644 --- a/dockerfiles/local/runner.py +++ b/dockerfiles/local/runner.py @@ -4,7 +4,7 @@ from utils import * def get_language(lang): - languages = {'python': 'python3', 'nodejs': 'nodejs'} + 
languages = {'python': 'python3', 'nodejs': 'nodejs', 'pypy': 'pypy'} return languages[lang] def get_runner(experiment, options=None): diff --git a/dockerfiles/pypy_installer.sh b/dockerfiles/pypy_installer.sh new file mode 100644 index 000000000..3d749ab4e --- /dev/null +++ b/dockerfiles/pypy_installer.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +cd /mnt/function + +#Probably remove this conditional, might break pypy builds, might lead to installation of CPython libraries +PLATFORM_ARG="" +if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]]; then + PLATFORM_ARG="--platform manylinux_2_17_aarch64 --only-binary=:all:" +fi + +if [[ "${TARGET_ARCHITECTURE}" == "arm64" ]] && [[ -f "requirements.txt.arm.${PYTHON_VERSION}" ]]; then + + pip3 -q install ${PLATFORM_ARG} -r requirements.txt.arm.${PYTHON_VERSION} -t .python_packages/lib/site-packages + +elif [[ -f "requirements.txt.${PYTHON_VERSION}" ]]; then + + pip3 -q install ${PLATFORM_ARG} -r requirements.txt.${PYTHON_VERSION} -t .python_packages/lib/site-packages + +else + + pip3 -q install ${PLATFORM_ARG} -r requirements.txt -t .python_packages/lib/site-packages + +fi + +if [[ -f "${SCRIPT_FILE}" ]]; then + /bin/bash ${SCRIPT_FILE} .python_packages/lib/site-packages +fi + + diff --git a/requirements.txt b/requirements.txt index b8c1517f0..821bcf5e8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,4 @@ scipy pycurl>=7.43 click>=7.1.2 rich - +openpyxl diff --git a/sebs.py b/sebs.py index 80fb11ed3..2881f40c6 100755 --- a/sebs.py +++ b/sebs.py @@ -64,7 +64,7 @@ def simplified_common_params(func): @click.option( "--language", default=None, - type=click.Choice(["python", "nodejs"]), + type=click.Choice(["python", "nodejs", "pypy"]), help="Benchmark language", ) @click.option("--language-version", default=None, type=str, help="Benchmark language version") diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f9..bb4fe31a5 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -136,6 +136,7 @@ def package_code( 
CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], + "pypy": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } package_config = CONFIG_FILES[language_name] @@ -173,7 +174,9 @@ def _map_language_runtime(self, language: str, runtime: str): # AWS uses different naming scheme for Node.js versions # For example, it's 12.x instead of 12. if language == "nodejs": - return f"{runtime}.x" + return f"{language}{runtime}.x" + elif language == "python": + return f"{language}{runtime}" return runtime def create_function( @@ -231,6 +234,9 @@ def create_function( create_function_params["PackageType"] = "Image" create_function_params["Code"] = {"ImageUri": container_uri} else: + if language == "pypy": + raise RuntimeError("PyPy Zip deployment is not supported on AWS") + create_function_params["PackageType"] = "Zip" if code_size < 50 * 1024 * 1024: package_body = open(package, "rb").read() @@ -251,9 +257,7 @@ def create_function( "S3Key": code_prefix, } - create_function_params["Runtime"] = "{}{}".format( - language, self._map_language_runtime(language, language_runtime) - ) + create_function_params["Runtime"] = self._map_language_runtime(language, language_runtime) create_function_params["Handler"] = "handler.handler" create_function_params = { @@ -407,6 +411,7 @@ def default_function_name( ) -> str: # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id + func_name = "sebs-{}-{}-{}-{}-{}".format( resource_id, code_package.benchmark, diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820c..39b919ef7 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -252,8 +252,9 @@ def hash_directory(directory: str, deployment: str, language: str): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "pypy": ["*.py", "requirements.txt*"], } - WRAPPERS = {"python": "*.py", "nodejs": "*.js"} 
+ WRAPPERS = {"python": "*.py", "nodejs": "*.js", "pypy": "*.py"} NON_LANG_FILES = ["*.sh", "*.json"] selected_files = FILES[language] + NON_LANG_FILES for file_type in selected_files: @@ -316,6 +317,7 @@ def copy_code(self, output_dir): FILES = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], + "pypy": ["*.py", "requirements.txt*"], } path = os.path.join(self.benchmark_path, self.language_name) for file_type in FILES[self.language_name]: @@ -402,7 +404,7 @@ def add_deployment_package_nodejs(self, output_dir): def add_deployment_package(self, output_dir): from sebs.faas.function import Language - if self.language == Language.PYTHON: + if self.language == Language.PYTHON or self.language == Language.PYPY: self.add_deployment_package_python(output_dir) elif self.language == Language.NODEJS: self.add_deployment_package_nodejs(output_dir) @@ -483,7 +485,7 @@ def ensure_image(name: str) -> None: } # run Docker container to install packages - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json"} + PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json", "pypy": "requirements.txt"} file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) if os.path.exists(file): try: diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7b940f8df..c8271f2f9 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -83,7 +83,8 @@ def run(self): for memory in memory_sizes: self.logging.info(f"Begin experiment on memory size {memory}") self._function.config.memory = memory - self._deployment_client.update_function(self._function, self._benchmark, False, "") + uri = "" if not self._benchmark.container_deployment else self._benchmark.container_uri + self._deployment_client.update_function(self._function, self._benchmark, self._benchmark.container_deployment, uri) self._sebs_client.cache_client.update_function(self._function) self.run_configuration(settings, 
settings["repetitions"], suffix=str(memory)) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf4..ddb5117d0 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -263,6 +263,7 @@ def deserialize(cached_config: dict) -> "Trigger": class Language(Enum): PYTHON = "python" NODEJS = "nodejs" + PYPY = "pypy" # FIXME: 3.7+ python with future annotations @staticmethod @@ -270,7 +271,7 @@ def deserialize(val: str) -> Language: for member in Language: if member.value == val: return member - raise Exception(f"Unknown language type {member}") + raise Exception(f"Unknown language type {val}") class Architecture(Enum): @@ -299,7 +300,7 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict) -> Runtime: - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} + languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS, "pypy": Language.PYPY} return Runtime(language=languages[config["language"]], version=config["version"]) diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 56d3b5c41..0ae550be3 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -1,6 +1,8 @@ import json import os from typing import cast, List, Optional, Tuple +import time +from googleapiclient.errors import HttpError from sebs.cache import Cache from sebs.faas.config import Config, Credentials, Resources @@ -108,6 +110,7 @@ def update_cache(self, cache: Cache): class GCPResources(Resources): def __init__(self): super().__init__(name="gcp") + self._container_repository = None @staticmethod def initialize(res: Resources, dct: dict): @@ -120,7 +123,9 @@ def initialize(res: Resources, dct: dict): """ def serialize(self) -> dict: - return super().serialize() + out = super().serialize() + out["container_repository"] = self._container_repository + return out @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": @@ -147,6 +152,61 @@ def deserialize(config: dict, cache: Cache, handlers: 
LoggingHandlers) -> "Resou def update_cache(self, cache: Cache): super().update_cache(cache) + @property + def container_repository(self) -> str: + return self._container_repository + + def check_container_repository_exists(self, config: Config, ar_client): + try: + parent = f"projects/{config.credentials.project_name}/locations/{config.region}" + repo_full_name = f"{parent}/repositories/{self._container_repository}" + self.logging.info("Checking if container repository exists...") + ar_client.projects().locations().repositories().get(name=repo_full_name).execute() + return True + except HttpError as e: + if e.resp.status == 404: + self.logging.error("Container repository does not exist.") + return False + else: + raise e + + def create_container_repository(self, ar_client, parent): + request_body = { + "format": "DOCKER", + "description": "Container repository for SEBS" + } + self._container_repository = f"sebs-benchmarks-{self._resources_id}" + operation = ar_client.projects().locations().repositories().create( + parent=parent, + body=request_body, + repositoryId=self._container_repository + ).execute() + + while True: + # Operations for AR are global or location specific + op_name = operation['name'] + op = ar_client.projects().locations().operations().get(name=op_name).execute() + + if op.get('done'): + if 'error' in op: + raise Exception(f"Failed to create repo: {op['error']}") + self.logging.info("Repository created successfully.") + break + time.sleep(2) + + def get_container_repository(self, config: Config, ar_client): + if self._container_repository is not None: + return self._container_repository + + self._container_repository = f"sebs-benchmarks-{self._resources_id}" + if self.check_container_repository_exists(config, ar_client): + return self._container_repository + + parent = f"projects/{config.credentials.project_name}/locations/{config.region}" + self.create_container_repository(ar_client, parent) + return self._container_repository + + """ FaaS 
system config defining cloud region (if necessary), credentials and diff --git a/sebs/gcp/container.py b/sebs/gcp/container.py new file mode 100644 index 000000000..9afcc7664 --- /dev/null +++ b/sebs/gcp/container.py @@ -0,0 +1,80 @@ +import docker +from typing import Tuple + +from sebs.gcp.config import GCPConfig +from sebs.config import SeBSConfig +from sebs.faas.container import DockerContainer +from googleapiclient.discovery import build +from google.oauth2 import service_account +from googleapiclient.errors import HttpError +from google.auth.transport.requests import Request + + +class GCRContainer(DockerContainer): + @staticmethod + def name(): + return "gcp" + + @staticmethod + def typename() -> str: + return "GCP.GCRContainer" + + def __init__( + self, + system_config: SeBSConfig, + config: GCPConfig, + docker_client: docker.client.DockerClient, + ): + super().__init__(system_config, docker_client) + self.config = config + self.creds = service_account.Credentials.from_service_account_file(self.config.credentials.gcp_credentials, scopes=["https://www.googleapis.com/auth/cloud-platform"]) + self.ar_client = build("artifactregistry", "v1", credentials=self.creds) + + def registry_name( + self, benchmark: str, language_name: str, language_version: str, architecture: str + ) -> Tuple[str, str, str, str]: + + project_id = self.config.credentials.project_name + region = self.config.region + registry_name = f"{region}-docker.pkg.dev/{project_id}" + repository_name = self.config.resources.get_container_repository(self.config, self.ar_client) + + image_tag = self.system_config.benchmark_image_tag( + self.name(), benchmark, language_name, language_version, architecture + ) + image_uri = f"{registry_name}/{repository_name}/{benchmark}:{image_tag}" + + return registry_name, repository_name, image_tag, image_uri + + def find_image(self, repository_name, image_tag) -> bool: + try: + response = self.ar_client.projects().locations().repositories().dockerImages().list( + 
parent=f"projects/{self.config.credentials.project_name}/locations/{self.config.region}/repositories/{repository_name}" + ).execute() + if "dockerImages" in response: + for image in response["dockerImages"]: + if "latest" in image["tags"] and image_tag in image["tags"]: + return True + except HttpError as e: + if e.resp.status == 404: + return False + raise e + return False + + def push_image(self, repository_uri, image_tag): + self.logging.info("Authenticating Docker against Artifact Registry...") + self.creds.refresh(Request()) + auth_token = self.creds.token + + try: + self.docker_client.login( + username="oauth2accesstoken", + password=auth_token, + registry=repository_uri + ) + super().push_image(repository_uri, image_tag) + self.logging.info(f"Successfully pushed the image to registry {repository_uri}.") + except docker.errors.DockerException as e: + self.logging.error(f"Failed to push the image to registry {repository_uri}.") + self.logging.error(f"Error: {str(e)}") + raise RuntimeError("Couldn't push to registry.") diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 6525034c2..73db95453 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -23,6 +23,7 @@ from sebs.gcp.resources import GCPSystemResources from sebs.gcp.storage import GCPStorage from sebs.gcp.function import GCPFunction +from sebs.gcp.container import GCRContainer from sebs.utils import LoggingHandlers """ @@ -77,10 +78,14 @@ def function_type() -> "Type[Function]": :param config: systems-specific parameters """ - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): self.function_client = build("cloudfunctions", "v1", cache_discovery=False) + # Container-based functions are created via run-client + self.run_client = build("run", "v2", cache_discovery=False) self.initialize_resources(select_prefix=resource_prefix) + self.gcr_client = GCRContainer( + self.system_config, self.config, self.docker_client + ) def get_function_client(self): return self.function_client @@ 
-90,13 +95,14 @@ def default_function_name( ) -> str: # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id - func_name = "sebs-{}-{}-{}-{}".format( + func_name = "sebs-{}-{}-{}-{}-{}".format( resource_id, code_package.benchmark, code_package.language_name, code_package.language_version, + code_package.architecture ) - return GCP.format_function_name(func_name) + return GCP.format_function_name(func_name) if not code_package.container_deployment else func_name.replace(".", "-") @staticmethod def format_function_name(func_name: str) -> str: @@ -106,6 +112,10 @@ def format_function_name(func_name: str) -> str: func_name = func_name.replace(".", "_") return func_name + @staticmethod + def is_service_function(full_function_name: str): + return "/services/" in full_function_name + """ Apply the system-specific code packaging routine to build benchmark. The benchmark creates a code directory with the following structure: @@ -133,17 +143,22 @@ def package_code( ) -> Tuple[str, int, str]: container_uri = "" - + if container_deployment: - raise NotImplementedError("Container Deployment is not supported in GCP") + # build base image and upload to GCR + _, container_uri = self.gcr_client.build_base_image( + directory, language_name, language_version, architecture, benchmark, is_cached + ) CONFIG_FILES = { "python": ["handler.py", ".python_packages"], "nodejs": ["handler.js", "node_modules"], + "pypy" : ["handler.py", ".python_packages"] } HANDLER = { "python": ("handler.py", "main.py"), "nodejs": ("handler.js", "index.js"), + "pypy": ("handler.py", "main.py"), } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") @@ -154,10 +169,11 @@ def package_code( shutil.move(file, function_dir) # rename handler function.py since in gcp it has to be caled main.py - old_name, new_name = HANDLER[language_name] - old_path = os.path.join(directory, old_name) - new_path = 
os.path.join(directory, new_name) - shutil.move(old_path, new_path) + if not container_deployment: + old_name, new_name = HANDLER[language_name] + old_path = os.path.join(directory, old_name) + new_path = os.path.join(directory, new_name) + shutil.move(old_path, new_path) """ zip the whole directory (the zip-file gets uploaded to gcp later) @@ -179,7 +195,8 @@ def package_code( logging.info("Zip archive size {:2f} MB".format(mbytes)) # rename the main.py back to handler.py - shutil.move(new_path, old_path) + if not container_deployment: + shutil.move(new_path, old_path) return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size, container_uri @@ -191,8 +208,6 @@ def create_function( container_uri: str, ) -> "GCPFunction": - if container_deployment: - raise NotImplementedError("Container deployment is not supported in GCP") package = code_package.code_location benchmark = code_package.benchmark @@ -206,16 +221,26 @@ def create_function( function_cfg = FunctionConfig.from_benchmark(code_package) architecture = function_cfg.architecture.value - code_package_name = cast(str, os.path.basename(package)) - code_package_name = f"{architecture}-{code_package_name}" - code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) - code_prefix = os.path.join(benchmark, code_package_name) - storage_client.upload(code_bucket, package, code_prefix) + if architecture == "arm64" and not container_deployment: + raise RuntimeError("GCP does not support arm64 for non-container deployments") + - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + if container_deployment: + full_service_name = GCP.get_full_service_name(project_name, location, func_name) + get_req = self.run_client.projects().locations().services().get(name=full_service_name) + else: + if code_package.language_name == "pypy": + raise RuntimeError("PyPy Zip deployment is not supported on GCP") - full_func_name = GCP.get_full_function_name(project_name, 
location, func_name) - get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + full_func_name = GCP.get_full_function_name(project_name, location, func_name) + code_package_name = cast(str, os.path.basename(package)) + code_package_name = f"{architecture}-{code_package_name}" + code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + code_prefix = os.path.join(benchmark, code_package_name) + storage_client.upload(code_bucket, package, code_prefix) + + self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) + get_req = self.function_client.projects().locations().functions().get(name=full_func_name) try: get_req.execute() @@ -223,45 +248,95 @@ def create_function( envs = self._generate_function_envs(code_package) - create_req = ( - self.function_client.projects() - .locations() - .functions() - .create( - location="projects/{project_name}/locations/{location}".format( - project_name=project_name, location=location - ), - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": memory, - "timeout": str(timeout) + "s", - "httpsTrigger": {}, - "ingressSettings": "ALLOW_ALL", - "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - "environmentVariables": envs, - }, + if container_deployment: + # In the service model, envs is a list of objects with attributes name and value + envs = self._transform_service_envs(envs) + self.logging.info("Deploying run container service") + parent = f"projects/{project_name}/locations/{location}" + create_req = ( + self.run_client.projects() + .locations() + .services() + .create( + parent=parent, + serviceId=func_name, + body={ + "template": { + "containers": [ + { + "image": container_uri, + "ports": [{"containerPort": 8080}], + "env": envs, + "resources": { + "limits": { + "memory": f"{memory if memory >= 512 else 512}Mi", + } + } + } 
+ ], + "timeout": f"{timeout}s", + }, + "ingress": "INGRESS_TRAFFIC_ALL" + }, + ) + ) + else: + create_req = ( + self.function_client.projects() + .locations() + .functions() + .create( + location="projects/{project_name}/locations/{location}".format( + project_name=project_name, location=location + ), + body={ + "name": full_func_name, + "entryPoint": "handler", + "runtime": code_package.language_name + language_runtime.replace(".", ""), + "availableMemoryMb": memory, + "timeout": str(timeout) + "s", + "httpsTrigger": {}, + "ingressSettings": "ALLOW_ALL", + "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, + "environmentVariables": envs, + }, + ) ) - ) create_req.execute() self.logging.info(f"Function {func_name} has been created!") - allow_unauthenticated_req = ( - self.function_client.projects() - .locations() - .functions() - .setIamPolicy( - resource=full_func_name, - body={ - "policy": { - "bindings": [ - {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} - ] - } - }, + if container_deployment: + allow_unauthenticated_req = ( + self.run_client.projects() + .locations() + .services() + .setIamPolicy( + resource=full_service_name, + body={ + "policy": { + "bindings": [ + {"role": "roles/run.invoker", "members": ["allUsers"]} + ] + } + }, + ) + ) + else: + allow_unauthenticated_req = ( + self.function_client.projects() + .locations() + .functions() + .setIamPolicy( + resource=full_func_name, + body={ + "policy": { + "bindings": [ + {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} + ] + } + }, + ) ) - ) # Avoid infinite loop MAX_RETRIES = 5 @@ -317,25 +392,50 @@ def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) location = self.config.region project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - 
self.function_client.projects().locations().functions().get(name=full_func_name) - ) - deployed = False - begin = time.time() - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable - self.logging.error(f"Failed to deploy function: {function.name}") - raise RuntimeError("Deployment timeout!") - self.logging.info(f"Function {function.name} - deployed!") - invoke_url = status_res["httpsTrigger"]["url"] - + + # Cloud Functions v1 do not have "-" in their name, Cloud Run Services do + if "-" in function.name: + # Cloud Run Service + service_id = function.name.lower() + full_service_name = GCP.get_full_service_name(project_name, self.config.region, service_id) + self.logging.info(f"Waiting for service {full_service_name} to be ready...") + deployed = False + begin = time.time() + while not deployed: + svc = self.run_client.projects().locations().services().get(name=full_service_name).execute() + condition = svc.get("terminalCondition", {}) + if condition.get("type") == "Ready" and condition.get("state") == "CONDITION_SUCCEEDED": + deployed = True + else: + time.sleep(3) + + if time.time() - begin > 300: + self.logging.error(f"Failed to deploy service: {function.name}") + raise RuntimeError("Deployment timeout!") + + self.logging.info(f"Service {function.name} - deployed!") + invoke_url = svc["uri"] + + else: + full_func_name = GCP.get_full_function_name(project_name, location, function.name) + our_function_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name) + ) + deployed = False + begin = time.time() + while not deployed: + status_res = our_function_req.execute() + if status_res["status"] == "ACTIVE": + deployed = True + else: + time.sleep(3) + if time.time() - begin > 300: # wait 5 minutes; TODO: make it configurable + self.logging.error(f"Failed to deploy function: 
{function.name}") + raise RuntimeError("Deployment timeout!") + self.logging.info(f"Function {function.name} - deployed!") + invoke_url = status_res["httpsTrigger"]["url"] + trigger = HTTPTrigger(invoke_url) else: raise RuntimeError("Not supported!") @@ -363,9 +463,6 @@ def update_function( container_uri: str, ): - if container_deployment: - raise NotImplementedError("Container deployment is not supported in GCP") - function = cast(GCPFunction, function) language_runtime = code_package.language_version @@ -379,60 +476,122 @@ def update_function( storage.upload(bucket, code_package.code_location, code_package_name) envs = self._generate_function_envs(code_package) - - self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) - req = ( - self.function_client.projects() - .locations() - .functions() - .patch( - name=full_func_name, - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": function.config.memory, - "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, - "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - "environmentVariables": envs, - }, + + if container_deployment: + full_service_name = GCP.get_full_service_name(self.config.project_name, self.config.region, function.name) + + memory = function.config.memory + timeout = function.config.timeout + + # Cloud Run v2 Service Update + service_body = { + "template": { + "maxInstanceRequestConcurrency" : 1, + "containers": [ + { + "image": container_uri, + "resources": { + "limits": { + "memory": f"{memory if memory >= 512 else 512}Mi", + } + }, + "env": [{"name": k, "value": v} for k, v in envs.items()] + } + ], + "timeout": f"{timeout}s" + } + } + + req = self.run_client.projects().locations().services().patch( + name=full_service_name, + 
body=service_body ) - ) - res = req.execute() - versionId = res["metadata"]["versionId"] - retries = 0 - last_version = -1 - while retries < 100: - is_deployed, last_version = self.is_deployed(function.name, versionId) - if not is_deployed: - time.sleep(5) - retries += 1 - else: - break - if retries > 0 and retries % 10 == 0: - self.logging.info(f"Waiting for function deployment, {retries} retries.") - if retries == 100: - raise RuntimeError( - "Failed to publish new function code after 10 attempts. " - f"Version {versionId} has not been published, last version {last_version}." + + else: + + self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name + ) + req = ( + self.function_client.projects() + .locations() + .functions() + .patch( + name=full_func_name, + body={ + "name": full_func_name, + "entryPoint": "handler", + "runtime": code_package.language_name + language_runtime.replace(".", ""), + "availableMemoryMb": function.config.memory, + "timeout": str(function.config.timeout) + "s", + "httpsTrigger": {}, + "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, + "environmentVariables": envs, + }, + ) ) - self.logging.info("Published new function code and configuration.") + + res = req.execute() + + if container_deployment: + self.logging.info(f"Updated Cloud Run service {function.name}, waiting for operation completion...") + + op_name = res["name"] + op_res = self.run_client.projects().locations().operations().wait(name=op_name).execute() + + if "error" in op_res: + raise RuntimeError(f"Cloud Run update failed: {op_res['error']}") + + self.logging.info(f"Cloud Run service {function.name} updated and ready.") + + else: + versionId = res["metadata"]["versionId"] + retries = 0 + last_version = -1 + while retries < 100: + is_deployed, last_version = self.is_deployed(function.name, versionId) + if not is_deployed: + 
time.sleep(5) + retries += 1 + else: + break + if retries > 0 and retries % 10 == 0: + self.logging.info(f"Waiting for function deployment, {retries} retries.") + if retries == 100: + raise RuntimeError( + "Failed to publish new function code after 10 attempts. " + f"Version {versionId} has not been published, last version {last_version}." + ) + self.logging.info("Published new function code and configuration.") def _update_envs(self, full_function_name: str, envs: dict) -> dict: - get_req = ( - self.function_client.projects().locations().functions().get(name=full_function_name) - ) - response = get_req.execute() + if GCP.is_service_function(full_function_name): + # Envs are in template.containers[0].env (list of {name, value}) + get_req = self.run_client.projects().locations().services().get(name=full_function_name) + response = get_req.execute() + + existing_envs = {} + if "template" in response and "containers" in response["template"]: + container = response["template"]["containers"][0] + if "env" in container: + for e in container["env"]: + existing_envs[e["name"]] = e["value"] + + # Merge: new overrides old + envs = {**existing_envs, **envs} + + else: + get_req = ( + self.function_client.projects().locations().functions().get(name=full_function_name) + ) + response = get_req.execute() - # preserve old variables while adding new ones. - # but for conflict, we select the new one - if "environmentVariables" in response: - envs = {**response["environmentVariables"], **envs} + # preserve old variables while adding new ones. 
+ # but for conflict, we select the new one + if "environmentVariables" in response: + envs = {**response["environmentVariables"], **envs} return envs @@ -450,6 +609,10 @@ def _generate_function_envs(self, code_package: Benchmark) -> dict: return envs + + def _transform_service_envs(self, envs: dict) -> list: + return [{"name": k, "value": v} for k, v in envs.items()] + def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} ): @@ -457,9 +620,16 @@ def update_function_configuration( assert code_package.has_input_processed function = cast(GCPFunction, function) - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) + if code_package.container_deployment: + full_func_name = GCP.get_full_service_name( + self.config.project_name, + self.config.region, + function.name.replace("_", "-").lower() + ) + else: + full_func_name = GCP.get_full_function_name( + self.config.project_name, self.config.region, function.name + ) envs = self._generate_function_envs(code_package) envs = {**envs, **env_variables} @@ -468,7 +638,47 @@ def update_function_configuration( if len(envs) > 0: envs = self._update_envs(full_func_name, envs) - if len(envs) > 0: + if GCP.is_service_function(full_func_name): + # Cloud Run Configuration Update + + # Prepare envs list + env_vars = [{"name": k, "value": v} for k, v in envs.items()] + memory = function.config.memory + timeout = function.config.timeout + + service_body = { + "template": { + "maxInstanceRequestConcurrency" : 1, + "containers": [ + { + "image": code_package.container_uri, + "resources": { + "limits": { + "memory": f"{memory if memory > 512 else 512}Mi", + } + }, + "env": env_vars + } + ], + "timeout": f"{timeout}s" + } + } + + req = self.run_client.projects().locations().services().patch( + name=full_func_name, + body=service_body + ) + res = req.execute() + + self.logging.info(f"Updated Cloud Run configuration 
{function.name}, waiting for operation...") + op_name = res["name"] + op_res = self.run_client.projects().locations().operations().wait(name=op_name).execute() + if "error" in op_res: + raise RuntimeError(f"Cloud Run config update failed: {op_res['error']}") + + return 0 + + elif len(envs) > 0: req = ( self.function_client.projects() @@ -527,6 +737,10 @@ def update_function_configuration( def get_full_function_name(project_name: str, location: str, func_name: str): return f"projects/{project_name}/locations/{location}/functions/{func_name}" + @staticmethod + def get_full_service_name(project_name: str, location: str, service_name: str): + return f"projects/{project_name}/locations/{location}/services/{service_name}" + def prepare_experiment(self, benchmark): logs_bucket = self._system_resources.get_storage().add_output_bucket( benchmark, suffix="logs" @@ -720,14 +934,31 @@ def get_functions(self, code_package: Benchmark, function_names: List[str]) -> L return functions def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) - function_client = self.get_function_client() - status_req = function_client.projects().locations().functions().get(name=name) - status_res = status_req.execute() - if versionId == -1: - return (status_res["status"] == "ACTIVE", status_res["versionId"]) + + #v1 functions don't allow hyphens, new functions don't allow underscores + if "pypy" in func_name or '-' in func_name: + # Cloud Run Service + service_name = func_name.replace("_", "-").lower() + name = GCP.get_full_service_name(self.config.project_name, self.config.region, service_name) + try: + svc = self.run_client.projects().locations().services().get(name=name).execute() + conditions = svc.get("terminalCondition", {}) + is_ready = conditions.get("type", "") == "Ready" + return (is_ready, 0) + except HttpError: + return (False, -1) else: - return (status_res["versionId"] 
== versionId, status_res["versionId"]) + name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) + try: + function_client = self.get_function_client() + status_req = function_client.projects().locations().functions().get(name=name) + status_res = status_req.execute() + if versionId == -1: + return (status_res["status"] == "ACTIVE", status_res["versionId"]) + else: + return (status_res["versionId"] == versionId, status_res["versionId"]) + except HttpError: + return (False, -1) def deployment_version(self, func: Function) -> int: name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ffb..1f40016aa 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -124,6 +124,7 @@ def package_code( CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], + "pypy": ["handler.py", "requirements.txt", ".python_packages"], } package_config = CONFIG_FILES[language_name] function_dir = os.path.join(directory, "function") diff --git a/tools/build_docker_images.py b/tools/build_docker_images.py index 5336fb485..6d6911aee 100755 --- a/tools/build_docker_images.py +++ b/tools/build_docker_images.py @@ -13,7 +13,7 @@ "--deployment", default=None, choices=["local", "aws", "azure", "gcp"], action="store" ) parser.add_argument("--type", default=None, choices=["build", "run", "manage"], action="store") -parser.add_argument("--language", default=None, choices=["python", "nodejs"], action="store") +parser.add_argument("--language", default=None, choices=["python", "nodejs", "pypy"], action="store") parser.add_argument("--language-version", default=None, type=str, action="store") args = parser.parse_args() config = json.load(open(os.path.join(PROJECT_DIR, "config", "systems.json"), "r"))