IEEE-Ege · MelisCakan · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025 · Apr 2, 2025
diff --git a/src/modules/DEMO_save_result.ipynb b/src/modules/DEMO_save_result.ipynb
@@ -0,0 +1,119 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "efb8299d-379f-4e0b-aaca-e342e64226ff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scanpy as sc\n",
+    "import os\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "da42d05f-266b-4610-9fcf-33c1006f24c7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "61f523d945954c429e3152c8b60686bf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0.00/5.58M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Saving AnnData object to: pbmc3k_processed.h5ad\n",
+      "AnnData object saved successfully to: pbmc3k_processed.h5ad\n",
+      "\n",
+      "Verification: The file 'pbmc3k_processed.h5ad' has been created.\n"
+     ]
+    }
+   ],
+   "source": [
+    "def demo_save_result():\n",
+    "    \"\"\"Demo function to save an AnnData object to a file.\"\"\"\n",
+    "    # Load a sample dataset (or use the one preprocessed in the previous demo)\n",
+    "    try:\n",
+    "        adata = sc.datasets.pbmc3k()\n",
+    "        data_normalization_for_demo(adata) # Apply minimal normalization for the demo\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error loading sample dataset: {e}\")\n",
+    "        print(\"Using a dummy AnnData object for the save demo.\")\n",
+    "        import numpy as np\n",
+    "        dummy_matrix = np.array([[1, 2], [3, 4]])\n",
+    "        dummy_var = ['gene1', 'gene2']\n",
+    "        dummy_obs = ['cell1', 'cell2']\n",
+    "        adata = sc.AnnData(dummy_matrix, obs={'obs_names': dummy_obs}, var={'var_names': dummy_var})\n",
+    "        adata.var_names = dummy_var\n",
+    "        adata.obs_names = dummy_obs\n",
+    "\n",
+    "    # Define the result file name\n",
+    "    result_file = \"pbmc3k_processed.h5ad\"  # .h5ad is the recommended format for AnnData\n",
+    "\n",
+    "    print(f\"Saving AnnData object to: {result_file}\")\n",
+    "    save_result(adata, result_file)\n",
+    "    print(f\"AnnData object saved successfully to: {result_file}\")\n",
+    "\n",
+    "    # Verify if the file was created (optional)\n",
+    "    if os.path.exists(result_file):\n",
+    "        print(f\"\\nVerification: The file '{result_file}' has been created.\")\n",
+    "    else:\n",
+    "        print(f\"\\nVerification: The file '{result_file}' was NOT created.\")\n",
+    "\n",
+    "    # Clean up the created file (optional)\n",
+    "    # os.remove(result_file)\n",
+    "    # print(f\"Cleaned up: Removed '{result_file}'\")\n",
+    "\n",
+    "def save_result(adata, result_file):\n",
+    "    \"\"\"Saves the AnnData object to the specified file.\"\"\"\n",
+    "    try:\n",
+    "        adata.write_h5ad(result_file) # Use .write_h5ad for the recommended format\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error during saving: {e}\")\n",
+    "\n",
+    "def data_normalization_for_demo(adata, target_sum=1e4):\n",
+    "    \"\"\"Simplified normalization for the save demo.\"\"\"\n",
+    "    sc.pp.normalize_total(adata, target_sum=target_sum)\n",
+    "    sc.pp.log1p(adata)\n",
+    "\n",
+    "# Run the demo\n",
+    "demo_save_result()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/modules/preprocessing.py b/src/modules/preprocessing.py
@@ -0,0 +1,22 @@
+import scanpy as sc
+
+def normalize_and_scale(adata, target_sum, max_value):
+    """Normalize, log-transform and scale ``adata``, then regress out QC covariates.
+
+    Runs ``sc.pp.normalize_total``, ``sc.pp.log1p`` and ``sc.pp.scale`` on
+    ``adata``. If both ``total_counts`` and ``pct_counts_mt`` are columns of
+    ``adata.obs``, ``sc.pp.regress_out`` is run on them as well; otherwise the
+    problem is printed and the regression step is skipped. Nothing is returned.
+
+    Args:
+        adata: Object handed to the scanpy preprocessing functions.
+        target_sum: Forwarded to ``sc.pp.normalize_total`` as ``target_sum``.
+        max_value: Forwarded to ``sc.pp.scale`` as ``max_value``.
+    """
+    sc.pp.normalize_total(adata, target_sum=target_sum)
+    sc.pp.log1p(adata)
+    # Pass max_value by keyword: the second parameter of sc.pp.scale is
+    # zero_center, so a positional value would never set the clipping limit.
+    sc.pp.scale(adata, max_value=max_value)
+
+    vars_to_check = ["total_counts", "pct_counts_mt"]
+    try:
+        for var in vars_to_check:  # every covariate must be a column of adata.obs
+            if var not in adata.obs.columns:
+                raise ValueError(f"{var} is not found in adata.obs.columns")
+
+        sc.pp.regress_out(adata, vars_to_check)  # only reached when all covariates exist
+    except Exception as e:
+        # Best effort: report the problem and continue without regression.
+        print(f"An error has occurred: {e}")
+
+
+def preprocess(adata, target_sum = 1e4, max_value = 10):
+    """Run the preprocessing steps on ``adata`` and return the same object.
+
+    Args:
+        adata: Object passed to ``normalize_and_scale``.
+        target_sum: Forwarded to ``normalize_and_scale`` (default ``1e4``).
+        max_value: Forwarded to ``normalize_and_scale`` (default ``10``).
+
+    Returns:
+        The ``adata`` object that was passed in.
+    """
+    normalize_and_scale(adata, target_sum=target_sum, max_value=max_value)
+    # Additional preprocessing steps go here, before the return.
+    return adata
diff --git a/src/modules/save.py b/src/modules/save.py
@@ -0,0 +1,2 @@
+def save_result(adata, result_file):
+    """Write ``adata`` to ``result_file`` by calling ``adata.write``.
+
+    Errors raised by ``adata.write`` are not caught here.
+    """
+    adata.write(result_file)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		def save_result(adata, result_file):
		    adata.write(result_file)