diff --git a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb index 45e9a58..d28c297 100644 --- a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb +++ b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb @@ -48,6 +48,12 @@ "colab": { "base_uri": "https://localhost:8080/" }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.128408Z", + "iopub.status.busy": "2026-06-16T21:19:51.128158Z", + "iopub.status.idle": "2026-06-16T21:19:51.144459Z", + "shell.execute_reply": "2026-06-16T21:19:51.143716Z" + }, "id": "lxgAfuxcdftS", "outputId": "36339d6b-f7a8-4686-911a-60642a8adbe6" }, @@ -56,7 +62,7 @@ "%%bash\n", "\n", "pip install haystack-ai\n", - "pip install langdetect" + "pip install langdetect-haystack" ] }, { @@ -74,24 +80,21 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.166273Z", + "iopub.status.busy": "2026-06-16T21:19:51.166129Z", + "iopub.status.idle": "2026-06-16T21:19:51.329280Z", + "shell.execute_reply": "2026-06-16T21:19:51.328723Z" + }, "id": "mN2fFuWWP_8D" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages/pypdf/_crypt_providers/_cryptography.py:32: CryptographyDeprecationWarning: ARC4 has been moved to cryptography.hazmat.decrepit.ciphers.algorithms.ARC4 and will be removed from this module in 48.0.0.\n", - " from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4\n" - ] - } - ], + "outputs": [], "source": [ "from haystack import Document, Pipeline\n", "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", - "from haystack.components.classifiers import DocumentLanguageClassifier\n", + "from haystack_integrations.components.classifiers.langdetect import DocumentLanguageClassifier\n", "from haystack.components.routers import MetadataRouter\n", "from haystack.components.writers import DocumentWriter\n", "\n", @@ -130,8 +133,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.330558Z", + "iopub.status.busy": "2026-06-16T21:19:51.330447Z", + "iopub.status.idle": "2026-06-16T21:19:51.332184Z", + "shell.execute_reply": "2026-06-16T21:19:51.331870Z" + }, "id": "rfC1ZCigQJgI" }, "outputs": [], @@ -156,8 +165,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.333205Z", + "iopub.status.busy": "2026-06-16T21:19:51.333149Z", + "iopub.status.idle": "2026-06-16T21:19:51.334886Z", + "shell.execute_reply": "2026-06-16T21:19:51.334585Z" + }, "id": "FlqGdbuxQNKk" }, "outputs": [], @@ -173,8 +188,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.335792Z", + "iopub.status.busy": "2026-06-16T21:19:51.335727Z", + "iopub.status.idle": "2026-06-16T21:19:51.337496Z", + "shell.execute_reply": "2026-06-16T21:19:51.337202Z" + }, "id": "FEw5pfmBQRBB" }, "outputs": [], @@ -195,15 +216,21 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.338481Z", + "iopub.status.busy": "2026-06-16T21:19:51.338396Z", + "iopub.status.idle": "2026-06-16T21:19:51.341400Z", + "shell.execute_reply": "2026-06-16T21:19:51.341041Z" + }, "id": "BdvO_fEfcVAY" }, "outputs": [ { "data": { "text/plain": [ - "\n", + "\n", "🚅 Components\n", " - language_classifier: DocumentLanguageClassifier\n", " - router: MetadataRouter\n", @@ -211,13 +238,13 @@ " - fr_writer: DocumentWriter\n", " - es_writer: DocumentWriter\n", "🛤️ Connections\n", - " - language_classifier.documents -> router.documents (List[Document])\n", - " - router.en -> en_writer.documents (List[Document])\n", - " - router.fr -> fr_writer.documents (List[Document])\n", - " - router.es -> es_writer.documents (List[Document])" + " - language_classifier.documents -> router.documents (list[Document])\n", + " - router.en -> en_writer.documents (list[Document])\n", + " - router.fr -> fr_writer.documents (list[Document])\n", + " - router.es -> es_writer.documents (list[Document])" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -248,8 +275,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.342424Z", + "iopub.status.busy": "2026-06-16T21:19:51.342369Z", + "iopub.status.idle": "2026-06-16T21:19:51.343688Z", + "shell.execute_reply": "2026-06-16T21:19:51.343414Z" + }, "id": "598ZTa7RzNeR" }, "outputs": [], @@ -268,11 +301,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.344538Z", + "iopub.status.busy": "2026-06-16T21:19:51.344465Z", + "iopub.status.idle": "2026-06-16T21:19:51.493556Z", + "shell.execute_reply": "2026-06-16T21:19:51.493166Z" + }, "id": "lE5XE8cPXN5-", "outputId": "43017d9b-65f8-48ad-dadb-66ad0de3af43" }, @@ -282,11 +321,11 @@ "text/plain": [ "{'router': {'unmatched': []},\n", " 'en_writer': {'documents_written': 2},\n", - " 'fr_writer': {'documents_written': 3},\n", - " 'es_writer': {'documents_written': 2}}" + " 'es_writer': {'documents_written': 2},\n", + " 'fr_writer': {'documents_written': 3}}" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -308,11 +347,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.494811Z", + "iopub.status.busy": "2026-06-16T21:19:51.494736Z", + "iopub.status.idle": "2026-06-16T21:19:51.496600Z", + "shell.execute_reply": "2026-06-16T21:19:51.496308Z" + }, "id": "LNHzxz52uxZV", "outputId": "d0459677-73c0-4bb6-f5d3-87c0c00b1552" }, @@ -348,11 +393,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.497770Z", + "iopub.status.busy": "2026-06-16T21:19:51.497712Z", + "iopub.status.idle": "2026-06-16T21:19:51.499323Z", + "shell.execute_reply": "2026-06-16T21:19:51.498958Z" + }, "id": "hVJaARodWezy", "outputId": "d9bdcb42-bd50-4fd9-f4d8-a69e8b4b64f8" }, @@ -378,8 +429,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.500219Z", + "iopub.status.busy": "2026-06-16T21:19:51.500162Z", + "iopub.status.idle": "2026-06-16T21:19:51.664781Z", + "shell.execute_reply": "2026-06-16T21:19:51.664318Z" + }, "id": "CN1N2sn1yUVx" }, "outputs": [], @@ -389,7 +446,7 @@ "from haystack.components.builders import ChatPromptBuilder\n", "from haystack.components.generators.chat import OpenAIChatGenerator\n", "from haystack.dataclasses import ChatMessage\n", - "from haystack.components.routers import TextLanguageRouter\n", + "from haystack_integrations.components.routers.langdetect import TextLanguageRouter\n", "\n", "prompt_template = [\n", " ChatMessage.from_user(\n", @@ -427,15 +484,28 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": { + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.666081Z", + "iopub.status.busy": "2026-06-16T21:19:51.665964Z", + "iopub.status.idle": "2026-06-16T21:19:51.691427Z", + "shell.execute_reply": "2026-06-16T21:19:51.691047Z" + }, "id": "BN1Hr_BjWKcl" }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ChatPromptBuilder has 2 prompt variables, but `required_variables` is not set. By default, all prompt variables are treated as optional, which may lead to unintended behavior in multi-branch pipelines. To avoid unexpected execution, ensure that variables intended to be required are explicitly set in `required_variables`.\n" + ] + }, { "data": { "text/plain": [ - "\n", + "\n", "🚅 Components\n", " - router: TextLanguageRouter\n", " - en_retriever: InMemoryBM25Retriever\n", @@ -448,14 +518,14 @@ " - router.en -> en_retriever.query (str)\n", " - router.fr -> fr_retriever.query (str)\n", " - router.es -> es_retriever.query (str)\n", - " - en_retriever.documents -> joiner.documents (List[Document])\n", - " - fr_retriever.documents -> joiner.documents (List[Document])\n", - " - es_retriever.documents -> joiner.documents (List[Document])\n", - " - joiner.documents -> prompt_builder.documents (List[Document])\n", - " - prompt_builder.prompt -> llm.messages (List[ChatMessage])" + " - en_retriever.documents -> joiner.documents (list[Document])\n", + " - fr_retriever.documents -> joiner.documents (list[Document])\n", + " - es_retriever.documents -> joiner.documents (list[Document])\n", + " - joiner.documents -> prompt_builder.documents (list[Document])\n", + " - prompt_builder.prompt -> llm.messages (list[ChatMessage])" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -492,12 +562,18 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000 }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.692547Z", + "iopub.status.busy": "2026-06-16T21:19:51.692485Z", + "iopub.status.idle": "2026-06-16T21:19:51.693914Z", + "shell.execute_reply": "2026-06-16T21:19:51.693560Z" + }, "id": "HAFTD5nt1L9a", "outputId": "90cbf82b-8fe5-439d-b099-08510e1c1098" }, @@ -517,7 +593,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -536,6 +612,12 @@ "f403167cb47840a3b0c796ae4c304401" ] }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:51.694836Z", + "iopub.status.busy": "2026-06-16T21:19:51.694773Z", + "iopub.status.idle": "2026-06-16T21:19:54.937005Z", + "shell.execute_reply": "2026-06-16T21:19:54.936250Z" + }, "id": "wj24fjXN0l6v", "outputId": "3c1eed33-c31c-4b72-bcda-fdd64744560b" }, @@ -548,11 +630,17 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:54.939628Z", + "iopub.status.busy": "2026-06-16T21:19:54.939459Z", + "iopub.status.idle": "2026-06-16T21:19:54.942140Z", + "shell.execute_reply": "2026-06-16T21:19:54.941523Z" + }, "id": "i-2P5oqMeUmC", "outputId": "8151923f-bbb1-4e6a-fe4e-08c0d7cfcd49" }, @@ -561,7 +649,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Yes, the apartment is conveniently located.\n" + "Yes. Both reviews say the apartment is very central and conveniently located.\n" ] } ], @@ -580,7 +668,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -599,6 +687,12 @@ "7b62bd3498bb49ec9e1db68ca088e7ae" ] }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:54.943957Z", + "iopub.status.busy": "2026-06-16T21:19:54.943828Z", + "iopub.status.idle": "2026-06-16T21:19:57.283912Z", + "shell.execute_reply": "2026-06-16T21:19:57.282733Z" + }, "id": "B4_Be1bs1jxJ", "outputId": "0b96cf29-d633-4c9b-f54c-a785e1c2cbe4" }, @@ -611,11 +705,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, + "execution": { + "iopub.execute_input": "2026-06-16T21:19:57.286526Z", + "iopub.status.busy": "2026-06-16T21:19:57.286349Z", + "iopub.status.idle": "2026-06-16T21:19:57.289175Z", + "shell.execute_reply": "2026-06-16T21:19:57.288628Z" + }, "id": "w_1wibY61sjk", "outputId": "54f7506e-9af1-42b8-c0c9-cd13fb4cd9eb" }, @@ -624,7 +724,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sí, el desayuno es considerado estupendo.\n" + "Sí. Según las reseñas te traen un \"estupendo\" desayuno todas las mañanas.\n" ] } ], @@ -672,7 +772,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.6" + "version": "3.12.8" } }, "nbformat": 4,