Skip to content
Merged
18 changes: 12 additions & 6 deletions src/DataStax.AstraDB.DataApi/Core/CollectionDefinition.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,14 @@ internal static CollectionDefinition CheckAddDefinitionsFromAttributes<T>(Collec
{
definition.Vector = new VectorOptions();
}
if (vectorAttribute.Dimension != -1)
if (vectorAttribute.Dimension.HasValue && vectorAttribute.Dimension.Value != -1)
{
definition.Vector.Dimension = vectorAttribute.Dimension;
definition.Vector.Dimension = vectorAttribute.Dimension.Value;
}
if (vectorAttribute.Metric.HasValue)
{
definition.Vector.Metric = vectorAttribute.Metric.Value;
}
definition.Vector.Metric = vectorAttribute.Metric;
if (!string.IsNullOrEmpty(vectorAttribute.SourceModel))
{
definition.Vector.SourceModel = vectorAttribute.SourceModel;
Expand All @@ -131,11 +134,14 @@ internal static CollectionDefinition CheckAddDefinitionsFromAttributes<T>(Collec
{
definition.Vector = new VectorOptions();
}
if (vectorizeAttribute.Dimension != -1)
if (vectorizeAttribute.Dimension.HasValue && vectorizeAttribute.Dimension.Value != -1)
{
definition.Vector.Dimension = vectorizeAttribute.Dimension.Value;
}
if (vectorizeAttribute.Metric.HasValue)
{
definition.Vector.Dimension = vectorizeAttribute.Dimension;
definition.Vector.Metric = vectorizeAttribute.Metric.Value;
}
definition.Vector.Metric = vectorizeAttribute.Metric;
if (!string.IsNullOrEmpty(vectorizeAttribute.Provider))
{
definition.Vector.Service = new VectorServiceOptions()
Expand Down
31 changes: 30 additions & 1 deletion src/DataStax.AstraDB.DataApi/Core/CollectionVectorAttribute.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public class CollectionVectorAttribute : Attribute
public int? Dimension { get; set; } = null;

/// <summary>The similarity metric to use for vector comparisons.</summary>
public SimilarityMetric Metric { get; set; } = SimilarityMetric.Cosine;
public SimilarityMetric? Metric { get; set; }

/// <summary>
/// Configures the index with the fastest settings for a given source of embeddings vectors.
Expand All @@ -41,4 +41,33 @@ public class CollectionVectorAttribute : Attribute
/// </summary>
public CollectionVectorAttribute() { }

/// <summary>
/// Creates the attribute from an optional vector dimension and an optional source model.
/// This overload does not assign <see cref="Metric"/>.
/// </summary>
/// <param name="dimension">
/// Optional. The number of dimensions for the vector. The default value <c>-1</c> is a
/// sentinel meaning "not specified" and is stored as a <c>null</c> <see cref="Dimension"/>.
/// </param>
/// <param name="sourceModel">Optional. The source model for embeddings optimization.</param>
public CollectionVectorAttribute(
    int dimension = -1,
    string sourceModel = null)
{
    // -1 stands in for "no dimension given"; translate it to null.
    if (dimension == -1)
    {
        Dimension = null;
    }
    else
    {
        Dimension = dimension;
    }

    SourceModel = sourceModel;
}

/// <summary>
/// Creates the attribute with an explicit similarity metric, plus an optional vector
/// dimension and an optional source model.
/// </summary>
/// <param name="metric">The similarity metric to use for vector comparisons.</param>
/// <param name="dimension">
/// Optional. The number of dimensions for the vector. The default value <c>-1</c> is a
/// sentinel meaning "not specified" and is stored as a <c>null</c> <see cref="Dimension"/>.
/// </param>
/// <param name="sourceModel">Optional. The source model for embeddings optimization.</param>
public CollectionVectorAttribute(
    SimilarityMetric metric,
    int dimension = -1,
    string sourceModel = null)
{
    Metric = metric;

    // -1 stands in for "no dimension given"; translate it to null.
    if (dimension == -1)
    {
        Dimension = null;
    }
    else
    {
        Dimension = dimension;
    }

    SourceModel = sourceModel;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,58 @@ namespace DataStax.AstraDB.DataApi.Core;
public class CollectionVectorizeAttribute : BaseVectorizeAttribute
{
/// <summary>The similarity metric to use for vector comparisons.</summary>
public SimilarityMetric Metric { get; set; } = SimilarityMetric.Cosine;
public SimilarityMetric? Metric { get; set; }

/// <summary>
/// Initializes a new instance of <see cref="CollectionVectorizeAttribute"/> with default settings.
/// </summary>
public CollectionVectorizeAttribute() { }

/// <summary>
/// Creates the attribute for the given embedding provider and model, optionally with a
/// vector dimension, authentication pairs and provider parameters.
/// This overload does not assign <see cref="Metric"/>.
/// </summary>
/// <param name="provider">The name of the embedding service provider.</param>
/// <param name="modelName">The model name to use for embedding generation.</param>
/// <param name="dimension">
/// Optional: The number of dimensions for the generated vector. The default value <c>-1</c>
/// is a sentinel meaning "not specified" and is stored as <c>null</c>.
/// </param>
/// <param name="authenticationPairs">
/// Optional: Key-value pairs for authenticating with the embedding service.
/// <c>null</c> is stored as an empty array.
/// </param>
/// <param name="parameterPairs">
/// Optional: Additional key-value parameter pairs for the embedding service.
/// <c>null</c> is stored as an empty array.
/// </param>
public CollectionVectorizeAttribute(
    string provider,
    string modelName,
    int dimension = -1,
    string[] authenticationPairs = null,
    object[] parameterPairs = null)
{
    Provider = provider;
    ModelName = modelName;

    // -1 stands in for "no dimension given"; translate it to null.
    if (dimension == -1)
    {
        Dimension = null;
    }
    else
    {
        Dimension = dimension;
    }

    // Never store null arrays: fall back to shared empty instances.
    AuthenticationPairs = authenticationPairs is null
        ? Array.Empty<string>()
        : authenticationPairs;
    ParameterPairs = parameterPairs is null
        ? Array.Empty<object>()
        : parameterPairs;
}

/// <summary>
/// Creates the attribute for the given embedding provider and model with an explicit
/// similarity metric, optionally with a vector dimension, authentication pairs and
/// provider parameters.
/// </summary>
/// <param name="provider">The name of the embedding service provider.</param>
/// <param name="modelName">The model name to use for embedding generation.</param>
/// <param name="metric">The similarity metric to use for vector comparisons.</param>
/// <param name="dimension">
/// Optional: The number of dimensions for the generated vector. The default value <c>-1</c>
/// is a sentinel meaning "not specified" and is stored as <c>null</c>.
/// </param>
/// <param name="authenticationPairs">
/// Optional: Key-value pairs for authenticating with the embedding service.
/// <c>null</c> is stored as an empty array.
/// </param>
/// <param name="parameterPairs">
/// Optional: Additional key-value parameter pairs for the embedding service.
/// <c>null</c> is stored as an empty array.
/// </param>
public CollectionVectorizeAttribute(
    string provider,
    string modelName,
    SimilarityMetric metric,
    int dimension = -1,
    string[] authenticationPairs = null,
    object[] parameterPairs = null)
{
    Provider = provider;
    ModelName = modelName;
    Metric = metric;

    // -1 stands in for "no dimension given"; translate it to null.
    if (dimension == -1)
    {
        Dimension = null;
    }
    else
    {
        Dimension = dimension;
    }

    // Never store null arrays: fall back to shared empty instances.
    AuthenticationPairs = authenticationPairs is null
        ? Array.Empty<string>()
        : authenticationPairs;
    ParameterPairs = parameterPairs is null
        ? Array.Empty<object>()
        : parameterPairs;
}

}
2 changes: 1 addition & 1 deletion src/DataStax.AstraDB.DataApi/Core/VectorOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class VectorOptions
/// The similarity metric to use
/// </summary>
[JsonPropertyName("metric")]
public SimilarityMetric Metric { get; set; }
public SimilarityMetric Metric { get; set; } = SimilarityMetric.Cosine;

/// <summary>
/// Options for the service providing the vectorization
Expand Down
125 changes: 124 additions & 1 deletion test/DataStax.AstraDB.DataApi.IntegrationTests/TestObjects.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using DataStax.AstraDB.DataApi.Core;
using DataStax.AstraDB.DataApi.SerDes;
using DataStax.AstraDB.DataApi.Tables;
using DataStax.AstraDB.DataApi.Collections;
using MongoDB.Bson;
using System.Text.Json.Serialization;

Expand All @@ -23,6 +24,31 @@ public class SimpleObjectWithVectorSearchResult : SimpleObjectWithVector
public double? Similarity { get; set; }
}

/// <summary>
/// Test document for the collection "coll_SimpleObjectWithVectorAttributeSD".
/// Its <see cref="CollectionVectorAttribute"/> supplies both a similarity metric
/// (Euclidean) and a dimension (3).
/// </summary>
[CollectionName("coll_SimpleObjectWithVectorAttributeSD")]
[CollectionVector(metric: SimilarityMetric.Euclidean, dimension: 3)]
public class SimpleObjectWithVectorAttributeSD
{
    /// <summary>Document identifier.</summary>
    [DocumentId]
    public string Id { get; set; }

    /// <summary>Embedding values, mapped to the document's vector field.</summary>
    [DocumentMapping(DocumentMappingField.Vector)]
    public float[] VectorEmbeddings { get; set; }
}

/// <summary>
/// Test document for the collection "coll_SimpleObjectWithVectorAttributeD".
/// Its <see cref="CollectionVectorAttribute"/> supplies only a dimension (3) and
/// no similarity metric.
/// </summary>
[CollectionName("coll_SimpleObjectWithVectorAttributeD")]
[CollectionVector(dimension: 3)]
public class SimpleObjectWithVectorAttributeD
{
    /// <summary>Document identifier.</summary>
    [DocumentId]
    public string Id { get; set; }

    /// <summary>Embedding values, mapped to the document's vector field.</summary>
    [DocumentMapping(DocumentMappingField.Vector)]
    public float[] VectorEmbeddings { get; set; }
}

public class SimpleObjectWithVectorize
{
[DocumentId]
Expand All @@ -32,7 +58,12 @@ public class SimpleObjectWithVectorize
public string StringToVectorize => Name;
}

[CollectionVectorize(Provider = "nvidia", ModelName = "NV-Embed-QA", Metric = SimilarityMetric.Cosine)]
[CollectionName("coll_SimpleObjectWithVectorize")]
[CollectionVectorize(
"nvidia",
"nvidia/nv-embedqa-e5-v5",
SimilarityMetric.Cosine
)]
public class SimpleObjectWithVectorizeAttribute
{
[DocumentId]
Expand All @@ -42,6 +73,49 @@ public class SimpleObjectWithVectorizeAttribute
public string StringToVectorize => Name;
}

/// <summary>
/// Test document for the collection "coll_SimpleObjectWithVectorizeShSecret".
/// Its <see cref="CollectionVectorizeAttribute"/> names the "openai" provider and the
/// "text-embedding-3-small" model, and passes a "providerKey" authentication pair.
/// </summary>
[CollectionName("coll_SimpleObjectWithVectorizeShSecret")]
[CollectionVectorize(
    Provider = "openai",
    ModelName = "text-embedding-3-small",
    AuthenticationPairs = new[] { "providerKey", "SHARED_SECRET_EMBEDDING_API_KEY_OPENAI" })]
public class SimpleObjectWithVectorizeAttributeShSecret
{
    /// <summary>Document identifier.</summary>
    [DocumentId]
    public int? Id { get; set; }

    /// <summary>Name of the object; also the text exposed for vectorization.</summary>
    public string Name { get; set; }

    /// <summary>Read-only view of <see cref="Name"/>, mapped to the vectorize field.</summary>
    [DocumentMapping(DocumentMappingField.Vectorize)]
    public string StringToVectorize
    {
        get { return Name; }
    }
}

/// <summary>
/// Test document for the collection "coll_SimpleObjectWithVectorizeShSecret2A".
/// It carries two attributes at once: a <see cref="CollectionVectorAttribute"/>
/// (Euclidean metric, dimension 123, source model "bert") and a
/// <see cref="CollectionVectorizeAttribute"/> ("openai" / "text-embedding-3-small"
/// with a "providerKey" authentication pair).
/// </summary>
[CollectionName("coll_SimpleObjectWithVectorizeShSecret2A")]
[CollectionVector(metric: SimilarityMetric.Euclidean, dimension: 123, SourceModel = "bert")]
[CollectionVectorize(
    Provider = "openai",
    ModelName = "text-embedding-3-small",
    AuthenticationPairs = new[] { "providerKey", "SHARED_SECRET_EMBEDDING_API_KEY_OPENAI" })]
public class SimpleObjectWithVectorizeAttributeShSecret2A
{
    /// <summary>Document identifier.</summary>
    [DocumentId]
    public int? Id { get; set; }

    /// <summary>Name of the object; also the text exposed for vectorization.</summary>
    public string Name { get; set; }

    /// <summary>Read-only view of <see cref="Name"/>, mapped to the vectorize field.</summary>
    [DocumentMapping(DocumentMappingField.Vectorize)]
    public string StringToVectorize
    {
        get { return Name; }
    }
}

/// <summary>
/// Test document for the collection "coll_SimpleObjectWithVectorizeHeader".
/// Its <see cref="CollectionVectorizeAttribute"/> names the "openai" provider and the
/// "text-embedding-3-small" model and specifies no authentication pairs.
/// </summary>
/// <remarks>
/// NOTE(review): the class name suggests credentials are supplied through a request
/// header instead - confirm against the tests that use this type.
/// </remarks>
[CollectionName("coll_SimpleObjectWithVectorizeHeader")]
[CollectionVectorize(
    Provider = "openai",
    ModelName = "text-embedding-3-small")]
public class SimpleObjectWithVectorizeAttributeHeader
{
    /// <summary>Document identifier.</summary>
    [DocumentId]
    public int? Id { get; set; }

    /// <summary>Name of the object; also the text exposed for vectorization.</summary>
    public string Name { get; set; }

    /// <summary>Read-only view of <see cref="Name"/>, mapped to the vectorize field.</summary>
    [DocumentMapping(DocumentMappingField.Vectorize)]
    public string StringToVectorize
    {
        get { return Name; }
    }
}

[LexicalOptions(
TokenizerName = "standard",
Expand Down Expand Up @@ -184,6 +258,55 @@ public class RowBookVectorize
public float Rating { get; set; }
}

/// <summary>
/// Test row for the table "bookTestTableVectorizeHeaderBased". The
/// <see cref="Author"/> column is marked with a vectorize attribute that passes
/// "openai", "text-embedding-3-small" and a dimension of 1536, with no authentication pairs.
/// </summary>
/// <remarks>
/// NOTE(review): the two positional arguments are presumably provider and model name,
/// and the class name suggests header-based credentials - confirm against
/// <c>ColumnVectorizeAttribute</c> and the tests that use this type.
/// </remarks>
[TableName("bookTestTableVectorizeHeaderBased")]
public class RowBookVectorizeHeaderBased
{
    /// <summary>Book title; primary key component 1.</summary>
    [ColumnPrimaryKey(1)]
    public string Title { get; set; }

    /// <summary>Author column, configured for vectorization.</summary>
    [ColumnVectorize(
        "openai",
        "text-embedding-3-small",
        dimension: 1536)]
    public object Author { get; set; }

    /// <summary>Page count; primary key component 2.</summary>
    [ColumnPrimaryKey(2)]
    public int NumberOfPages { get; set; }

    /// <summary>Optional due date.</summary>
    public DateTime? DueDate { get; set; }

    /// <summary>Set of genre names.</summary>
    public HashSet<string> Genres { get; set; }

    /// <summary>Rating value.</summary>
    public float Rating { get; set; }
}

/// <summary>
/// Test row for the table "bookTestTableVectorizeSharedSecret". The
/// <see cref="Author"/> column is marked with a vectorize attribute that passes
/// "openai", "text-embedding-3-small", a dimension of 1536 and a "providerKey"
/// authentication pair.
/// </summary>
/// <remarks>
/// NOTE(review): the two positional arguments are presumably provider and model name -
/// confirm against <c>ColumnVectorizeAttribute</c>.
/// </remarks>
[TableName("bookTestTableVectorizeSharedSecret")]
public class RowBookVectorizeSharedSecret
{
    /// <summary>Book title; primary key component 1.</summary>
    [ColumnPrimaryKey(1)]
    public string Title { get; set; }

    /// <summary>Author column, configured for vectorization.</summary>
    [ColumnVectorize(
        "openai",
        "text-embedding-3-small",
        dimension: 1536,
        authenticationPairs: new[] { "providerKey", "SHARED_SECRET_EMBEDDING_API_KEY_OPENAI" })]
    public object Author { get; set; }

    /// <summary>Page count; primary key component 2.</summary>
    [ColumnPrimaryKey(2)]
    public int NumberOfPages { get; set; }

    /// <summary>Optional due date.</summary>
    public DateTime? DueDate { get; set; }

    /// <summary>Set of genre names.</summary>
    public HashSet<string> Genres { get; set; }

    /// <summary>Rating value.</summary>
    public float Rating { get; set; }
}

/// <summary>
/// Test row for the table "bookTestTableVeczeShdSecretWithParams". The
/// <see cref="Author"/> column is marked with a vectorize attribute that passes
/// "voyageAI", "voyage-2", a "providerKey" authentication pair and an
/// "autoTruncate" = false parameter pair. No dimension is given.
/// </summary>
/// <remarks>
/// NOTE(review): the two positional arguments are presumably provider and model name -
/// confirm against <c>ColumnVectorizeAttribute</c>.
/// </remarks>
[TableName("bookTestTableVeczeShdSecretWithParams")]
public class RowBookVectorizeSharedSecretWithParameters
{
    /// <summary>Book title; primary key component 1.</summary>
    [ColumnPrimaryKey(1)]
    public string Title { get; set; }

    /// <summary>Author column, configured for vectorization.</summary>
    [ColumnVectorize(
        "voyageAI",
        "voyage-2",
        authenticationPairs: new[] { "providerKey", "SHARED_SECRET_EMBEDDING_API_KEY_VOYAGEAI" },
        // Mixed string/bool elements, so the element type must stay explicit.
        parameterPairs: new object[] { "autoTruncate", false })]
    public object Author { get; set; }

    /// <summary>Page count; primary key component 2.</summary>
    [ColumnPrimaryKey(2)]
    public int NumberOfPages { get; set; }

    /// <summary>Optional due date.</summary>
    public DateTime? DueDate { get; set; }

    /// <summary>Set of genre names.</summary>
    public HashSet<string> Genres { get; set; }

    /// <summary>Rating value.</summary>
    public float Rating { get; set; }
}

[TableName("bookTestTableSinglePrimaryKey")]
public class RowBookSinglePrimaryKey
{
Expand Down
Loading
Loading