DataStax Astra DB's vector database capabilities can be leveraged to build an efficient fashion catalog similarity search, enabling users to find similar clothing items based on the text of their catalog descriptions.
Product descriptions and other relevant text associated with the fashion items (e.g., tags, categories) can be converted into vector embeddings using a text embedding model (like SentenceTransformers).
import os

from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
# --- Connect to Astra DB ------------------------------------------------------
# Connection settings are read from the environment:
#   ASTRA_DB_SECURE_CONNECT_BUNDLE - path to the secure connect bundle
#   ASTRA_DB_TOKEN                 - application token
secure_connect_bundle_path = os.getenv('ASTRA_DB_SECURE_CONNECT_BUNDLE')
application_token = os.getenv('ASTRA_DB_TOKEN')

# Fail early with a clear message instead of handing None to the driver.
if not secure_connect_bundle_path or not application_token:
    raise RuntimeError(
        "ASTRA_DB_SECURE_CONNECT_BUNDLE and ASTRA_DB_TOKEN must both be set"
    )

# The literal user name 'token' is paired with the application token.
auth_provider = PlainTextAuthProvider('token', application_token)

# Connect to the database using the secure connect bundle.
cluster = Cluster(
    cloud={"secure_connect_bundle": secure_connect_bundle_path},
    auth_provider=auth_provider,
)
session = cluster.connect()

# Keyspace that holds the catalog table, and the number of components in each
# vector (used for the VECTOR<FLOAT, n> column when the table is created).
keyspace = "catalog"
v_dimension = 5

# Make the keyspace the session default.
session.set_keyspace(keyspace)

# Verify the connection by querying the system.local table.
rows = session.execute("SELECT release_version FROM system.local")
for row in rows:
    print(f"Connected to Cassandra, release version: {row.release_version}")

# Look the keyspace up in the schema tables and print its name.
current_keyspace = session.execute(
    "SELECT keyspace_name FROM system_schema.keyspaces WHERE keyspace_name = %s",
    [keyspace],
)
for row in current_keyspace:
    print(f"Connected to keyspace: {row.keyspace_name}")

print("Connected to AstraDB and keyspace successfully!")
# --- Schema ---------------------------------------------------------------------
# One row per product: integer id (primary key), description, image URL and a
# float vector with v_dimension components.
create_table_cql = (
    f"CREATE TABLE IF NOT EXISTS {keyspace}.ProductDescVectors "
    "(ProductId INT PRIMARY KEY, ProductDesc TEXT, ImageURL text, "
    f"CatalogVector VECTOR<FLOAT,{v_dimension}>);"
)
session.execute(create_table_cql)

# Storage-attached index on the vector column, configured with the cosine
# similarity function.
create_index_cql = (
    "CREATE CUSTOM INDEX IF NOT EXISTS idx_ProductDescVectors "
    f"ON {keyspace}.ProductDescVectors "
    "(CatalogVector) USING 'StorageAttachedIndex' WITH OPTIONS = "
    "{'similarity_function' : 'cosine'};"
)
session.execute(create_index_cql)
# --- Sample data ----------------------------------------------------------------
# Each entry is (product id, product description, image file name, vector).
# Every vector has five components, matching the VECTOR<FLOAT, 5> column.
text_blocks = [
    (
        1,
        "United colors of Benetton Men White Boxer Trunks",
        "UndercolorsofBenetton-Men-White-Boxer_b4ef04538840c0020e4829ecc042ead1_images.jpg",
        [-0.0711570307612419, 0.0490173473954201, -0.0348679609596729, -0.0208837632089853, 0.0250527486205101],
    ),
    (
        2,
        "Turtle Men Check Red Shirt",
        "Turtle-Men-Check-Red-Shirt_4982b2b1a76a85a85c9adc8b4b2d523a_images.jpg",
        [-0.0678209140896797, 0.0918413251638412, 0.0087888557463884, -0.0005505480221473, 0.0586152337491512],
    ),
    (
        3,
        "United Colors of Benetton Men White Check Shirt",
        "United-Colors-of-Benetton-Men-White-Check-Shirt_13cfaff26872c298112a8e7da15c1e1d_images.jpg",
        [-0.0697127357125282, 0.0486216545104980, -0.0169006455689669, -0.0160229168832302, 0.0137890130281448],
    ),
    (
        4,
        "United Colors of Benetton Men Check White Shirts",
        "UnitedColorsofBenetton-Men-Check-White-Shirts_5bd8cae4fc61052a6f00cfcd69c4a936_images.jpg",
        [-0.0499644242227077, 0.0566278323531151, -0.0294290613383055, -0.0070271748118103, 0.0289674568921328],
    ),
    (
        5,
        "Wrangler-Men-Broad-Blue-Shirt",
        "Wrangler-Men-Broad-Blue-Shirt_8211520250143786-1.jpg",
        [-0.0581886917352676, 0.0378338471055031, 0.0425588376820087, -0.0423909239470959, 0.0186673272401094],
    ),
]

# The statement text is the same for every row, so build it once.
insert_cql = (
    f"INSERT INTO {keyspace}.ProductDescVectors"
    "(ProductId, ProductDesc, ImageURL,CatalogVector) VALUES (%s, %s,%s, %s)"
)

for product_id, product_desc, image_url, catalog_vector in text_blocks:
    # Each tuple field gets its own name: unpacking the description and the
    # image file name into one variable would store the file name in
    # ProductDesc as well as in ImageURL.
    session.execute(
        insert_cql,
        (product_id, product_desc, image_url, catalog_vector),
    )
# Performing catalog similarity search:
# User enters a text description: if a user provides a text description, generate
# its vector embedding using an appropriate text embedding model (for example,
# one loaded through llama_index.embeddings.huggingface).
# Vector search in Astra DB: use Astra DB's vector search functionality to find
# the embeddings in your database that are most similar to the user's query
# embedding.
# --- Similarity search with an example query vector -----------------------------
# Hard-coded five-component query vector, written as a CQL vector literal. It is
# defined once so the SELECT expression and the ORDER BY clause cannot drift
# apart.
query_vector = "[0.15, 0.1, 0.1, 0.35, 0.55]"

# Return the two nearest rows together with their cosine similarity. The ANN
# ordering must name CatalogVector, the only vector column declared on the
# table.
ann_query = (
    f"SELECT ProductDesc, ImageURL,similarity_cosine(CatalogVector, {query_vector}) as similarity FROM {keyspace}.ProductDescVectors "
    f"ORDER BY CatalogVector ANN OF {query_vector} LIMIT 2"
)
for row in session.execute(ann_query):
    print(f"[{row.productdesc}\" (sim: {row.similarity:.4f})")

# Print success message
print("Catalogs with semantic match.")
# Example scenario:
# A user wants more fashion products similar to
# "United Colors of Benetton Men Check White Shirts".
# The application:
# - Generates a vector embedding of the
#   "United Colors of Benetton Men Check White Shirts" text.
# - Performs a vector search in Astra DB to find fashion items with similar
#   catalog (text) embeddings.
# - Retrieves and displays the details of the most similar catalog entries,
#   including similar brands, styles, colors, and potentially complementary
#   items like shoes or accessories.
# Retrieve matching items: Astra DB returns the IDs of the fashion items
# corresponding to the most similar embeddings.
# Display results: retrieve the full details (images, descriptions, etc.) of the
# matching fashion items from Astra DB and display them to the user.
# --- Similarity search for an existing catalog entry ----------------------------
# Query vector written as a CQL vector literal; the values are the same as the
# vector stored for product 4 ("United Colors of Benetton Men Check White
# Shirts") in the sample data. Defined once and reused in both clauses.
matching_vector = (
    "[-0.0499644242227077, 0.0566278323531151, -0.0294290613383055, "
    "-0.0070271748118103, 0.0289674568921328]"
)

# Return the two nearest rows together with their cosine similarity.
ann_query_matching = (
    f"SELECT ProductDesc, ImageURL,similarity_cosine(CatalogVector, {matching_vector}) as similarity FROM {keyspace}.ProductDescVectors "
    f"ORDER BY CatalogVector ANN OF {matching_vector} LIMIT 2"
)
for row in session.execute(ann_query_matching):
    print(f"[{row.productdesc}\" (sim: {row.similarity:.4f})")

print("Data with similar match.")
Benefits of using Astra DB for fashion similarity search:
Scalability and Performance: Astra DB is designed for high-throughput, low-latency operations, making it suitable for large fashion catalogs and real-time search.
Simplified Architecture: Astra DB can store both your product data and vector embeddings, eliminating the need for separate vector databases.
Cost-Effectiveness: Astra DB offers serverless options, reducing the need for infrastructure management and associated costs.
Integration with GenAI Applications: Astra DB works seamlessly with tools like LangChain for building generative AI applications, potentially enabling features like fashion recommendations, chatbots, and more.