SuperlinkedRetriever Examples
This notebook demonstrates how to build a Superlinked App and Query Descriptor and use them with the LangChain SuperlinkedRetriever.
Install the integration from PyPI:
pip install -U langchain-superlinked superlinked
import superlinked.framework as sl
from langchain_superlinked import SuperlinkedRetriever
from datetime import timedelta
# Define schema
class DocumentSchema(sl.Schema):
    """Schema describing one document record: an id plus its text content."""

    id: sl.IdField  # record identifier
    content: sl.String  # document text


# Schema instance whose fields are referenced when building spaces and queries.
doc_schema = DocumentSchema()
# Text-similarity space over the document content, plus an index that
# contains only that space.
text_space = sl.TextSimilaritySpace(
    model="sentence-transformers/all-MiniLM-L6-v2",
    text=doc_schema.content,
)
doc_index = sl.Index([text_space])
# Query descriptor, built one clause at a time. The search text and the
# result count are left as named parameters ("query_text" and "limit").
query = sl.Query(doc_index).find(doc_schema)
query = query.similar(text_space.text, sl.Param("query_text"))
query = query.select([doc_schema.content])
query = query.limit(sl.Param("limit"))
# Minimal in-memory app: one source feeding one index.
source = sl.InMemorySource(schema=doc_schema)
executor = sl.InMemoryExecutor(
    indices=[doc_index],
    sources=[source],
)
app = executor.run()
# Sample data: three short documents with ids "1", "2" and "3".
source.put(
    [
        {"id": str(position), "content": sentence}
        for position, sentence in enumerate(
            (
                "Machine learning algorithms process data efficiently.",
                "Natural language processing understands human language.",
                "Deep learning models require significant compute.",
            ),
            start=1,
        )
    ]
)
# Wrap the running app and the query descriptor in a LangChain retriever.
# "content" names the field used as each returned document's page content.
retriever = SuperlinkedRetriever(
    page_content_field="content",
    sl_query=query,
    sl_client=app,
)

# `limit` matches the sl.Param("limit") declared on the query.
# NOTE(review): presumably extra keyword arguments are forwarded to the
# query as parameter values - confirm against the retriever's documentation.
retriever.invoke("artificial intelligence", limit=2)
# Multi-space example (blog posts)
class BlogPostSchema(sl.Schema):
    """Schema describing one blog post record."""

    id: sl.IdField  # record identifier
    title: sl.String  # post title
    content: sl.String  # post body text
    category: sl.String  # category label
    published_date: sl.Timestamp  # publication time


# Schema instance whose fields are referenced when building spaces and queries.
blog = BlogPostSchema()
# Two text-similarity spaces (body and title) using the same model.
content_space = sl.TextSimilaritySpace(
    model="sentence-transformers/all-MiniLM-L6-v2",
    text=blog.content,
)
title_space = sl.TextSimilaritySpace(
    model="sentence-transformers/all-MiniLM-L6-v2",
    text=blog.title,
)

# Categorical space over a fixed list of category labels.
cat_space = sl.CategoricalSimilaritySpace(
    categories=["technology", "science", "business"],
    category_input=blog.category,
)

# Recency space over the publication timestamp, with 30-day and 90-day periods.
recency_space = sl.RecencySpace(
    period_time_list=[
        sl.PeriodTime(timedelta(days=span)) for span in (30, 90)
    ],
    timestamp=blog.published_date,
)

# One index combining all four spaces.
blog_index = sl.Index([content_space, title_space, cat_space, recency_space])
# Query over the blog index. Each space's weight is a named parameter, as are
# the search text ("query_text") and the result count ("limit").
blog_query = sl.Query(
    blog_index,
    weights={
        content_space: sl.Param("content_weight"),
        title_space: sl.Param("title_weight"),
        cat_space: sl.Param("category_weight"),
        recency_space: sl.Param("recency_weight"),
    },
)
blog_query = blog_query.find(blog)
# The search text is compared against the content space only.
blog_query = blog_query.similar(content_space.text, sl.Param("query_text"))
blog_query = blog_query.select(
    [blog.title, blog.content, blog.category, blog.published_date]
)
blog_query = blog_query.limit(sl.Param("limit"))
# In-memory source and running app for the blog example. This rebinds the
# names `source` and `app` used by the first example.
source = sl.InMemorySource(schema=blog)
app = sl.InMemoryExecutor(
    indices=[blog_index],
    sources=[source],
).run()
from datetime import datetime
# Sample posts. Each row is (id, title, content, category, age in days);
# published_date is stored as integer epoch seconds for "now minus age".
source.put(
    [
        {
            "id": post_id,
            "title": headline,
            "content": body,
            "category": topic,
            "published_date": int(
                (datetime.now() - timedelta(days=age_days)).timestamp()
            ),
        }
        for post_id, headline, body, topic, age_days in (
            ("p1", "Intro to ML", "Machine learning 101", "technology", 5),
            ("p2", "AI in Healthcare", "Transforming diagnosis", "science", 15),
        )
    ]
)
# Retriever for the blog example: "content" is used as the page content and
# the listed fields are requested as document metadata.
blog_retriever = SuperlinkedRetriever(
    metadata_fields=["title", "category", "published_date"],
    page_content_field="content",
    sl_query=blog_query,
    sl_client=app,
)

# NOTE(review): blog_query also declares "title_weight" and "category_weight"
# parameters that this call does not supply; presumably Superlinked falls back
# to a default weight for them - confirm before relying on the ranking.
blog_retriever.invoke(
    "machine learning",
    limit=2,
    recency_weight=0.5,
    content_weight=1.0,
)
Related
- Retriever conceptual guide
- Retriever how-to guides