4 Commits

Author SHA1 Message Date
dce8b45d7f Merge branch 'main' into fix-query-input 2025-11-04 14:40:06 +01:00
cf41c800bb added pipeline sanitization 2025-10-31 16:15:00 +01:00
2d1837ca4a fix team leader fabricated sources 2025-10-31 15:35:25 +01:00
ba8406367c fix query check infinite loop 2025-10-31 15:31:07 +01:00
9 changed files with 59 additions and 158 deletions

View File

@@ -107,7 +107,12 @@ class Pipeline:
def condition_query_ok(step_input: StepInput) -> StepOutput:
val = step_input.previous_step_content
stop = (not val.is_crypto) if isinstance(val, QueryOutputs) else True
return StepOutput(stop=stop)
return StepOutput(stop=stop, content=step_input.input)
def sanitization_output(step_input: StepInput) -> StepOutput:
val = step_input.previous_step_content
content = f"Query: {step_input.input}\n\nRetrieved data: {self.remove_think(str(val))}"
return StepOutput(content=content)
query_check = Step(name=PipelineEvent.QUERY_CHECK, agent=query_check)
info_recovery = Step(name=PipelineEvent.INFO_RECOVERY, team=team)
@@ -118,6 +123,7 @@ class Pipeline:
query_check,
condition_query_ok,
info_recovery,
sanitization_output,
report_generation
])
@@ -150,11 +156,22 @@ class Pipeline:
# Restituisce la risposta finale
if content and isinstance(content, str):
think_str = "</think>"
think = content.rfind(think_str)
yield content[(think + len(think_str)):] if think != -1 else content
yield cls.remove_think(content)
elif content and isinstance(content, QueryOutputs):
yield content.response
else:
logging.error(f"No output from workflow: {content}")
yield "Nessun output dal workflow, qualcosa è andato storto."
@classmethod
def remove_think(cls, text: str) -> str:
"""
Rimuove la sezione di pensiero dal testo.
Args:
text: Il testo da pulire.
Returns:
Il testo senza la sezione di pensiero.
"""
think_str = "</think>"
think = text.rfind(think_str)
return text[(think + len(think_str)):] if think != -1 else text

View File

@@ -13,3 +13,8 @@
- IS_CRYPTO: (empty)
- NOT_CRYPTO: "I can only analyze cryptocurrency topics."
- AMBIGUOUS: "Which cryptocurrency? (e.g., Bitcoin, Ethereum)"
**RULES:**
- DO NOT ANSWER the query.
- DO NOT PROVIDE ADDITIONAL INFORMATION.
- STOP instantly WHEN YOU CLASSIFY the query.

View File

@@ -8,7 +8,7 @@
- NEVER use placeholders ("N/A", "Data not available") - OMIT section instead
- NO example/placeholder data
**INPUT:** Structured report from Team Leader with optional sections:
**INPUT:** You will get the original user query and a structured report with optional sections:
- Overall Summary
- Market & Price Data (opt)
- News & Market Sentiment (opt)

View File

@@ -79,4 +79,6 @@ Timestamp: {{CURRENT_DATE}}
- Never modify MarketAgent prices
- Include all timestamps/sources
- Retry failed tasks (max 3)
- Only report agent data
- Only report agent data
- DO NOT fabricate or add info
- DO NOT add sources if none provided

View File

@@ -19,7 +19,7 @@ Historical: `{Asset, Period: {Start, End}, Data Points, Price Range: {Low, High}
**MANDATORY RULES:**
1. **Include timestamps** for every price data point
2. **Never fabricate** prices or dates - only report tool outputs
3. **Always specify the data source** (which API provided the data)
3. **Specify the data source** if provided, else state "source unavailable"
4. **Report data completeness**: If user asks for 30 days but got 7, state this explicitly
5. **Current date context**: Remind that data is as of {{CURRENT_DATE}}
6. **Token Optimization**: Be extremely concise to save tokens. Provide all necessary data using as few words as possible. Exceed 100 words ONLY if absolutely necessary to include all required data points.

View File

@@ -13,56 +13,43 @@ class ProductInfo(BaseModel):
price: float = 0.0
volume_24h: float = 0.0
currency: str = ""
provider: str = ""
@staticmethod
def aggregate(products: dict[str, list['ProductInfo']], filter_currency: str="USD") -> list['ProductInfo']:
def aggregate(products: dict[str, list['ProductInfo']]) -> list['ProductInfo']:
"""
Aggregates a list of ProductInfo by symbol.
Args:
products (dict[str, list[ProductInfo]]): Map provider -> list of ProductInfo
filter_currency (str): If set, only products with this currency are considered. Defaults to "USD".
Returns:
list[ProductInfo]: List of ProductInfo aggregated by symbol
"""
# Costruzione mappa id -> lista di ProductInfo + lista di provider
id_infos: dict[str, tuple[list[ProductInfo], list[str]]] = {}
for provider, product_list in products.items():
# Costruzione mappa symbol -> lista di ProductInfo
symbols_infos: dict[str, list[ProductInfo]] = {}
for _, product_list in products.items():
for product in product_list:
if filter_currency and product.currency != filter_currency:
continue
id_value = product.id.upper().replace("-", "") # Normalizzazione id per compatibilità (es. BTC-USD -> btcusd)
product_list, provider_list = id_infos.setdefault(id_value, ([], []) )
product_list.append(product)
provider_list.append(provider)
symbols_infos.setdefault(product.symbol, []).append(product)
# Aggregazione per ogni id
# Aggregazione per ogni symbol
aggregated_products: list[ProductInfo] = []
for id_value, (product_list, provider_list) in id_infos.items():
for symbol, product_list in symbols_infos.items():
product = ProductInfo()
product.id = f"{id_value}_AGGREGATED"
product.symbol = next(p.symbol for p in product_list if p.symbol)
product.id = f"{symbol}_AGGREGATED"
product.symbol = symbol
product.currency = next(p.currency for p in product_list if p.currency)
volume_sum = sum(p.volume_24h for p in product_list)
product.volume_24h = volume_sum / len(product_list) if product_list else 0.0
if volume_sum > 0:
# Calcolo del prezzo pesato per volume (VWAP - Volume Weighted Average Price)
prices_weighted = sum(p.price * p.volume_24h for p in product_list if p.volume_24h > 0)
product.price = prices_weighted / volume_sum
else:
# Se non c'è volume, facciamo una media semplice dei prezzi
valid_prices = [p.price for p in product_list if p.price > 0]
product.price = sum(valid_prices) / len(valid_prices) if valid_prices else 0.0
prices = sum(p.price * p.volume_24h for p in product_list)
product.price = (prices / volume_sum) if volume_sum > 0 else 0.0
product.provider = ",".join(provider_list)
aggregated_products.append(product)
return aggregated_products
class Price(BaseModel):
"""
Represents price data for an asset as obtained from market APIs.

View File

@@ -37,7 +37,6 @@ class MarketAPIsTool(MarketWrapper, Toolkit):
self.get_product,
self.get_products,
self.get_historical_prices,
self.get_product_aggregated,
self.get_products_aggregated,
self.get_historical_prices_aggregated,
],
@@ -95,27 +94,6 @@ class MarketAPIsTool(MarketWrapper, Toolkit):
"""
return self.handler.try_call(lambda w: w.get_historical_prices(asset_id, limit))
@friendly_action("🧩 Aggrego le informazioni da più fonti...")
def get_product_aggregated(self, asset_id: str) -> ProductInfo:
"""
Gets product information for a *single* asset from *all available providers* and *aggregates* the results.
This method queries all configured sources (Binance, YFinance, Coinbase, CryptoCompare)
and combines the data using volume-weighted average price (VWAP) to provide
the most accurate and comprehensive price data.
Args:
asset_id (str): The asset ID to retrieve information for (e.g., "BTC", "ETH").
Returns:
ProductInfo: A single ProductInfo object with aggregated data from all providers.
The 'provider' field will list all sources used (e.g., "Binance, YFinance, Coinbase").
Raises:
Exception: If all providers fail to return results.
"""
return self.get_products_aggregated([asset_id])[0]
@friendly_action("🧩 Aggrego le informazioni da più fonti...")
def get_products_aggregated(self, asset_ids: list[str]) -> list[ProductInfo]:
"""

View File

@@ -16,7 +16,7 @@ BASE_URL = "https://finance.yahoo.com/markets/crypto/all/"
class CryptoSymbolsTools(Toolkit):
"""
Class for obtaining cryptocurrency symbols via Yahoo Finance.
Classe per ottenere i simboli delle criptovalute tramite Yahoo Finance.
"""
def __init__(self, cache_file: str = 'resources/cryptos.csv'):
@@ -34,36 +34,29 @@ class CryptoSymbolsTools(Toolkit):
def get_all_symbols(self) -> list[str]:
"""
Returns a complete list of all available cryptocurrency symbols (tickers).
The list could be very long, prefer using 'get_symbols_by_name' for specific searches.
Restituisce tutti i simboli delle criptovalute.
Returns:
list[str]: A comprehensive list of all supported crypto symbols (e.g., "BTC-USD", "ETH-USD").
list[str]: Lista di tutti i simboli delle criptovalute.
"""
return self.final_table['Symbol'].tolist() if not self.final_table.empty else []
def get_symbols_by_name(self, query: str) -> list[tuple[str, str]]:
"""
Searches the cryptocurrency database for assets matching a name or symbol.
Use this to find the exact, correct symbol for a cryptocurrency name.
Cerca i simboli che contengono la query.
Args:
query (str): The name, partial name, or symbol to search for (e.g., "Bitcoin", "ETH").
query (str): Query di ricerca.
Returns:
list[tuple[str, str]]: A list of tuples, where each tuple contains
the (symbol, full_name) of a matching asset.
Returns an empty list if no matches are found.
list[tuple[str, str]]: Lista di tuple (simbolo, nome) che contengono la query.
"""
query_lower = query.lower()
positions = self.final_table['Name'].str.lower().str.contains(query_lower) | \
self.final_table['Symbol'].str.lower().str.contains(query_lower)
filtered_df = self.final_table[positions]
return list(zip(filtered_df['Symbol'], filtered_df['Name']))
positions = self.final_table['Name'].str.lower().str.contains(query_lower)
return self.final_table[positions][['Symbol', 'Name']].apply(tuple, axis=1).tolist()
async def fetch_crypto_symbols(self, force_refresh: bool = False) -> None:
"""
It retrieves all cryptocurrency symbols from Yahoo Finance and caches them.
Recupera tutti i simboli delle criptovalute da Yahoo Finance e li memorizza in cache.
Args:
force_refresh (bool): If True, it forces the retrieval even if the data are already in the cache.
force_refresh (bool): Se True, forza il recupero anche se i dati sono già in cache.
"""
if not force_refresh and not self.final_table.empty:
return

View File

@@ -9,11 +9,11 @@ class TestMarketDataAggregator:
def __product(self, symbol: str, price: float, volume: float, currency: str) -> ProductInfo:
prod = ProductInfo()
prod.id = f"{symbol}-{currency}"
prod.symbol = symbol
prod.price = price
prod.volume_24h = volume
prod.currency = currency
prod.id=f"{symbol}-{currency}"
prod.symbol=symbol
prod.price=price
prod.volume_24h=volume
prod.currency=currency
return prod
def __price(self, timestamp_s: int, high: float, low: float, open: float, close: float, volume: float) -> Price:
@@ -38,16 +38,12 @@ class TestMarketDataAggregator:
info = aggregated[0]
assert info is not None
assert info.id == "BTCUSD_AGGREGATED"
assert info.symbol == "BTC"
assert info.currency == "USD"
assert "Provider1" in info.provider
assert "Provider2" in info.provider
assert "Provider3" in info.provider
avg_weighted_price = (50000.0 * 1000.0 + 50100.0 * 1100.0 + 49900.0 * 900.0) / (1000.0 + 1100.0 + 900.0)
assert info.price == pytest.approx(avg_weighted_price, rel=1e-3) # type: ignore
assert info.volume_24h == pytest.approx(1000.0, rel=1e-3) # type: ignore
assert info.currency == "USD"
def test_aggregate_product_info_multiple_symbols(self):
products = {
@@ -131,80 +127,3 @@ class TestMarketDataAggregator:
assert aggregated[1].timestamp == timestamp_2h_ago
assert aggregated[1].high == pytest.approx(50250.0, rel=1e-3) # type: ignore
assert aggregated[1].low == pytest.approx(49850.0, rel=1e-3) # type: ignore
def test_aggregate_product_info_different_currencies(self):
products = {
"Provider1": [self.__product("BTC", 100000.0, 1000.0, "USD")],
"Provider2": [self.__product("BTC", 70000.0, 800.0, "EUR")],
}
aggregated = ProductInfo.aggregate(products)
assert len(aggregated) == 1
info = aggregated[0]
assert info is not None
assert info.id == "BTCUSD_AGGREGATED"
assert info.symbol == "BTC"
assert info.currency == "USD" # Only USD products are kept
# When currencies differ, only USD is aggregated (only Provider1 in this case)
assert info.price == pytest.approx(100000.0, rel=1e-3) # type: ignore
assert info.volume_24h == pytest.approx(1000.0, rel=1e-3) # type: ignore # Only USD volume
def test_aggregate_product_info_empty_providers(self):
"""Test aggregate_product_info with some providers returning empty lists"""
products: dict[str, list[ProductInfo]] = {
"Provider1": [self.__product("BTC", 50000.0, 1000.0, "USD")],
"Provider2": [],
"Provider3": [self.__product("BTC", 50100.0, 1100.0, "USD")],
}
aggregated = ProductInfo.aggregate(products)
assert len(aggregated) == 1
info = aggregated[0]
assert info.symbol == "BTC"
assert "Provider1" in info.provider
assert "Provider2" not in info.provider
assert "Provider3" in info.provider
def test_aggregate_product_info_mixed_symbols(self):
"""Test that aggregate_product_info correctly separates different symbols"""
products = {
"Provider1": [
self.__product("BTC", 50000.0, 1000.0, "USD"),
self.__product("ETH", 4000.0, 2000.0, "USD"),
self.__product("SOL", 100.0, 500.0, "USD"),
],
"Provider2": [
self.__product("BTC", 50100.0, 1100.0, "USD"),
self.__product("ETH", 4050.0, 2100.0, "USD"),
],
}
aggregated = ProductInfo.aggregate(products)
assert len(aggregated) == 3
symbols = {p.symbol for p in aggregated}
assert symbols == {"BTC", "ETH", "SOL"}
btc = next(p for p in aggregated if p.symbol == "BTC")
assert "Provider1" in btc.provider and "Provider2" in btc.provider
sol = next(p for p in aggregated if p.symbol == "SOL")
assert sol.provider == "Provider1" # Only one provider
def test_aggregate_product_info_zero_volume(self):
"""Test aggregazione quando tutti i prodotti hanno volume zero"""
products = {
"Provider1": [self.__product("BTC", 50000.0, 0.0, "USD")],
"Provider2": [self.__product("BTC", 50100.0, 0.0, "USD")],
"Provider3": [self.__product("BTC", 49900.0, 0.0, "USD")],
}
aggregated = ProductInfo.aggregate(products)
assert len(aggregated) == 1
info = aggregated[0]
# Con volume zero, dovrebbe usare la media semplice dei prezzi
expected_price = (50000.0 + 50100.0 + 49900.0) / 3
assert info.price == pytest.approx(expected_price, rel=1e-3) # type: ignore
assert info.volume_24h == 0.0