Merge branch 'main' into 38-news-problem

This commit is contained in:
2025-10-27 21:10:02 +01:00
59 changed files with 11204 additions and 1504 deletions

View File

@@ -0,0 +1,22 @@
from datetime import datetime
def unified_timestamp(timestamp_ms: int | None = None, timestamp_s: int | None = None) -> str:
"""
Transform the timestamp from milliseconds or seconds to a unified string format.
The resulting string is a formatted string 'YYYY-MM-DD HH:MM'.
Args:
timestamp_ms: Timestamp in milliseconds.
timestamp_s: Timestamp in seconds.
Raises:
ValueError: If neither timestamp_ms nor timestamp_s is provided.
"""
if timestamp_ms is not None:
timestamp = timestamp_ms // 1000
elif timestamp_s is not None:
timestamp = timestamp_s
else:
raise ValueError("Either timestamp_ms or timestamp_s must be provided")
assert timestamp > 0, "Invalid timestamp data received"
return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M')

View File

@@ -1,6 +1,6 @@
import statistics
from datetime import datetime
from pydantic import BaseModel
from app.api.core import unified_timestamp
class ProductInfo(BaseModel):
@@ -64,24 +64,8 @@ class Price(BaseModel):
"""Timestamp in format YYYY-MM-DD HH:MM"""
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
"""
Sets the timestamp from milliseconds or seconds.
The timestamp is saved as a formatted string 'YYYY-MM-DD HH:MM'.
Args:
timestamp_ms: Timestamp in milliseconds.
timestamp_s: Timestamp in seconds.
Raises:
ValueError: If neither timestamp_ms nor timestamp_s is provided.
"""
if timestamp_ms is not None:
timestamp = timestamp_ms // 1000
elif timestamp_s is not None:
timestamp = timestamp_s
else:
raise ValueError("Either timestamp_ms or timestamp_s must be provided")
assert timestamp > 0, "Invalid timestamp data received"
self.timestamp = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M')
""" Use the unified_timestamp function to set the timestamp."""
self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
@staticmethod
def aggregate(prices: dict[str, list['Price']]) -> list['Price']:

View File

@@ -1,22 +1,34 @@
from pydantic import BaseModel
from app.api.core import unified_timestamp
MAX_COMMENTS = 5
class SocialPost(BaseModel):
"""
Represents a social media post with time, title, description, and comments.
"""
time: str = ""
timestamp: str = ""
title: str = ""
description: str = ""
comments: list["SocialComment"] = []
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
""" Use the unified_timestamp function to set the time."""
self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
class SocialComment(BaseModel):
"""
Represents a comment on a social media post.
"""
time: str = ""
timestamp: str = ""
description: str = ""
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
""" Use the unified_timestamp function to set the time."""
self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
class SocialWrapper:
"""

View File

@@ -57,7 +57,9 @@ class BinanceWrapper(MarketWrapper):
"""
Formatta l'asset_id nel formato richiesto da Binance.
"""
return asset_id.replace('-', '') if '-' in asset_id else f"{asset_id}{self.currency}"
i = asset_id.find('-')
if i != -1: asset_id = asset_id[:i]
return f"{asset_id}{self.currency}" if self.currency not in asset_id else asset_id
def get_product(self, asset_id: str) -> ProductInfo:
symbol = self.__format_symbol(asset_id)

View File

@@ -61,7 +61,9 @@ class CoinBaseWrapper(MarketWrapper):
)
def __format(self, asset_id: str) -> str:
return asset_id if '-' in asset_id else f"{asset_id}-{self.currency}"
i = asset_id.find('-')
if i != -1: asset_id = asset_id[:i]
return f"{asset_id}-{self.currency}"
def get_product(self, asset_id: str) -> ProductInfo:
asset_id = self.__format(asset_id)

View File

@@ -47,8 +47,9 @@ class YFinanceWrapper(MarketWrapper):
Formatta il simbolo per yfinance.
Per crypto, aggiunge '-' e la valuta (es. BTC -> BTC-USD).
"""
asset_id = asset_id.upper()
return f"{asset_id}-{self.currency}" if '-' not in asset_id else asset_id
i = asset_id.find('-')
if i != -1: asset_id = asset_id[:i]
return f"{asset_id}-{self.currency}"
def get_product(self, asset_id: str) -> ProductInfo:
symbol = self._format_symbol(asset_id)

View File

@@ -1,3 +1,5 @@
from app.api.social.reddit import RedditWrapper
from app.api.social.x import XWrapper
from app.api.social.chan import ChanWrapper
__all__ = ["RedditWrapper"]
__all__ = ["RedditWrapper", "XWrapper", "ChanWrapper"]

View File

@@ -0,0 +1,94 @@
import re
import html
import requests
import warnings
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from datetime import datetime
from app.api.core.social import *
# Ignore the BeautifulSoup warnings raised when it receives malformed HTML or a bare link where it expects a full HTML document
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
class ChanWrapper(SocialWrapper):
    """
    Wrapper for the 4chan API, specifically for the /biz/ board (Business & Finance).
    API source: https://a.4cdn.org/biz/catalog.json
    """

    def __init__(self):
        super().__init__()

    def __time_str(self, timestamp: str) -> int:
        """
        Convert a 4chan 'MM/DD/YY(Day)HH:MM:SS' string to milliseconds.

        The parsed datetime is naive, so `timestamp()` interprets it in the
        machine's local timezone.
        """
        time = datetime.strptime(timestamp, "%m/%d/%y(%a)%H:%M:%S")
        return int(time.timestamp() * 1000)

    def __unformat_html_str(self, html_element: str) -> str:
        """Clean a title/comment by removing HTML markup and useless formatting."""
        if not html_element:
            return ""

        html_entities = html.unescape(html_element)
        soup = BeautifulSoup(html_entities, 'html.parser')
        html_element = soup.get_text(separator=" ")
        # Collapse any run of slashes/backslashes into a single '/'
        html_element = re.sub(r"[\\/]+", "/", html_element)
        # Collapse any run of whitespace into a single space
        html_element = re.sub(r"\s+", " ", html_element).strip()
        return html_element

    def get_top_crypto_posts(self, limit: int = 5) -> list[SocialPost]:
        """
        Download the /biz/ catalog and convert its threads into SocialPost objects.

        Sticky threads and threads without a text body are skipped. Each post
        carries at most MAX_COMMENTS of the replies listed in the catalog.

        Args:
            limit: Maximum number of posts to return.
        Returns:
            The first `limit` posts, in catalog order.
        Raises:
            AssertionError: If the API does not answer with HTTP 200.
        """
        url = 'https://a.4cdn.org/biz/catalog.json'
        # A timeout prevents the call from hanging forever on a stalled connection.
        catalog = requests.get(url, timeout=10)
        assert catalog.status_code == 200, f"Error in 4chan API request [{catalog.status_code}] {catalog.text}"

        social_posts: list[SocialPost] = []

        # The payload is a list with one dictionary per board page, shaped like
        # {"page": page_number, "threads": [{thread_data}]}
        for page in catalog.json():
            for thread in page['threads']:
                # 'sticky' is only present on pinned threads, which can be ignored
                if 'sticky' in thread:
                    continue

                # Creation date of the thread, formatted as "MM/DD/YY(day)hh:mm:ss"
                thread_time = self.__time_str(thread.get('now', ''))

                # Name of the poster
                name: str = thread.get('name', 'Anonymous')

                # Thread subject: may contain HTML formatting and may be missing
                title = self.__unformat_html_str(thread.get('sub', ''))
                title = f"{name} posted: {title}"

                # Thread body: may contain HTML formatting
                thread_description = self.__unformat_html_str(thread.get('com', ''))
                if not thread_description:
                    continue

                # Replies to the main thread, structured similarly to the thread itself
                replies = thread.get('last_replies', [])
                comments_list: list[SocialComment] = []

                for reply in replies[:MAX_COMMENTS]:
                    # Kept in its own variable: reusing the thread's variable here
                    # would make the post below inherit the last reply's time.
                    reply_time = self.__time_str(reply['now'])

                    # Reply body: may contain HTML formatting
                    comment = self.__unformat_html_str(reply.get('com', ''))
                    if not comment:
                        continue

                    social_comment = SocialComment(description=comment)
                    social_comment.set_timestamp(timestamp_ms=reply_time)
                    comments_list.append(social_comment)

                social_post: SocialPost = SocialPost(
                    title=title,
                    description=thread_description,
                    comments=comments_list
                )
                social_post.set_timestamp(timestamp_ms=thread_time)
                social_posts.append(social_post)

        return social_posts[:limit]

View File

@@ -1,10 +1,9 @@
import os
from praw import Reddit # type: ignore
from praw.models import Submission # type: ignore
from app.api.core.social import SocialWrapper, SocialPost, SocialComment
from app.api.core.social import *
MAX_COMMENTS = 5
# metterne altri se necessario.
# fonti: https://lkiconsulting.io/marketing/best-crypto-subreddits/
SUBREDDITS = [
@@ -24,13 +23,13 @@ SUBREDDITS = [
def extract_post(post: Submission) -> SocialPost:
social = SocialPost()
social.time = str(post.created)
social.set_timestamp(timestamp_s=post.created)
social.title = post.title
social.description = post.selftext
for top_comment in post.comments:
comment = SocialComment()
comment.time = str(top_comment.created)
comment.set_timestamp(timestamp_s=top_comment.created)
comment.description = top_comment.body
social.comments.append(comment)

48
src/app/api/social/x.py Normal file
View File

@@ -0,0 +1,48 @@
import os
import json
import subprocess
from shutil import which
from datetime import datetime
from app.api.core.social import SocialWrapper, SocialPost
# List of X users whose posts are considered interesting.
# To get the ID of a new user, search for the account on X, copy its URL and paste it into a service like "https://get-id-x.foundtt.com/en/"
X_USERS = [
'watcherguru',
'Cointelegraph',
'BTC_Archive',
'elonmusk'
]
class XWrapper(SocialWrapper):
    """
    Wrapper that reads posts from X through the `rettiwt` command line tool.
    """

    def __init__(self):
        '''
        This wrapper uses the rettiwt API to get data from X in order to avoid the rate limits of the free X API.
        Even if improbable, this could lead to a ban, so do not use a personal account.
        The rettiwt cli tool must be installed for this wrapper to work; for more information
        visit the official documentation at https://www.npmjs.com/package/rettiwt-api

        Raises:
            AssertionError: If X_API_KEY is not set or the `rettiwt` command is not found.
        '''
        super().__init__()
        self.api_key = os.getenv("X_API_KEY")
        assert self.api_key, "X_API_KEY environment variable not set"
        assert which('rettiwt') is not None, "Command `rettiwt` not installed"

    def get_top_crypto_posts(self, limit:int = 5) -> list[SocialPost]:
        """
        Collect the latest tweets of every user listed in X_USERS.

        Args:
            limit: Number of tweets requested from rettiwt for EACH user; the results of
                all users are concatenated and returned without further truncation.
        Returns:
            One SocialPost per tweet, grouped by user in X_USERS order.
        Raises:
            json.JSONDecodeError: If rettiwt does not print valid JSON (e.g. the command failed).
        """
        posts: list[SocialPost] = []

        for user in X_USERS:
            cmd = ['rettiwt', '-k', self.api_key, 'tweet', 'search', str(limit), '-f', str(user)]
            process = subprocess.run(cmd, capture_output=True)
            results = process.stdout.decode()

            try:
                json_result = json.loads(results)
            except json.JSONDecodeError as err:
                # Same exception type as before, but with the exit code and stderr of the
                # CLI attached, otherwise the real cause of the failure is lost.
                stderr = process.stderr.decode(errors='replace').strip()
                raise json.JSONDecodeError(
                    f"rettiwt returned no valid JSON for user '{user}' "
                    f"(exit code {process.returncode}): {stderr or err.msg}",
                    err.doc,
                    err.pos,
                ) from err

            for tweet in json_result.get('list', []):
                created_at = datetime.fromisoformat(tweet['createdAt'])
                social_post = SocialPost()
                social_post.set_timestamp(timestamp_s=int(created_at.timestamp()))
                social_post.title = f"{user} tweeted: "
                social_post.description = tweet['fullText']
                posts.append(social_post)

        return posts

View File

@@ -1,5 +1,6 @@
from app.api.tools.market_tool import MarketAPIsTool
from app.api.tools.social_tool import SocialAPIsTool
from app.api.tools.news_tool import NewsAPIsTool
from app.api.tools.symbols_tool import CryptoSymbolsTools
__all__ = ["MarketAPIsTool", "NewsAPIsTool", "SocialAPIsTool"]
__all__ = ["MarketAPIsTool", "NewsAPIsTool", "SocialAPIsTool", "CryptoSymbolsTools"]

View File

@@ -2,33 +2,29 @@ from agno.tools import Toolkit
from app.api.wrapper_handler import WrapperHandler
from app.api.core.markets import MarketWrapper, Price, ProductInfo
from app.api.markets import BinanceWrapper, CoinBaseWrapper, CryptoCompareWrapper, YFinanceWrapper
from app.configs import AppConfig
class MarketAPIsTool(MarketWrapper, Toolkit):
"""
Class that aggregates multiple market API wrappers and manages them using WrapperHandler.
This class supports retrieving product information and historical prices.
This class can also aggregate data from multiple sources to provide a more comprehensive view of the market.
The following wrappers are included in this order:
- BinanceWrapper
- YFinanceWrapper
- CoinBaseWrapper
- CryptoCompareWrapper
Providers can be configured in configs.yaml under api.market_providers.
"""
def __init__(self, currency: str = "USD"):
def __init__(self):
"""
Initialize the MarketAPIsTool with multiple market API wrappers.
The following wrappers are included in this order:
- BinanceWrapper
- YFinanceWrapper
- CoinBaseWrapper
- CryptoCompareWrapper
Args:
currency (str): Valuta in cui restituire i prezzi. Default è "USD".
Initialize the MarketAPIsTool with market API wrappers configured in configs.yaml.
The order of wrappers is determined by the api.market_providers list in the configuration.
"""
kwargs = {"currency": currency or "USD"}
wrappers: list[type[MarketWrapper]] = [BinanceWrapper, YFinanceWrapper, CoinBaseWrapper, CryptoCompareWrapper]
self.handler = WrapperHandler.build_wrappers(wrappers, kwargs=kwargs)
config = AppConfig()
self.handler = WrapperHandler.build_wrappers(
constructors=[BinanceWrapper, YFinanceWrapper, CoinBaseWrapper, CryptoCompareWrapper],
filters=config.api.market_providers,
try_per_wrapper=config.api.retry_attempts,
retry_delay=config.api.retry_delay_seconds
)
Toolkit.__init__( # type: ignore
self,

View File

@@ -2,15 +2,13 @@ from agno.tools import Toolkit
from app.api.wrapper_handler import WrapperHandler
from app.api.core.news import NewsWrapper, Article
from app.api.news import NewsApiWrapper, GoogleNewsWrapper, CryptoPanicWrapper, DuckDuckGoWrapper
from app.configs import AppConfig
class NewsAPIsTool(NewsWrapper, Toolkit):
"""
Aggregates multiple news API wrappers and manages them using WrapperHandler.
This class supports retrieving top headlines and latest news articles by querying multiple sources:
- GoogleNewsWrapper
- DuckDuckGoWrapper
- NewsApiWrapper
- CryptoPanicWrapper
This class supports retrieving top headlines and latest news articles by querying multiple sources.
Providers can be configured in configs.yaml under api.news_providers.
By default, it returns results from the first successful wrapper.
Optionally, it can be configured to collect articles from all wrappers.
@@ -19,16 +17,17 @@ class NewsAPIsTool(NewsWrapper, Toolkit):
def __init__(self):
"""
Initialize the NewsAPIsTool with multiple news API wrappers.
The tool uses WrapperHandler to manage and invoke the different news API wrappers.
The following wrappers are included in this order:
- GoogleNewsWrapper.
- DuckDuckGoWrapper.
- NewsApiWrapper.
- CryptoPanicWrapper.
Initialize the NewsAPIsTool with news API wrappers configured in configs.yaml.
The order of wrappers is determined by the api.news_providers list in the configuration.
"""
wrappers: list[type[NewsWrapper]] = [GoogleNewsWrapper, DuckDuckGoWrapper, NewsApiWrapper, CryptoPanicWrapper]
self.handler = WrapperHandler.build_wrappers(wrappers)
config = AppConfig()
self.handler = WrapperHandler.build_wrappers(
constructors=[NewsApiWrapper, GoogleNewsWrapper, CryptoPanicWrapper, DuckDuckGoWrapper],
filters=config.api.news_providers,
try_per_wrapper=config.api.retry_attempts,
retry_delay=config.api.retry_delay_seconds
)
Toolkit.__init__( # type: ignore
self,

View File

@@ -1,14 +1,15 @@
from agno.tools import Toolkit
from app.api.wrapper_handler import WrapperHandler
from app.api.core.social import SocialPost, SocialWrapper
from app.api.social import RedditWrapper
from app.api.social import *
from app.configs import AppConfig
class SocialAPIsTool(SocialWrapper, Toolkit):
"""
Aggregates multiple social media API wrappers and manages them using WrapperHandler.
This class supports retrieving top crypto-related posts by querying multiple sources:
- RedditWrapper
This class supports retrieving top crypto-related posts by querying multiple sources.
Providers can be configured in configs.yaml under api.social_providers.
By default, it returns results from the first successful wrapper.
Optionally, it can be configured to collect posts from all wrappers.
@@ -17,14 +18,17 @@ class SocialAPIsTool(SocialWrapper, Toolkit):
def __init__(self):
"""
Initialize the SocialAPIsTool with multiple social media API wrappers.
The tool uses WrapperHandler to manage and invoke the different social media API wrappers.
The following wrappers are included in this order:
- RedditWrapper.
Initialize the SocialAPIsTool with social media API wrappers configured in configs.yaml.
The order of wrappers is determined by the api.social_providers list in the configuration.
"""
config = AppConfig()
wrappers: list[type[SocialWrapper]] = [RedditWrapper]
self.handler = WrapperHandler.build_wrappers(wrappers)
self.handler = WrapperHandler.build_wrappers(
constructors=[RedditWrapper, XWrapper, ChanWrapper],
filters=config.api.social_providers,
try_per_wrapper=config.api.retry_attempts,
retry_delay=config.api.retry_delay_seconds
)
Toolkit.__init__( # type: ignore
self,

View File

@@ -0,0 +1,103 @@
import os
import httpx
import asyncio
import logging
import pandas as pd
from io import StringIO
from agno.tools.toolkit import Toolkit
logging.basicConfig(level=logging.INFO)
logging = logging.getLogger("crypto_symbols")
BASE_URL = "https://finance.yahoo.com/markets/crypto/all/"
class CryptoSymbolsTools(Toolkit):
    """
    Toolkit that provides cryptocurrency symbols obtained from Yahoo Finance.

    The symbols are kept in a DataFrame that is loaded from (and saved to) a CSV cache file.
    """

    def __init__(self, cache_file: str = 'resources/cryptos.csv'):
        """
        Load the cached symbols table, if present, and register the tools.

        Args:
            cache_file (str): Path of the CSV file used as cache.
        """
        self.cache_file = cache_file
        self.final_table = pd.read_csv(self.cache_file) if os.path.exists(self.cache_file) else pd.DataFrame()  # type: ignore
        Toolkit.__init__(self,  # type: ignore
            name="Crypto Symbols Tool",
            instructions="Tool to get cryptocurrency symbols and search them by name.",
            tools=[
                self.get_all_symbols,
                self.get_symbols_by_name,
            ],
        )

    def get_all_symbols(self) -> list[str]:
        """
        Return all the cryptocurrency symbols.

        Returns:
            list[str]: List of all the cryptocurrency symbols (empty if nothing is cached).
        """
        return self.final_table['Symbol'].tolist() if not self.final_table.empty else []

    def get_symbols_by_name(self, query: str) -> list[tuple[str, str]]:
        """
        Search the symbols whose name contains the query (case-insensitive).

        Args:
            query (str): Search query, matched as plain text.
        Returns:
            list[tuple[str, str]]: List of (symbol, name) tuples whose name contains the query.
        """
        # Without cached data there is no 'Name' column to search in.
        if self.final_table.empty:
            return []

        names = self.final_table['Name'].str.lower()
        # regex=False: the query is a literal substring, so characters such as '(' or '+'
        # cannot break the search. na=False: rows without a name simply do not match.
        positions = names.str.contains(query.lower(), regex=False, na=False)
        matches = self.final_table.loc[positions, ['Symbol', 'Name']]
        return list(matches.itertuples(index=False, name=None))

    async def fetch_crypto_symbols(self, force_refresh: bool = False) -> None:
        """
        Fetch all the cryptocurrency symbols from Yahoo Finance and store them in the cache.

        Args:
            force_refresh (bool): If True, fetch the data again even if it is already cached.
        """
        if not force_refresh and not self.final_table.empty:
            return

        num_currencies = 250  # It looks like this is the max per page otherwise yahoo returns 26
        offset = 0
        # Always start from an empty table: a forced refresh must replace the cached
        # rows, not skip the download or append duplicates to them.
        table = pd.DataFrame()

        while True:
            text = await self.___request(offset, num_currencies)
            tables = pd.read_html(text)  # type: ignore
            df = tables[0]
            if not table.empty:
                # Align the column labels of every page with those of the first one
                df.columns = table.columns
            table = pd.concat([table, df], ignore_index=True)

            total_rows = df.shape[0]
            offset += total_rows
            # A page shorter than requested is the last one
            if total_rows < num_currencies:
                break

        table.dropna(axis=0, how='all', inplace=True)  # type: ignore
        table.dropna(axis=1, how='all', inplace=True)  # type: ignore

        cache_dir = os.path.dirname(self.cache_file)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        table.to_csv(self.cache_file, index=False)
        self.final_table = table

    async def ___request(self, offset: int, num_currencies: int) -> StringIO:
        """
        Download one page of the Yahoo Finance crypto list.

        Requests answered with 429 are retried after the delay suggested by the server.

        Args:
            offset (int): Index of the first currency of the page.
            num_currencies (int): Number of currencies requested for the page.
        Returns:
            StringIO: The HTML of the page, or an empty buffer on any non-200 answer.
        """
        while True:
            async with httpx.AsyncClient() as client:
                resp = await client.get(f"{BASE_URL}?start={offset}&count={num_currencies}", headers={"User-Agent": "Mozilla/5.0"})

            if resp.status_code == 429:  # Too many requests
                try:
                    secs = int(resp.headers.get("Retry-After", 2))
                except ValueError:
                    # Retry-After may also be an HTTP date: fall back to the default delay
                    secs = 2
                logging.warning(f"Rate limit exceeded, waiting {secs}s before retrying...")
                await asyncio.sleep(secs)
                continue

            if resp.status_code != 200:
                logging.error(f"Error fetching crypto symbols: [{resp.status_code}] {resp.text}")
                break

            return StringIO(resp.text)
        return StringIO("")
if __name__ == "__main__":
    # Executed as a script: run a forced fetch of the crypto symbols.
    symbols_tool = CryptoSymbolsTools()
    fetch_job = symbols_tool.fetch_crypto_symbols(force_refresh=True)
    asyncio.run(fetch_job)

View File

@@ -87,7 +87,7 @@ class WrapperHandler(Generic[WrapperType]):
Exception: If all wrappers fail after retries.
"""
logging.info(f"{inspect.getsource(func).strip()} {inspect.getclosurevars(func).nonlocals}")
logging.debug(f"{inspect.getsource(func).strip()} {inspect.getclosurevars(func).nonlocals}")
results: dict[str, OutputType] = {}
starting_index = self.index
@@ -97,12 +97,12 @@ class WrapperHandler(Generic[WrapperType]):
wrapper_name = wrapper.__class__.__name__
if not try_all:
logging.info(f"try_call {wrapper_name}")
logging.debug(f"try_call {wrapper_name}")
for try_count in range(1, self.retry_per_wrapper + 1):
try:
result = func(wrapper)
logging.info(f"{wrapper_name} succeeded")
logging.debug(f"{wrapper_name} succeeded")
results[wrapper_name] = result
break
@@ -131,13 +131,19 @@ class WrapperHandler(Generic[WrapperType]):
return f"{e} [\"{last_frame.filename}\", line {last_frame.lineno}]"
@staticmethod
def build_wrappers(constructors: list[type[WrapperClassType]], try_per_wrapper: int = 3, retry_delay: int = 2, kwargs: dict[str, Any] | None = None) -> 'WrapperHandler[WrapperClassType]':
def build_wrappers(
constructors: list[type[WrapperClassType]],
filters: list[str] | None = None,
try_per_wrapper: int = 3,
retry_delay: int = 2,
kwargs: dict[str, Any] | None = None) -> 'WrapperHandler[WrapperClassType]':
"""
Builds a WrapperHandler instance with the given wrapper constructors.
It attempts to initialize each wrapper and logs a warning if any cannot be initialized.
Only successfully initialized wrappers are included in the handler.
Args:
constructors (list[type[W]]): An iterable of wrapper classes to instantiate. e.g. [WrapperA, WrapperB]
filters (list[str] | None): Optional list of provider names to filter the constructors.
try_per_wrapper (int): Number of retries per wrapper before switching to the next.
retry_delay (int): Delay in seconds between retries.
kwargs (dict | None): Optional dictionary with keyword arguments common to all wrappers.
@@ -148,6 +154,10 @@ class WrapperHandler(Generic[WrapperType]):
"""
assert WrapperHandler.__check(constructors), f"All constructors must be classes. Received: {constructors}"
# Order of wrappers is now determined by the order in filters
if filters:
constructors = [c for name in filters for c in constructors if c.__name__ == name]
result: list[WrapperClassType] = []
for wrapper_class in constructors:
try:
@@ -156,4 +166,4 @@ class WrapperHandler(Generic[WrapperType]):
except Exception as e:
logging.warning(f"'{wrapper_class.__name__}' cannot be initialized: {e}")
return WrapperHandler(result, try_per_wrapper, retry_delay)
return WrapperHandler(result, try_per_wrapper, retry_delay)