diff --git a/pyproject.toml b/pyproject.toml
index 127d77a..e7b2209 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ dependencies = [
     "dotenv", # Gestire variabili d'ambiente (generalmente API keys od opzioni)
     "gradio", # UI web semplice con user_input e output
     "colorlog", # Log colorati in console
+    "html5lib", # Parsing HTML & Scraping
 
     # Per costruire agenti (ovvero modelli che possono fare più cose tramite tool) https://github.com/agno-agi/agno
     # altamente consigliata dato che ha anche tools integrati per fare scraping, calcoli e molto altro
diff --git a/src/app/api/crypto_symbols.py b/src/app/api/crypto_symbols.py
new file mode 100644
index 0000000..f97dcc6
--- /dev/null
+++ b/src/app/api/crypto_symbols.py
@@ -0,0 +1,115 @@
+import os
+import httpx
+import asyncio
+import logging
+import pandas as pd
+from io import StringIO
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("crypto_symbols")
+
+
+BASE_URL = "https://finance.yahoo.com/markets/crypto/all/"
+PAGE_SIZE = 250  # It looks like this is the max per page, otherwise Yahoo returns only 26 rows
+MAX_RETRIES = 5  # Attempts per page before giving up on HTTP 429 responses
+DEFAULT_RETRY_DELAY = 2  # Seconds to wait when Retry-After is missing or not a number
+
+
+class CryptoSymbols:
+    """
+    Fetch cryptocurrency symbols from Yahoo Finance and cache them as CSV.
+    """
+
+    def __init__(self, cache_file: str = 'cryptos.csv'):
+        """Load the cached table from ``cache_file`` if it exists, else start empty."""
+        self.cache_file = cache_file
+        try:
+            self.final_table = pd.read_csv(self.cache_file) if os.path.exists(self.cache_file) else pd.DataFrame() # type: ignore
+        except pd.errors.EmptyDataError:
+            # A zero-byte or header-less cache file is treated as "no cache"
+            self.final_table = pd.DataFrame()
+
+    def get_symbols(self) -> list[str]:
+        """Return the values of the cached 'Symbol' column, or [] when unavailable."""
+        if self.final_table.empty or 'Symbol' not in self.final_table.columns:
+            return []
+        return self.final_table['Symbol'].tolist()
+
+    async def fetch_crypto_symbols(self, force_refresh: bool = False) -> None:
+        """
+        Download every page of the listing and store the result in the cache.
+
+        Does nothing when a cached table is already loaded, unless
+        ``force_refresh`` is True. If any page cannot be fetched or parsed the
+        refresh is aborted and the existing table and cache file are kept.
+        """
+        if not force_refresh and not self.final_table.empty:
+            return
+
+        pages: list[pd.DataFrame] = []
+        offset = 0
+        while True:
+            html = await self._request(offset, PAGE_SIZE)
+            if not html.getvalue():
+                logger.warning("Refresh aborted at offset %d, keeping the existing cache", offset)
+                return
+            try:
+                page = pd.read_html(html)[0] # type: ignore
+            except ValueError:
+                # pandas raises ValueError when the document contains no table
+                logger.error("No table found at offset %d, keeping the existing cache", offset)
+                return
+
+            # Reuse the first page's header so that concat lines the columns up
+            if pages and len(page.columns) == len(pages[0].columns):
+                page.columns = pages[0].columns
+            pages.append(page)
+
+            offset += len(page)
+            if len(page) < PAGE_SIZE:
+                break  # A short page is the last one
+
+        table = pd.concat(pages, ignore_index=True)
+        table.dropna(axis=0, how='all', inplace=True) # type: ignore
+        table.dropna(axis=1, how='all', inplace=True) # type: ignore
+        table.to_csv(self.cache_file, index=False)
+        self.final_table = table
+
+    async def _request(self, offset: int, num_currencies: int) -> StringIO:
+        """
+        Fetch one page of the listing, retrying while the server answers 429.
+
+        Returns the response body, or an empty buffer on a non-200 status or
+        when the retry budget is exhausted.
+        """
+        params = {"start": offset, "count": num_currencies}
+        headers = {"User-Agent": "Mozilla/5.0"}
+        # One client for all attempts instead of a new connection pool per retry
+        async with httpx.AsyncClient() as client:
+            for _ in range(MAX_RETRIES):
+                resp = await client.get(BASE_URL, params=params, headers=headers)
+                if resp.status_code == 429: # Too many requests
+                    secs = self._retry_delay(resp.headers.get("Retry-After", ""))
+                    logger.warning("Rate limit exceeded, waiting %ds before retrying...", secs)
+                    await asyncio.sleep(secs)
+                    continue
+                if resp.status_code != 200:
+                    logger.error("Error fetching crypto symbols: [%d] %s", resp.status_code, resp.text)
+                    return StringIO("")
+                return StringIO(resp.text)
+        logger.error("Still rate limited after %d attempts, giving up", MAX_RETRIES)
+        return StringIO("")
+
+    @staticmethod
+    def _retry_delay(value: str) -> int:
+        """Parse a Retry-After value given in seconds, falling back to the default."""
+        try:
+            return max(int(value), 0)
+        except ValueError:
+            # Header missing, or expressed as an HTTP-date instead of seconds
+            return DEFAULT_RETRY_DELAY
+
+
+if __name__ == "__main__":
+    crypto_symbols = CryptoSymbols()
+    asyncio.run(crypto_symbols.fetch_crypto_symbols(force_refresh=True))
diff --git
a/uv.lock b/uv.lock index 000517c..04c28e5 100644 --- a/uv.lock +++ b/uv.lock @@ -690,6 +690,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, ] +[[package]] +name = "html5lib" +version = "1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ac/b6/b55c3f49042f1df3dcd422b7f224f939892ee94f22abcf503a9b7339eaf2/html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f", size = 272215, upload-time = "2020-06-22T23:32:38.834Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6c/dd/a834df6482147d48e225a49515aabc28974ad5a4ca3215c18a882565b028/html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d", size = 112173, upload-time = "2020-06-22T23:32:36.781Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -1662,6 +1675,7 @@ dependencies = [ { name = "gnews" }, { name = "google-genai" }, { name = "gradio" }, + { name = "html5lib" }, { name = "markdown-pdf" }, { name = "newsapi-python" }, { name = "ollama" }, @@ -1682,6 +1696,7 @@ requires-dist = [ { name = "gnews" }, { name = "google-genai" }, { name = "gradio" }, + { name = "html5lib" }, { name = "markdown-pdf" }, { name = "newsapi-python" }, { name = "ollama" }, @@ -1714,6 +1729,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/85/cd/584a2ceb5532af99dd09e50919e3615ba99aa127e9850eafe5f31ddfdb9a/uvicorn-0.37.0-py3-none-any.whl", hash = "sha256:913b2b88672343739927ce381ff9e2ad62541f9f8289664fa1d1d3803fa2ce6c", size = 67976, upload-time = "2025-09-23T13:33:45.842Z" }, ] +[[package]] +name = "webencodings" 
+version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721, upload-time = "2017-04-05T20:21:34.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774, upload-time = "2017-04-05T20:21:32.581Z" }, +] + [[package]] name = "websocket-client" version = "1.8.0"