CryptoSymbols per recuperare TUTTI i simboli delle criptovalute da Yahoo Finance

This commit is contained in:
2025-10-17 12:28:39 +02:00
parent 38daafce9a
commit 16e0443643
3 changed files with 96 additions and 0 deletions

View File

@@ -14,6 +14,7 @@ dependencies = [
"dotenv", # Gestire variabili d'ambiente (generalmente API keys od opzioni)
"gradio", # UI web semplice con user_input e output
"colorlog", # Log colorati in console
"html5lib", # Parsing HTML & Scraping
# Per costruire agenti (ovvero modelli che possono fare più cose tramite tool) https://github.com/agno-agi/agno
# altamente consigliata dato che ha anche tools integrati per fare scraping, calcoli e molto altro

View File

@@ -0,0 +1,71 @@
import os
import httpx
import asyncio
import logging
import pandas as pd
from io import StringIO
# Configure the root logger so that INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)
# NOTE(review): this rebinds the name `logging` from the stdlib module to a
# Logger instance named "crypto_symbols". The calls to logging.warning() and
# logging.error() further down in this file therefore go to that Logger, and
# the module itself is no longer reachable under this name after this line.
logging = logging.getLogger("crypto_symbols")
# Yahoo Finance listing page for cryptocurrencies; the request helper appends
# the `start` and `count` query parameters to this URL.
BASE_URL = "https://finance.yahoo.com/markets/crypto/all/"
class CryptoSymbols:
    """
    Fetch cryptocurrency symbols from Yahoo Finance and cache them as CSV.

    The scraped listing is held in ``final_table`` (a pandas DataFrame) and
    persisted to ``cache_file`` so that later runs can skip the network.
    """

    def __init__(self, cache_file: str = 'cryptos.csv'):
        """
        Load the cached table from ``cache_file`` if it exists.

        Args:
            cache_file: Path of the CSV file used as the on-disk cache.
        """
        self.cache_file = cache_file
        self.final_table = pd.DataFrame()
        if os.path.exists(self.cache_file):
            try:
                self.final_table = pd.read_csv(self.cache_file)  # type: ignore
            except pd.errors.EmptyDataError:
                # A blank cache file is treated like a missing one instead of
                # making the constructor fail.
                logging.warning(f"Cache file {self.cache_file} is empty, ignoring it")

    def get_symbols(self) -> list[str]:
        """
        Return the values of the ``Symbol`` column of the cached table.

        Returns:
            The symbols as a list, or an empty list when the table is empty
            or has no ``Symbol`` column.
        """
        if self.final_table.empty or 'Symbol' not in self.final_table.columns:
            return []
        return self.final_table['Symbol'].tolist()

    async def fetch_crypto_symbols(self, force_refresh: bool = False) -> None:
        """
        Download the full listing page by page and store it in the cache.

        Nothing is downloaded when a table is already loaded, unless
        ``force_refresh`` is set. A refresh always starts from an empty
        table, so rows from the previous cache are not duplicated. If any
        page cannot be fetched or parsed, the current table and cache file
        are left untouched.

        Args:
            force_refresh: Re-download even if a table is already loaded.
        """
        if not force_refresh and not self.final_table.empty:
            return

        # 250 appears to be the largest page size Yahoo honours; asking for
        # more makes it fall back to 26 rows per page.
        num_currencies = 250
        offset = 0
        pages: list[pd.DataFrame] = []

        while True:
            text = await self.___request(offset, num_currencies)
            if not text.getvalue().strip():
                # The request helper returns an empty buffer on failure.
                logging.error("Empty response while fetching crypto symbols, keeping the current table")
                return
            try:
                tables = pd.read_html(text)  # type: ignore
            except ValueError as err:
                # pandas raises ValueError when the document has no <table>.
                logging.error(f"Could not parse crypto symbols page: {err}")
                return

            df = tables[0]
            if pages:
                # Reuse the first page's headers so every page lines up.
                df.columns = pages[0].columns
            pages.append(df)

            total_rows = df.shape[0]
            offset += total_rows
            if total_rows < num_currencies:
                # A short page means the end of the listing was reached.
                break

        # Concatenate once at the end instead of re-copying on every page.
        table = pd.concat(pages, ignore_index=True)
        table.dropna(axis=0, how='all', inplace=True)  # type: ignore
        table.dropna(axis=1, how='all', inplace=True)  # type: ignore
        table.to_csv(self.cache_file, index=False)
        self.final_table = table

    async def ___request(self, offset: int, num_currencies: int) -> StringIO:
        """
        Fetch one listing page and return its HTML wrapped in a StringIO.

        HTTP 429 responses are retried after the delay given by the
        ``Retry-After`` header (2 seconds when absent or not an integer),
        up to a fixed number of attempts.

        Args:
            offset: Index of the first row to request (``start`` parameter).
            num_currencies: Number of rows to request (``count`` parameter).

        Returns:
            The page body, or an empty StringIO when the server answers with
            a non-200 status or keeps rate limiting.
        """
        # NOTE: the leading underscores make this name subject to Python's
        # private-name mangling inside the class body.
        url = f"{BASE_URL}?start={offset}&count={num_currencies}"
        max_attempts = 5

        # One client is shared by all retries of this page.
        async with httpx.AsyncClient() as client:
            for _ in range(max_attempts):
                resp = await client.get(url, headers={"User-Agent": "Mozilla/5.0"})

                if resp.status_code == 429:  # Too many requests
                    try:
                        secs = int(resp.headers.get("Retry-After", 2))
                    except ValueError:
                        # Retry-After may also be an HTTP-date; use the default.
                        secs = 2
                    logging.warning(f"Rate limit exceeded, waiting {secs}s before retrying...")
                    await asyncio.sleep(secs)
                    continue

                if resp.status_code != 200:
                    logging.error(f"Error fetching crypto symbols: [{resp.status_code}] {resp.text}")
                    return StringIO("")

                return StringIO(resp.text)

        logging.error(f"Rate limit still exceeded after {max_attempts} attempts, giving up")
        return StringIO("")
if __name__ == "__main__":
    # Script entry point: build the helper with its default cache file and
    # run a forced refresh of the symbol table to completion.
    crypto_symbols = CryptoSymbols()
    refresh_job = crypto_symbols.fetch_crypto_symbols(force_refresh=True)
    asyncio.run(refresh_job)

24
uv.lock generated
View File

@@ -690,6 +690,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" },
]
[[package]]
name = "html5lib"
version = "1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "six" },
{ name = "webencodings" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ac/b6/b55c3f49042f1df3dcd422b7f224f939892ee94f22abcf503a9b7339eaf2/html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f", size = 272215, upload-time = "2020-06-22T23:32:38.834Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6c/dd/a834df6482147d48e225a49515aabc28974ad5a4ca3215c18a882565b028/html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d", size = 112173, upload-time = "2020-06-22T23:32:36.781Z" },
]
[[package]]
name = "httpcore"
version = "1.0.9"
@@ -1662,6 +1675,7 @@ dependencies = [
{ name = "gnews" },
{ name = "google-genai" },
{ name = "gradio" },
{ name = "html5lib" },
{ name = "markdown-pdf" },
{ name = "newsapi-python" },
{ name = "ollama" },
@@ -1682,6 +1696,7 @@ requires-dist = [
{ name = "gnews" },
{ name = "google-genai" },
{ name = "gradio" },
{ name = "html5lib" },
{ name = "markdown-pdf" },
{ name = "newsapi-python" },
{ name = "ollama" },
@@ -1714,6 +1729,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/85/cd/584a2ceb5532af99dd09e50919e3615ba99aa127e9850eafe5f31ddfdb9a/uvicorn-0.37.0-py3-none-any.whl", hash = "sha256:913b2b88672343739927ce381ff9e2ad62541f9f8289664fa1d1d3803fa2ce6c", size = 67976, upload-time = "2025-09-23T13:33:45.842Z" },
]
[[package]]
name = "webencodings"
version = "0.5.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721, upload-time = "2017-04-05T20:21:34.189Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774, upload-time = "2017-04-05T20:21:32.581Z" },
]
[[package]]
name = "websocket-client"
version = "1.8.0"