From 06c660b6598c630d0782252311852fe874673c15 Mon Sep 17 00:00:00 2001
From: Nunzi99 <115243475+Nunzi99@users.noreply.github.com>
Date: Mon, 20 Oct 2025 16:56:11 +0200
Subject: [PATCH] 14 socials integration (#34)

* Create XWrapper.py & ChanWrapper.py
* Tests for in XWrapper & ChanWrapper
* MAX_COMMENTS in social.py
* Soddisfatto Giacomo
* unified_timestamp
---
Review notes: this copy of the patch was re-flowed to one diff line per line and
carries the following fixes on top of the original commit (the post-image blob ids
in the "index" lines were not regenerated):
- reddit.py: Reddit creation times are epoch seconds, so they are passed as timestamp_s
- chan.py: the thread timestamp is no longer overwritten by the timestamps of its replies
- x.py: rettiwt is invoked with an argument list and `limit` bounds the whole result
- chan.py, reddit.py, social_tool.py: explicit imports replace `from ... import *`

 .env.example                                  |   9 ++
 Dockerfile                                    |   7 +-
 src/app/api/core/__init__.py                  |  22 ++++
 src/app/api/core/markets.py                   |  22 +---
 src/app/api/core/social.py                    |  12 +++
 src/app/api/social/__init__.py                |   4 +-
 src/app/api/social/chan.py                    | 100 ++++++++++++++++++
 src/app/api/social/reddit.py                  |   7 +-
 src/app/api/social/x.py                       |  59 +++++++++++
 src/app/api/tools/social_tool.py              |   4 +-
 tests/api/test_social_4chan.py                |  22 ++++
 .../{test_reddit.py => test_social_reddit.py} |   3 +
 tests/api/test_social_x_api.py                |  22 ++++
 13 files changed, 266 insertions(+), 27 deletions(-)
 create mode 100644 src/app/api/social/chan.py
 create mode 100644 src/app/api/social/x.py
 create mode 100644 tests/api/test_social_4chan.py
 rename tests/api/{test_reddit.py => test_social_reddit.py} (92%)
 create mode 100644 tests/api/test_social_x_api.py

diff --git a/.env.example b/.env.example
index ce6f756..694300e 100644
--- a/.env.example
+++ b/.env.example
@@ -42,6 +42,15 @@ CRYPTOPANIC_API_KEY=
 REDDIT_API_CLIENT_ID=
 REDDIT_API_CLIENT_SECRET=
 
+# To obtain this key follow these steps:
+# - Install the "X Auth Helper" Chrome extension
+# - Allow it to run in incognito mode
+# - Open an incognito window and log in to your X account
+# - Open the extension and click "get key"
+# - Close Chrome
+# The key should remain valid for 5 years or until you log out; in any case it can be generated again
+X_API_KEY=
+
 
 ###############################################################################
 # Configurazioni per API di messaggistica
diff --git a/Dockerfile b/Dockerfile
index 8c7489d..3a354bb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,11 @@
 # Utilizziamo Debian slim invece di Alpine per migliore compatibilità
 FROM debian:bookworm-slim
-RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
+
+# Install the system dependencies (npm is needed for the rettiwt-api CLI used by XWrapper)
+RUN apt-get update && \
+    apt-get install -y curl npm && \
+    rm -rf /var/lib/apt/lists/*
+RUN npm install -g rettiwt-api
 
 # Installiamo uv
 RUN curl -LsSf https://astral.sh/uv/install.sh | sh
diff --git a/src/app/api/core/__init__.py b/src/app/api/core/__init__.py
index e69de29..3cddea7 100644
--- a/src/app/api/core/__init__.py
+++ b/src/app/api/core/__init__.py
@@ -0,0 +1,22 @@
+from datetime import datetime
+
+
+def unified_timestamp(timestamp_ms: int | None = None, timestamp_s: int | None = None) -> str:
+    """
+    Transform the timestamp from milliseconds or seconds to a unified string format.
+    The resulting string is a formatted string 'YYYY-MM-DD HH:MM'.
+    Args:
+        timestamp_ms: Timestamp in milliseconds.
+        timestamp_s: Timestamp in seconds.
+    Raises:
+        ValueError: If neither timestamp_ms nor timestamp_s is provided.
+    """
+    if timestamp_ms is not None:
+        timestamp = timestamp_ms // 1000
+    elif timestamp_s is not None:
+        timestamp = timestamp_s
+    else:
+        raise ValueError("Either timestamp_ms or timestamp_s must be provided")
+    assert timestamp > 0, "Invalid timestamp data received"
+
+    return datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M')
\ No newline at end of file
diff --git a/src/app/api/core/markets.py b/src/app/api/core/markets.py
index 8b6c754..6b53f61 100644
--- a/src/app/api/core/markets.py
+++ b/src/app/api/core/markets.py
@@ -1,6 +1,6 @@
 import statistics
-from datetime import datetime
 from pydantic import BaseModel
+from app.api.core import unified_timestamp
 
 
 class ProductInfo(BaseModel):
@@ -64,24 +64,8 @@ class Price(BaseModel):
     """Timestamp in format YYYY-MM-DD HH:MM"""
 
     def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
-        """
-        Sets the timestamp from milliseconds or seconds.
-        The timestamp is saved as a formatted string 'YYYY-MM-DD HH:MM'.
-        Args:
-            timestamp_ms: Timestamp in milliseconds.
-            timestamp_s: Timestamp in seconds.
-        Raises:
-            ValueError: If neither timestamp_ms nor timestamp_s is provided.
-        """
-        if timestamp_ms is not None:
-            timestamp = timestamp_ms // 1000
-        elif timestamp_s is not None:
-            timestamp = timestamp_s
-        else:
-            raise ValueError("Either timestamp_ms or timestamp_s must be provided")
-        assert timestamp > 0, "Invalid timestamp data received"
-
-        self.timestamp = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M')
+        """ Use the unified_timestamp function to set the timestamp."""
+        self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
 
     @staticmethod
     def aggregate(prices: dict[str, list['Price']]) -> list['Price']:
diff --git a/src/app/api/core/social.py b/src/app/api/core/social.py
index 721ac0c..fe4d5bf 100644
--- a/src/app/api/core/social.py
+++ b/src/app/api/core/social.py
@@ -1,6 +1,10 @@
 from pydantic import BaseModel
+from app.api.core import unified_timestamp
 
+
+MAX_COMMENTS = 5
+
 
 class SocialPost(BaseModel):
     """
     Represents a social media post with time, title, description, and comments.
@@ -10,6 +14,10 @@ class SocialPost(BaseModel):
     description: str = ""
     comments: list["SocialComment"] = []
 
+    def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
+        """ Use the unified_timestamp function to set the time."""
+        self.time = unified_timestamp(timestamp_ms, timestamp_s)
+
 class SocialComment(BaseModel):
     """
     Represents a comment on a social media post.
@@ -17,6 +25,10 @@ class SocialComment(BaseModel):
     time: str = ""
     description: str = ""
 
+    def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
+        """ Use the unified_timestamp function to set the time."""
+        self.time = unified_timestamp(timestamp_ms, timestamp_s)
+
 
 class SocialWrapper:
     """
diff --git a/src/app/api/social/__init__.py b/src/app/api/social/__init__.py
index f50ca7c..13f6f62 100644
--- a/src/app/api/social/__init__.py
+++ b/src/app/api/social/__init__.py
@@ -1,3 +1,5 @@
 from app.api.social.reddit import RedditWrapper
+from app.api.social.x import XWrapper
+from app.api.social.chan import ChanWrapper
 
-__all__ = ["RedditWrapper"]
+__all__ = ["RedditWrapper", "XWrapper", "ChanWrapper"]
diff --git a/src/app/api/social/chan.py b/src/app/api/social/chan.py
new file mode 100644
index 0000000..a39e517
--- /dev/null
+++ b/src/app/api/social/chan.py
@@ -0,0 +1,100 @@
+'''
+Uses the 4chan API to fetch the catalog of threads of the /biz/ board.
+'''
+import re
+import html
+import requests
+from bs4 import BeautifulSoup
+from datetime import datetime
+from app.api.core.social import MAX_COMMENTS, SocialComment, SocialPost, SocialWrapper
+
+
+class ChanWrapper(SocialWrapper):
+    def __init__(self):
+        super().__init__()
+
+    def __time_str(self, timestamp: str) -> int:
+        """Convert a 4chan 'MM/DD/YY(Day)HH:MM:SS' string to milliseconds since the epoch."""
+        time = datetime.strptime(timestamp, "%m/%d/%y(%a)%H:%M:%S")
+        return int(time.timestamp() * 1000)
+
+    def __unformat_html_str(self, html_element: str) -> str:
+        """Clean a comment by removing HTML markup and useless formatting."""
+        if not html_element: return ""
+
+        html_entities = html.unescape(html_element)
+        soup = BeautifulSoup(html_entities, 'html.parser')
+        html_element = soup.get_text(separator=" ")
+        html_element = re.sub(r"[\\/]+", "/", html_element)
+        html_element = re.sub(r"\s+", " ", html_element).strip()
+        return html_element
+
+    def get_top_crypto_posts(self, limit: int = 5) -> list[SocialPost]:
+        """
+        Fetch the /biz/ catalog and convert its threads into SocialPost objects.
+        Pinned threads and threads without a text body are skipped, and each post keeps
+        at most MAX_COMMENTS of the replies listed in the catalog.
+        Args:
+            limit: Maximum number of posts to return.
+        Returns:
+            At most `limit` posts, in catalog order.
+        """
+        url = 'https://a.4cdn.org/biz/catalog.json'
+        # A timeout prevents a stalled connection from blocking the caller forever
+        response = requests.get(url, timeout=10)
+        assert response.status_code == 200, f"Error in 4chan API request [{response.status_code}] {response.text}"
+
+        social_posts: list[SocialPost] = []
+
+        # The payload is a list with one dictionary per board page, shaped like {"page": page_number, "threads": [{thread_data}]}
+        for page in response.json():
+            for thread in page['threads']:
+
+                # 'sticky' is present only on pinned threads, which can be ignored
+                if 'sticky' in thread:
+                    continue
+
+                # creation date of the thread, formatted as "MM/DD/YY(day)hh:mm:ss"
+                # NOTE(review): the 4chan API also seems to expose a numeric UNIX 'time' field; parsing 'now' depends on the local timezone and locale - confirm and consider switching
+                thread_time = self.__time_str(thread.get('now', ''))
+
+                # name of the user
+                name: str = thread.get('name', 'Anonymous')
+
+                # title of the thread, it may contain HTML formatting that has to be stripped and it may be missing
+                title = self.__unformat_html_str(thread.get('sub', ''))
+                title = f"{name} posted: {title}"
+
+                # body of the thread, it may contain HTML formatting that has to be stripped
+                thread_description = self.__unformat_html_str(thread.get('com', ''))
+                if not thread_description:
+                    continue
+
+                # list of dictionaries holding the replies to the main thread, structured similarly to the thread itself
+                reply_list = thread.get('last_replies', [])
+                comments_list: list[SocialComment] = []
+
+                for i, reply in enumerate(reply_list):
+                    if i >= MAX_COMMENTS: break
+
+                    # creation date of the reply, kept in its own variable so that it does not overwrite the thread timestamp
+                    reply_time = self.__time_str(reply['now'])
+
+                    # text of the reply, it may contain HTML formatting that has to be stripped
+                    comment = self.__unformat_html_str(reply.get('com', ''))
+                    if not comment:
+                        continue
+
+                    social_comment = SocialComment(description=comment)
+                    social_comment.set_timestamp(timestamp_ms=reply_time)
+                    comments_list.append(social_comment)
+
+                social_post: SocialPost = SocialPost(
+                    title=title,
+                    description=thread_description,
+                    comments=comments_list
+                )
+                social_post.set_timestamp(timestamp_ms=thread_time)
+                social_posts.append(social_post)
+
+        return social_posts[:limit]
diff --git a/src/app/api/social/reddit.py b/src/app/api/social/reddit.py
index bda7687..201166c 100644
--- a/src/app/api/social/reddit.py
+++ b/src/app/api/social/reddit.py
@@ -1,10 +1,9 @@
 import os
 from praw import Reddit # type: ignore
 from praw.models import Submission # type: ignore
-from app.api.core.social import SocialWrapper, SocialPost, SocialComment
+from app.api.core.social import MAX_COMMENTS, SocialComment, SocialPost, SocialWrapper
 
 
-MAX_COMMENTS = 5
 # metterne altri se necessario.
 # fonti: https://lkiconsulting.io/marketing/best-crypto-subreddits/
 SUBREDDITS = [
@@ -24,13 +23,13 @@ SUBREDDITS = [
 
 def extract_post(post: Submission) -> SocialPost:
     social = SocialPost()
-    social.time = str(post.created)
+    social.set_timestamp(timestamp_s=int(post.created_utc))
     social.title = post.title
     social.description = post.selftext
 
     for top_comment in post.comments:
         comment = SocialComment()
-        comment.time = str(top_comment.created)
+        comment.set_timestamp(timestamp_s=int(top_comment.created_utc))
         comment.description = top_comment.body
         social.comments.append(comment)
 
diff --git a/src/app/api/social/x.py b/src/app/api/social/x.py
new file mode 100644
index 0000000..a1b1bd4
--- /dev/null
+++ b/src/app/api/social/x.py
@@ -0,0 +1,59 @@
+import os
+import json
+import subprocess
+from shutil import which
+from app.api.core.social import SocialWrapper, SocialPost
+
+
+# This is the list of users that can be interesting
+# To get the ID of a new user is necessary to search it on X, copy the url and insert it in a service like "https://get-id-x.foundtt.com/en/"
+X_USERS = [
+    'watcherguru',
+    'Cointelegraph',
+    'BTC_Archive',
+    'elonmusk'
+]
+
+class XWrapper(SocialWrapper):
+    def __init__(self):
+        '''
+        This wrapper uses the rettiwt API to get data from X in order to avoid the rate limits of the free X API,
+        even if improbable this could lead to a ban so do not use the personal account,
+        In order to work it is necessary to install the rettiwt cli tool, for more information visit the official documentation at https://www.npmjs.com/package/rettiwt-api
+        '''
+
+        self.api_key = os.getenv("X_API_KEY")
+        assert self.api_key, "X_API_KEY environment variable not set"
+        assert which('rettiwt') is not None, "Command `rettiwt` not installed"
+
+
+    def get_top_crypto_posts(self, limit: int = 5) -> list[SocialPost]:
+        '''
+        Collect the tweets returned by `rettiwt tweet search -f <user>` for the accounts in X_USERS.
+        Args:
+            limit: Maximum number of posts to return overall.
+        Returns:
+            At most `limit` posts, collected following the order of X_USERS.
+        '''
+        social_posts: list[SocialPost] = []
+
+        for user in X_USERS:
+            if len(social_posts) >= limit:
+                break
+
+            # The command is an argument list: without shell=True a single string is taken as the
+            # program name on POSIX, and a list also keeps the key and the user away from any shell parsing
+            command = [which('rettiwt') or 'rettiwt', '-k', str(self.api_key), 'tweet', 'search', '-f', user]
+            process = subprocess.run(command, capture_output=True)
+            assert process.returncode == 0, f"Error in rettiwt command for user {user} [{process.returncode}] {process.stderr.decode()}"
+            json_result = json.loads(process.stdout.decode())
+
+            for tweet in json_result['list'][:limit]:
+                social_post = SocialPost()
+                # NOTE(review): createdAt is stored exactly as returned by rettiwt, it is not normalized to 'YYYY-MM-DD HH:MM' - confirm its format
+                social_post.time = tweet['createdAt']
+                social_post.title = str(user) + " tweeted: "
+                social_post.description = tweet['fullText']
+                social_posts.append(social_post)
+
+        return social_posts[:limit]
diff --git a/src/app/api/tools/social_tool.py b/src/app/api/tools/social_tool.py
index 630e14d..c905b5b 100644
--- a/src/app/api/tools/social_tool.py
+++ b/src/app/api/tools/social_tool.py
@@ -1,7 +1,7 @@
 from agno.tools import Toolkit
 from app.api.wrapper_handler import WrapperHandler
 from app.api.core.social import SocialPost, SocialWrapper
-from app.api.social import RedditWrapper
+from app.api.social import ChanWrapper, RedditWrapper, XWrapper
 
 
 class SocialAPIsTool(SocialWrapper, Toolkit):
@@ -23,7 +23,7 @@ class SocialAPIsTool(SocialWrapper, Toolkit):
         - RedditWrapper.
         """
 
-        wrappers: list[type[SocialWrapper]] = [RedditWrapper]
+        wrappers: list[type[SocialWrapper]] = [RedditWrapper, XWrapper, ChanWrapper]
         self.handler = WrapperHandler.build_wrappers(wrappers)
 
         Toolkit.__init__( # type: ignore
diff --git a/tests/api/test_social_4chan.py b/tests/api/test_social_4chan.py
new file mode 100644
index 0000000..b39a36d
--- /dev/null
+++ b/tests/api/test_social_4chan.py
@@ -0,0 +1,22 @@
+import re
+import pytest
+from app.api.social.chan import ChanWrapper
+
+@pytest.mark.social
+@pytest.mark.api
+class TestChanWrapper:
+    def test_initialization(self):
+        wrapper = ChanWrapper()
+        assert wrapper is not None
+
+    def test_get_top_crypto_posts(self):
+        wrapper = ChanWrapper()
+        posts = wrapper.get_top_crypto_posts(limit=2)
+        assert isinstance(posts, list)
+        assert len(posts) == 2
+        for post in posts:
+            assert post.title != ""
+            assert post.time != ""
+            assert re.match(r'\d{4}-\d{2}-\d{2}', post.time)
+            assert isinstance(post.comments, list)
+
diff --git a/tests/api/test_reddit.py b/tests/api/test_social_reddit.py
similarity index 92%
rename from tests/api/test_reddit.py
rename to tests/api/test_social_reddit.py
index d4533a5..a83fe8a 100644
--- a/tests/api/test_reddit.py
+++ b/tests/api/test_social_reddit.py
@@ -1,4 +1,5 @@
 import os
+import re
 import pytest
 from app.api.social.reddit import MAX_COMMENTS, RedditWrapper
 
@@ -18,6 +19,8 @@ class TestRedditWrapper:
         assert len(posts) == 2
         for post in posts:
             assert post.title != ""
+            assert re.match(r'\d{4}-\d{2}-\d{2}', post.time)
+
             assert isinstance(post.comments, list)
             assert len(post.comments) <= MAX_COMMENTS
             for comment in post.comments:
diff --git a/tests/api/test_social_x_api.py b/tests/api/test_social_x_api.py
new file mode 100644
index 0000000..15f39c3
--- /dev/null
+++ b/tests/api/test_social_x_api.py
@@ -0,0 +1,22 @@
+import os
+import re
+import pytest
+from app.api.social.x import XWrapper
+
+@pytest.mark.social
+@pytest.mark.api
+@pytest.mark.skipif(not os.getenv("X_API_KEY"), reason="X_API_KEY not set in environment variables")
+class TestXWrapper:
+    def test_initialization(self):
+        wrapper = XWrapper()
+        assert wrapper is not None
+
+    def test_get_top_crypto_posts(self):
+        wrapper = XWrapper()
+        posts = wrapper.get_top_crypto_posts(limit=2)
+        assert isinstance(posts, list)
+        assert len(posts) == 2
+        for post in posts:
+            assert post.title != ""
+            assert re.match(r'\d{4}-\d{2}-\d{2}', post.time)
+            assert isinstance(post.comments, list)