From 6a9d8b354b3f5a384d1bef0952131cb1623d0ade Mon Sep 17 00:00:00 2001 From: Giacomo Bertolazzi <31776951+Berack96@users.noreply.github.com> Date: Mon, 27 Oct 2025 12:45:40 +0100 Subject: [PATCH] Fix socials timestamp (#50) * Fix Dockerfile per dipendenze di X * time --> timestamp * fix X command --- .env.example | 8 +------- Dockerfile | 10 +++++++--- src/app/api/core/social.py | 8 ++++---- src/app/api/social/chan.py | 13 +++++++++---- src/app/api/social/reddit.py | 4 ++-- src/app/api/social/x.py | 18 ++++++++++-------- tests/api/test_social_4chan.py | 4 ++-- tests/api/test_social_reddit.py | 2 +- tests/api/test_social_x_api.py | 4 +++- tests/tools/test_socials_tool.py | 4 ++-- 10 files changed, 41 insertions(+), 34 deletions(-) diff --git a/.env.example b/.env.example index 694300e..3127b74 100644 --- a/.env.example +++ b/.env.example @@ -42,13 +42,7 @@ CRYPTOPANIC_API_KEY= REDDIT_API_CLIENT_ID= REDDIT_API_CLIENT_SECRET= -# Per ottenere questa API è necessario seguire i seguenti passaggi: -# - Installare l'estensione su chrome X Auth Helper -# - Dargli il permesso di girare in incognito -# - Andare in incognito ed entrare sul proprio account X -# - Aprire l'estensione e fare "get key" -# - Chiudere chrome -# Dovrebbe funzionare per 5 anni o finchè non si si fa il log out, in ogni caso si può ricreare +# https://www.npmjs.com/package/rettiwt-api X_API_KEY= diff --git a/Dockerfile b/Dockerfile index 3a354bb..17e3234 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,10 +2,9 @@ FROM debian:bookworm-slim # Installiamo le dipendenze di sistema -RUN apt-get update && \ - apt-get install -y curl npm && \ +RUN apt update && \ + apt install -y curl && \ rm -rf /var/lib/apt/lists/* -RUN npm install -g rettiwt-api # Installiamo uv RUN curl -LsSf https://astral.sh/uv/install.sh | sh @@ -20,6 +19,11 @@ COPY uv.lock ./ RUN uv sync --frozen --no-dev ENV PYTHONPATH="./src" +# Installiamo le dipendenze per X (rettiwt, nodejs e npm) +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - +RUN apt install -y nodejs && rm -rf /var/lib/apt/lists/* +RUN npm install -g rettiwt-api + # Copiamo i file del progetto COPY LICENSE ./ COPY src/ ./src/ diff --git a/src/app/api/core/social.py b/src/app/api/core/social.py index fe4d5bf..05953a3 100644 --- a/src/app/api/core/social.py +++ b/src/app/api/core/social.py @@ -9,25 +9,25 @@ class SocialPost(BaseModel): """ Represents a social media post with time, title, description, and comments. """ - time: str = "" + timestamp: str = "" title: str = "" description: str = "" comments: list["SocialComment"] = [] def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None: """ Use the unified_timestamp function to set the time.""" - self.time = unified_timestamp(timestamp_ms, timestamp_s) + self.timestamp = unified_timestamp(timestamp_ms, timestamp_s) class SocialComment(BaseModel): """ Represents a comment on a social media post. """ - time: str = "" + timestamp: str = "" description: str = "" def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None: """ Use the unified_timestamp function to set the time.""" - self.time = unified_timestamp(timestamp_ms, timestamp_s) + self.timestamp = unified_timestamp(timestamp_ms, timestamp_s) class SocialWrapper: diff --git a/src/app/api/social/chan.py b/src/app/api/social/chan.py index a39e517..66efdb0 100644 --- a/src/app/api/social/chan.py +++ b/src/app/api/social/chan.py @@ -1,15 +1,20 @@ -''' -Usiamo le API di 4chan per ottenere un catalogo di threads dalla board /biz/ -''' import re import html import requests -from bs4 import BeautifulSoup +import warnings +from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning from datetime import datetime from app.api.core.social import * +# Ignora i warning di BeautifulSoup quando incontra HTML malformato o un link, mentre si aspetta un HTML completo +warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) + class ChanWrapper(SocialWrapper): + """ + Wrapper per l'API di 4chan, in particolare per la board /biz/ (Business & Finance) + Fonte API: https://a.4cdn.org/biz/catalog.json + """ def __init__(self): super().__init__() diff --git a/src/app/api/social/reddit.py b/src/app/api/social/reddit.py index 201166c..306e49e 100644 --- a/src/app/api/social/reddit.py +++ b/src/app/api/social/reddit.py @@ -23,13 +23,13 @@ SUBREDDITS = [ def extract_post(post: Submission) -> SocialPost: social = SocialPost() - social.set_timestamp(timestamp_ms=post.created) + social.set_timestamp(timestamp_s=post.created) social.title = post.title social.description = post.selftext for top_comment in post.comments: comment = SocialComment() - comment.set_timestamp(timestamp_ms=top_comment.created) + comment.set_timestamp(timestamp_s=top_comment.created) comment.description = top_comment.body social.comments.append(comment) diff --git a/src/app/api/social/x.py b/src/app/api/social/x.py index a1b1bd4..30d93c0 100644 --- a/src/app/api/social/x.py +++ b/src/app/api/social/x.py @@ -2,6 +2,7 @@ import os import json import subprocess from shutil import which +from datetime import datetime from app.api.core.social import SocialWrapper, SocialPost @@ -28,19 +29,20 @@ class XWrapper(SocialWrapper): def get_top_crypto_posts(self, limit:int = 5) -> list[SocialPost]: - social_posts: list[SocialPost] = [] + posts: list[SocialPost] = [] for user in X_USERS: - process = subprocess.run(f"rettiwt -k {self.api_key} tweet search -f {str(user)}", capture_output=True) + cmd = ['rettiwt', '-k', self.api_key, 'tweet', 'search', str(limit), '-f', str(user)] + process = subprocess.run(cmd, capture_output=True) results = process.stdout.decode() json_result = json.loads(results) - tweets = json_result['list'] - for tweet in tweets[:limit]: + for tweet in json_result.get('list', []): + time = datetime.fromisoformat(tweet['createdAt']) social_post = SocialPost() - social_post.time = tweet['createdAt'] - social_post.title = str(user) + " tweeted: " + social_post.set_timestamp(timestamp_s=int(time.timestamp())) + social_post.title = f"{user} tweeted: " social_post.description = tweet['fullText'] - social_posts.append(social_post) + posts.append(social_post) - return social_posts + return posts diff --git a/tests/api/test_social_4chan.py b/tests/api/test_social_4chan.py index b39a36d..dcf42d2 100644 --- a/tests/api/test_social_4chan.py +++ b/tests/api/test_social_4chan.py @@ -16,7 +16,7 @@ class TestChanWrapper: assert len(posts) == 2 for post in posts: assert post.title != "" - assert post.time != "" - assert re.match(r'\d{4}-\d{2}-\d{2}', post.time) + assert post.timestamp != "" + assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp) assert isinstance(post.comments, list) diff --git a/tests/api/test_social_reddit.py b/tests/api/test_social_reddit.py index a83fe8a..adb4e13 100644 --- a/tests/api/test_social_reddit.py +++ b/tests/api/test_social_reddit.py @@ -19,7 +19,7 @@ class TestRedditWrapper: assert len(posts) == 2 for post in posts: assert post.title != "" - assert re.match(r'\d{4}-\d{2}-\d{2}', post.time) + assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp) assert isinstance(post.comments, list) assert len(post.comments) <= MAX_COMMENTS diff --git a/tests/api/test_social_x_api.py b/tests/api/test_social_x_api.py index 15f39c3..39f75f9 100644 --- a/tests/api/test_social_x_api.py +++ b/tests/api/test_social_x_api.py @@ -1,11 +1,13 @@ import os import re import pytest +from shutil import which from app.api.social.x import XWrapper @pytest.mark.social @pytest.mark.api @pytest.mark.skipif(not os.getenv("X_API_KEY"), reason="X_API_KEY not set in environment variables") +@pytest.mark.skipif(which('rettiwt') is None, reason="rettiwt not installed") class TestXWrapper: def test_initialization(self): wrapper = XWrapper() @@ -18,5 +20,5 @@ class TestXWrapper: assert len(posts) == 2 for post in posts: assert post.title != "" - assert re.match(r'\d{4}-\d{2}-\d{2}', post.time) + assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp) assert isinstance(post.comments, list) diff --git a/tests/tools/test_socials_tool.py b/tests/tools/test_socials_tool.py index c021a90..3a481f7 100644 --- a/tests/tools/test_socials_tool.py +++ b/tests/tools/test_socials_tool.py @@ -17,7 +17,7 @@ class TestSocialAPIsTool: assert len(result) > 0 for post in result: assert post.title is not None - assert post.time is not None + assert post.timestamp is not None def test_social_api_tool_get_top__all_results(self): tool = SocialAPIsTool() @@ -27,4 +27,4 @@ class TestSocialAPIsTool: for _provider, posts in result.items(): for post in posts: assert post.title is not None - assert post.time is not None + assert post.timestamp is not None