Fix socials timestamp (#50)

* Fix Dockerfile per dipendenze di X
* time --> timestamp
* fix X command
This commit was merged in pull request #50.
This commit is contained in:
Giacomo Bertolazzi
2025-10-27 12:45:40 +01:00
committed by GitHub
parent 08774bee1b
commit 6a9d8b354b
10 changed files with 41 additions and 34 deletions

View File

@@ -42,13 +42,7 @@ CRYPTOPANIC_API_KEY=
REDDIT_API_CLIENT_ID= REDDIT_API_CLIENT_ID=
REDDIT_API_CLIENT_SECRET= REDDIT_API_CLIENT_SECRET=
# Per ottenere questa API è necessario seguire i seguenti passaggi: # https://www.npmjs.com/package/rettiwt-api
# - Installare l'estensione su chrome X Auth Helper
# - Dargli il permesso di girare in incognito
# - Andare in incognito ed entrare sul proprio account X
# - Aprire l'estensione e fare "get key"
# - Chiudere chrome
# Dovrebbe funzionare per 5 anni o finché non si fa il log out; in ogni caso si può ricreare
X_API_KEY= X_API_KEY=

View File

@@ -2,10 +2,9 @@
FROM debian:bookworm-slim FROM debian:bookworm-slim
# Installiamo le dipendenze di sistema # Installiamo le dipendenze di sistema
RUN apt-get update && \ RUN apt update && \
apt-get install -y curl npm && \ apt install -y curl && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
RUN npm install -g rettiwt-api
# Installiamo uv # Installiamo uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh RUN curl -LsSf https://astral.sh/uv/install.sh | sh
@@ -20,6 +19,11 @@ COPY uv.lock ./
RUN uv sync --frozen --no-dev RUN uv sync --frozen --no-dev
ENV PYTHONPATH="./src" ENV PYTHONPATH="./src"
# Installiamo le dipendenze per X (rettiwt, nodejs e npm)
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
RUN apt install -y nodejs && rm -rf /var/lib/apt/lists/*
RUN npm install -g rettiwt-api
# Copiamo i file del progetto # Copiamo i file del progetto
COPY LICENSE ./ COPY LICENSE ./
COPY src/ ./src/ COPY src/ ./src/

View File

@@ -9,25 +9,25 @@ class SocialPost(BaseModel):
""" """
Represents a social media post with time, title, description, and comments. Represents a social media post with time, title, description, and comments.
""" """
time: str = "" timestamp: str = ""
title: str = "" title: str = ""
description: str = "" description: str = ""
comments: list["SocialComment"] = [] comments: list["SocialComment"] = []
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None: def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
""" Use the unified_timestamp function to set the time.""" """ Use the unified_timestamp function to set the time."""
self.time = unified_timestamp(timestamp_ms, timestamp_s) self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
class SocialComment(BaseModel): class SocialComment(BaseModel):
""" """
Represents a comment on a social media post. Represents a comment on a social media post.
""" """
time: str = "" timestamp: str = ""
description: str = "" description: str = ""
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None: def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
""" Use the unified_timestamp function to set the time.""" """ Use the unified_timestamp function to set the time."""
self.time = unified_timestamp(timestamp_ms, timestamp_s) self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
class SocialWrapper: class SocialWrapper:

View File

@@ -1,15 +1,20 @@
'''
Usiamo le API di 4chan per ottenere un catalogo di threads dalla board /biz/
'''
import re import re
import html import html
import requests import requests
from bs4 import BeautifulSoup import warnings
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from datetime import datetime from datetime import datetime
from app.api.core.social import * from app.api.core.social import *
# Ignora i warning di BeautifulSoup emessi quando il testo da analizzare somiglia a un URL o a un percorso di file invece che a markup HTML
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
class ChanWrapper(SocialWrapper): class ChanWrapper(SocialWrapper):
"""
Wrapper per l'API di 4chan, in particolare per la board /biz/ (Business & Finance)
Fonte API: https://a.4cdn.org/biz/catalog.json
"""
def __init__(self): def __init__(self):
super().__init__() super().__init__()

View File

@@ -23,13 +23,13 @@ SUBREDDITS = [
def extract_post(post: Submission) -> SocialPost: def extract_post(post: Submission) -> SocialPost:
social = SocialPost() social = SocialPost()
social.set_timestamp(timestamp_ms=post.created) social.set_timestamp(timestamp_s=post.created)
social.title = post.title social.title = post.title
social.description = post.selftext social.description = post.selftext
for top_comment in post.comments: for top_comment in post.comments:
comment = SocialComment() comment = SocialComment()
comment.set_timestamp(timestamp_ms=top_comment.created) comment.set_timestamp(timestamp_s=top_comment.created)
comment.description = top_comment.body comment.description = top_comment.body
social.comments.append(comment) social.comments.append(comment)

View File

@@ -2,6 +2,7 @@ import os
import json import json
import subprocess import subprocess
from shutil import which from shutil import which
from datetime import datetime
from app.api.core.social import SocialWrapper, SocialPost from app.api.core.social import SocialWrapper, SocialPost
@@ -28,19 +29,20 @@ class XWrapper(SocialWrapper):
def get_top_crypto_posts(self, limit:int = 5) -> list[SocialPost]: def get_top_crypto_posts(self, limit:int = 5) -> list[SocialPost]:
social_posts: list[SocialPost] = [] posts: list[SocialPost] = []
for user in X_USERS: for user in X_USERS:
process = subprocess.run(f"rettiwt -k {self.api_key} tweet search -f {str(user)}", capture_output=True) cmd = ['rettiwt', '-k', self.api_key, 'tweet', 'search', str(limit), '-f', str(user)]
process = subprocess.run(cmd, capture_output=True)
results = process.stdout.decode() results = process.stdout.decode()
json_result = json.loads(results) json_result = json.loads(results)
tweets = json_result['list'] for tweet in json_result.get('list', []):
for tweet in tweets[:limit]: time = datetime.fromisoformat(tweet['createdAt'])
social_post = SocialPost() social_post = SocialPost()
social_post.time = tweet['createdAt'] social_post.set_timestamp(timestamp_s=int(time.timestamp()))
social_post.title = str(user) + " tweeted: " social_post.title = f"{user} tweeted: "
social_post.description = tweet['fullText'] social_post.description = tweet['fullText']
social_posts.append(social_post) posts.append(social_post)
return social_posts return posts

View File

@@ -16,7 +16,7 @@ class TestChanWrapper:
assert len(posts) == 2 assert len(posts) == 2
for post in posts: for post in posts:
assert post.title != "" assert post.title != ""
assert post.time != "" assert post.timestamp != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.time) assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp)
assert isinstance(post.comments, list) assert isinstance(post.comments, list)

View File

@@ -19,7 +19,7 @@ class TestRedditWrapper:
assert len(posts) == 2 assert len(posts) == 2
for post in posts: for post in posts:
assert post.title != "" assert post.title != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.time) assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp)
assert isinstance(post.comments, list) assert isinstance(post.comments, list)
assert len(post.comments) <= MAX_COMMENTS assert len(post.comments) <= MAX_COMMENTS

View File

@@ -1,11 +1,13 @@
import os import os
import re import re
import pytest import pytest
from shutil import which
from app.api.social.x import XWrapper from app.api.social.x import XWrapper
@pytest.mark.social @pytest.mark.social
@pytest.mark.api @pytest.mark.api
@pytest.mark.skipif(not os.getenv("X_API_KEY"), reason="X_API_KEY not set in environment variables") @pytest.mark.skipif(not os.getenv("X_API_KEY"), reason="X_API_KEY not set in environment variables")
@pytest.mark.skipif(which('rettiwt') is None, reason="rettiwt not installed")
class TestXWrapper: class TestXWrapper:
def test_initialization(self): def test_initialization(self):
wrapper = XWrapper() wrapper = XWrapper()
@@ -18,5 +20,5 @@ class TestXWrapper:
assert len(posts) == 2 assert len(posts) == 2
for post in posts: for post in posts:
assert post.title != "" assert post.title != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.time) assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp)
assert isinstance(post.comments, list) assert isinstance(post.comments, list)

View File

@@ -17,7 +17,7 @@ class TestSocialAPIsTool:
assert len(result) > 0 assert len(result) > 0
for post in result: for post in result:
assert post.title is not None assert post.title is not None
assert post.time is not None assert post.timestamp is not None
def test_social_api_tool_get_top__all_results(self): def test_social_api_tool_get_top__all_results(self):
tool = SocialAPIsTool() tool = SocialAPIsTool()
@@ -27,4 +27,4 @@ class TestSocialAPIsTool:
for _provider, posts in result.items(): for _provider, posts in result.items():
for post in posts: for post in posts:
assert post.title is not None assert post.title is not None
assert post.time is not None assert post.timestamp is not None