Fix socials timestamp (#50)

* Fix Dockerfile per dipendenze di X
* time --> timestamp
* fix X command
This commit was merged in pull request #50.
This commit is contained in:
Giacomo Bertolazzi
2025-10-27 12:45:40 +01:00
committed by GitHub
parent 08774bee1b
commit 6a9d8b354b
10 changed files with 41 additions and 34 deletions

View File

@@ -42,13 +42,7 @@ CRYPTOPANIC_API_KEY=
REDDIT_API_CLIENT_ID=
REDDIT_API_CLIENT_SECRET=
# Per ottenere questa API è necessario seguire i seguenti passaggi:
# - Installare l'estensione su chrome X Auth Helper
# - Dargli il permesso di girare in incognito
# - Andare in incognito ed entrare sul proprio account X
# - Aprire l'estensione e fare "get key"
# - Chiudere chrome
# Dovrebbe funzionare per 5 anni o finché non si fa il log out, in ogni caso si può ricreare
# https://www.npmjs.com/package/rettiwt-api
X_API_KEY=

View File

@@ -2,10 +2,9 @@
FROM debian:bookworm-slim
# Installiamo le dipendenze di sistema
RUN apt-get update && \
apt-get install -y curl npm && \
RUN apt update && \
apt install -y curl && \
rm -rf /var/lib/apt/lists/*
RUN npm install -g rettiwt-api
# Installiamo uv
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
@@ -20,6 +19,11 @@ COPY uv.lock ./
RUN uv sync --frozen --no-dev
ENV PYTHONPATH="./src"
# Installiamo le dipendenze per X (rettiwt, nodejs e npm)
RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash -
RUN apt install -y nodejs && rm -rf /var/lib/apt/lists/*
RUN npm install -g rettiwt-api
# Copiamo i file del progetto
COPY LICENSE ./
COPY src/ ./src/

View File

@@ -9,25 +9,25 @@ class SocialPost(BaseModel):
"""
Represents a social media post with time, title, description, and comments.
"""
time: str = ""
timestamp: str = ""
title: str = ""
description: str = ""
comments: list["SocialComment"] = []
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
""" Use the unified_timestamp function to set the time."""
self.time = unified_timestamp(timestamp_ms, timestamp_s)
self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
class SocialComment(BaseModel):
"""
Represents a comment on a social media post.
"""
time: str = ""
timestamp: str = ""
description: str = ""
def set_timestamp(self, timestamp_ms: int | None = None, timestamp_s: int | None = None) -> None:
""" Use the unified_timestamp function to set the time."""
self.time = unified_timestamp(timestamp_ms, timestamp_s)
self.timestamp = unified_timestamp(timestamp_ms, timestamp_s)
class SocialWrapper:

View File

@@ -1,15 +1,20 @@
'''
Usiamo le API di 4chan per ottenere un catalogo di threads dalla board /biz/
'''
import re
import html
import requests
from bs4 import BeautifulSoup
import warnings
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
from datetime import datetime
from app.api.core.social import *
# Ignora i warning di BeautifulSoup quando incontra HTML malformato o un link, mentre si aspetta un HTML completo
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
class ChanWrapper(SocialWrapper):
"""
Wrapper per l'API di 4chan, in particolare per la board /biz/ (Business & Finance)
Fonte API: https://a.4cdn.org/biz/catalog.json
"""
def __init__(self):
super().__init__()

View File

@@ -23,13 +23,13 @@ SUBREDDITS = [
def extract_post(post: Submission) -> SocialPost:
social = SocialPost()
social.set_timestamp(timestamp_ms=post.created)
social.set_timestamp(timestamp_s=post.created)
social.title = post.title
social.description = post.selftext
for top_comment in post.comments:
comment = SocialComment()
comment.set_timestamp(timestamp_ms=top_comment.created)
comment.set_timestamp(timestamp_s=top_comment.created)
comment.description = top_comment.body
social.comments.append(comment)

View File

@@ -2,6 +2,7 @@ import os
import json
import subprocess
from shutil import which
from datetime import datetime
from app.api.core.social import SocialWrapper, SocialPost
@@ -28,19 +29,20 @@ class XWrapper(SocialWrapper):
def get_top_crypto_posts(self, limit:int = 5) -> list[SocialPost]:
social_posts: list[SocialPost] = []
posts: list[SocialPost] = []
for user in X_USERS:
process = subprocess.run(f"rettiwt -k {self.api_key} tweet search -f {str(user)}", capture_output=True)
cmd = ['rettiwt', '-k', self.api_key, 'tweet', 'search', str(limit), '-f', str(user)]
process = subprocess.run(cmd, capture_output=True)
results = process.stdout.decode()
json_result = json.loads(results)
tweets = json_result['list']
for tweet in tweets[:limit]:
for tweet in json_result.get('list', []):
time = datetime.fromisoformat(tweet['createdAt'])
social_post = SocialPost()
social_post.time = tweet['createdAt']
social_post.title = str(user) + " tweeted: "
social_post.set_timestamp(timestamp_s=int(time.timestamp()))
social_post.title = f"{user} tweeted: "
social_post.description = tweet['fullText']
social_posts.append(social_post)
posts.append(social_post)
return social_posts
return posts

View File

@@ -16,7 +16,7 @@ class TestChanWrapper:
assert len(posts) == 2
for post in posts:
assert post.title != ""
assert post.time != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.time)
assert post.timestamp != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp)
assert isinstance(post.comments, list)

View File

@@ -19,7 +19,7 @@ class TestRedditWrapper:
assert len(posts) == 2
for post in posts:
assert post.title != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.time)
assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp)
assert isinstance(post.comments, list)
assert len(post.comments) <= MAX_COMMENTS

View File

@@ -1,11 +1,13 @@
import os
import re
import pytest
from shutil import which
from app.api.social.x import XWrapper
@pytest.mark.social
@pytest.mark.api
@pytest.mark.skipif(not os.getenv("X_API_KEY"), reason="X_API_KEY not set in environment variables")
@pytest.mark.skipif(which('rettiwt') is None, reason="rettiwt not installed")
class TestXWrapper:
def test_initialization(self):
wrapper = XWrapper()
@@ -18,5 +20,5 @@ class TestXWrapper:
assert len(posts) == 2
for post in posts:
assert post.title != ""
assert re.match(r'\d{4}-\d{2}-\d{2}', post.time)
assert re.match(r'\d{4}-\d{2}-\d{2}', post.timestamp)
assert isinstance(post.comments, list)

View File

@@ -17,7 +17,7 @@ class TestSocialAPIsTool:
assert len(result) > 0
for post in result:
assert post.title is not None
assert post.time is not None
assert post.timestamp is not None
def test_social_api_tool_get_top__all_results(self):
tool = SocialAPIsTool()
@@ -27,4 +27,4 @@ class TestSocialAPIsTool:
for _provider, posts in result.items():
for post in posts:
assert post.title is not None
assert post.time is not None
assert post.timestamp is not None