From ed6a4d12a6640a05f80d7043ea92ceb5db0e3d27 Mon Sep 17 00:00:00 2001
From: Nunzi99 <claudionunziante@gmail.com>
Date: Sat, 11 Oct 2025 16:37:48 +0200
Subject: [PATCH] Aggiunto wrapper per 4chan

---
 src/app/social/__init__.py |   6 +-
 src/app/social/chan.py     | 161 +++++++++++++++++++++++++++++++++++++
 src/app/social/x.py        |   1 +
 3 files changed, 166 insertions(+), 2 deletions(-)
 create mode 100644 src/app/social/chan.py

diff --git a/src/app/social/__init__.py b/src/app/social/__init__.py
index 9ce3708..10f08f5 100644
--- a/src/app/social/__init__.py
+++ b/src/app/social/__init__.py
@@ -2,8 +2,10 @@ from agno.tools import Toolkit
 from app.utils.wrapper_handler import WrapperHandler
 from .base import SocialPost, SocialWrapper
 from .reddit import RedditWrapper
+from .x import XWrapper
+from .chan import ChanWrapper 
 
-__all__ = ["SocialAPIsTool", "SOCIAL_INSTRUCTIONS", "RedditWrapper"]
+__all__ = ["SocialAPIsTool", "SOCIAL_INSTRUCTIONS", "RedditWrapper", "XWrapper", "ChanWrapper"]
 
 
 class SocialAPIsTool(SocialWrapper, Toolkit):
@@ -25,7 +27,7 @@ class SocialAPIsTool(SocialWrapper, Toolkit):
         - RedditWrapper.
         """
 
-        wrappers = [RedditWrapper]
+        wrappers = [RedditWrapper, XWrapper, ChanWrapper]
         self.wrapper_handler: WrapperHandler[SocialWrapper] = WrapperHandler.build_wrappers(wrappers)
 
         Toolkit.__init__(
diff --git a/src/app/social/chan.py b/src/app/social/chan.py
new file mode 100644
index 0000000..a1cecca
--- /dev/null
+++ b/src/app/social/chan.py
@@ -0,0 +1,161 @@
+'''
+Uses the 4chan API to fetch the thread catalog of the /biz/ board.
+'''
+import requests
+import re
+import html
+from bs4 import BeautifulSoup
+
+from .base import SocialWrapper, SocialPost, SocialComment
+class ChanWrapper(SocialWrapper):
+    """
+    Social wrapper that reads threads from the 4chan /biz/ board catalog.
+
+    The catalog is fetched from the 4chan JSON API without credentials;
+    timestamps are reduced to a "dd/mm/yy" date and HTML is stripped from
+    every text field.
+    """
+
+    # Catalog endpoint of the /biz/ board.
+    CATALOG_URL = 'https://a.4cdn.org/biz/catalog.json'
+    # Seconds to wait for the API, so a stalled connection cannot hang forever.
+    REQUEST_TIMEOUT = 10
+    # Maximum number of replies attached to each post.
+    MAX_COMMENTS = 5
+    # Any run of backslashes and/or slashes (replaced by a single "/").
+    _SLASH_RUN = re.compile(r"[\\/]+")
+    # Any run of whitespace (replaced by a single space).
+    _SPACE_RUN = re.compile(r"\s+")
+
+    def __init__(self):
+        super().__init__()
+
+    def get_top_crypto_posts(self, limit: int = 5) -> list[SocialPost]:
+        """
+        Return up to `limit` non-sticky threads of the /biz/ catalog.
+
+        Threads are visited in catalog order. A thread without a comment
+        ("com") is skipped. Each post carries at most MAX_COMMENTS of the
+        replies listed under the thread's "last_replies" key.
+
+        Args:
+            limit: Maximum number of posts to return.
+
+        Returns:
+            The collected posts, truncated to `limit`.
+
+        Raises:
+            requests.RequestException: If the request fails or times out,
+                or if the API answers with a status code other than 200.
+        """
+        response = requests.get(self.CATALOG_URL, timeout=self.REQUEST_TIMEOUT)
+        if response.status_code != 200:
+            # Fail with a clear error: without a catalog there is nothing to
+            # iterate on (printing and carrying on hits an unbound variable).
+            raise requests.HTTPError(
+                f"4chan catalog request failed with status {response.status_code}",
+                response=response,
+            )
+        # One dict per board page: {"page": page_number, "threads": [{thread_data}]}
+        page_list: list[dict] = response.json()
+
+        social_posts: list[SocialPost] = []
+        for page in page_list:
+            for thread in page.get('threads', []):
+                # Pinned threads carry the "sticky" key and are ignored.
+                if 'sticky' in thread:
+                    continue
+                social_post = self._build_post(thread)
+                if social_post is not None:
+                    social_posts.append(social_post)
+
+        return social_posts[:limit]
+
+    def _build_post(self, thread: dict):
+        """
+        Convert one catalog thread into a SocialPost.
+
+        Keys read from `thread`:
+        - "now": creation date, formatted as "MM/DD/YY(Day)hh:mm:ss"
+        - "name": name of the poster ("Anonymous" is used when missing)
+        - "sub": thread title; may contain HTML and may be missing
+        - "com": thread comment; may contain HTML
+        - "last_replies": list of replies, each carrying "now" and "com"
+
+        Args:
+            thread: A thread dictionary taken from the catalog.
+
+        Returns:
+            The SocialPost, or None when the thread has no usable "com".
+        """
+        # A thread without text cannot produce a post.
+        description = thread.get('com')
+        if not isinstance(description, str):
+            return None
+
+        name = thread.get('name', 'Anonymous')
+        subject = thread.get('sub')
+        if isinstance(subject, str):
+            title = f"{name} posted: {self._clean_text(subject)}"
+        else:
+            title = f"{name} posted"
+
+        comments_list: list[SocialComment] = []
+        for reply in thread.get('last_replies', []):
+            reply_text = reply.get('com')
+            # Replies without text are skipped and do not count towards the cap.
+            if not isinstance(reply_text, str):
+                continue
+            comments_list.append(SocialComment(
+                time=self._format_date(reply['now']),
+                description=self._clean_text(reply_text),
+            ))
+            if len(comments_list) >= self.MAX_COMMENTS:
+                break
+
+        return SocialPost(
+            # Date of the thread itself: reply dates are computed separately
+            # and never overwrite it.
+            time=self._format_date(thread['now']),
+            title=title,
+            description=self._clean_text(description),
+            comments=comments_list,
+        )
+
+    @staticmethod
+    def _format_date(now: str) -> str:
+        """
+        Convert a "MM/DD/YY(Day)hh:mm:ss" timestamp into "dd/mm/yy".
+
+        Args:
+            now: Value of the "now" key of a thread or reply.
+
+        Returns:
+            The date only, with day and month swapped.
+        """
+        # Month is at [0:2], day at [3:5], year at [6:8]; the slashes sit at
+        # indexes 2 and 5 and must not end up in the fields.
+        month, day, year = now[:2], now[3:5], now[6:8]
+        return f"{day}/{month}/{year}"
+
+    @classmethod
+    def _clean_text(cls, raw: str) -> str:
+        """
+        Turn an HTML fragment returned by the API into one line of plain text.
+
+        Args:
+            raw: Text that may contain HTML entities and tags.
+
+        Returns:
+            The text without markup, with slash runs and whitespace collapsed.
+        """
+        # Decode HTML entities first, then drop the tags and keep the text.
+        soup = BeautifulSoup(html.unescape(raw), 'html.parser')
+        text = soup.get_text(separator=" ")
+        # Collapse runs of backslashes/slashes into "/" and whitespace into " ".
+        text = cls._SLASH_RUN.sub("/", text)
+        return cls._SPACE_RUN.sub(" ", text).strip()
+# Manual check: print the number of fetched posts
+# chan_wrapper = ChanWrapper()
+# social_posts = chan_wrapper.get_top_crypto_posts()
+# print(len(social_posts))
diff --git a/src/app/social/x.py b/src/app/social/x.py
index 3dfba03..200f884 100644
--- a/src/app/social/x.py
+++ b/src/app/social/x.py
@@ -25,6 +25,7 @@ class XWrapper(SocialWrapper):
             'elonmusk'
         ]
         self.api_key = os.getenv("X_API_KEY")
+        assert self.api_key, "X_API_KEY environment variable not set"
         # Connection to the docker deamon
         self.client = docker.from_env()
         # Connect with the relative container