import csv
import time
from datetime import datetime, timedelta, timezone

import praw

# Column order for the output CSV; shared by the header and every row.
FIELDNAMES = [
    'title', 'author', 'created_utc', 'created_datetime', 'num_comments', 'score', 'url',
    'upvote_ratio', 'is_original_content', 'over_18', 'spoiler', 'locked', 'stickied', 'selftext',
]


def _yesterday_utc():
    """Return (start, end) aware UTC datetimes bounding yesterday.

    `end` is today at 00:00:00 UTC; `start` is 24 hours earlier, so the
    half-open interval [start, end) covers exactly yesterday.
    """
    # datetime.utcnow() is deprecated and naive; use an aware UTC clock.
    end = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
    return end - timedelta(days=1), end


def _post_row(post, post_date):
    """Flatten a PRAW submission into a dict keyed by FIELDNAMES.

    `post_date` is the submission's creation time as an aware UTC datetime
    (passed in so it is computed only once per post).
    """
    return {
        'title': post.title,
        # post.author is None for deleted accounts.
        'author': post.author.name if post.author else '[deleted]',
        'created_utc': post.created_utc,
        'created_datetime': post_date.strftime("%Y-%m-%d %H:%M:%S"),
        'num_comments': post.num_comments,
        'score': post.score,
        'url': post.url,
        'upvote_ratio': post.upvote_ratio,
        'is_original_content': post.is_original_content,
        'over_18': post.over_18,
        'spoiler': post.spoiler,
        'locked': post.locked,
        'stickied': post.stickied,
        'selftext': post.selftext,
    }


def main():
    """Scrape yesterday's posts from a subreddit into a dated CSV file."""
    # REDDIT API CREDS HERE
    reddit = praw.Reddit(client_id='',
                         client_secret='',
                         user_agent='YOUR_USER_AGENT')
    subreddit = reddit.subreddit('SUBREDDIT')  # PUT SUBREDDIT NAME HERE

    # Define the time range (yesterday), as a half-open UTC interval.
    start_date, end_date = _yesterday_utc()

    csv_filename = f"reddit_posts_{start_date.strftime('%Y-%m-%d')}.csv"
    with open(csv_filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
        writer.writeheader()

        posts_scraped = 0
        # subreddit.new() yields newest-first, so once we see a post older
        # than start_date nothing later in the listing can match.
        for post in subreddit.new(limit=None):
            post_date = datetime.fromtimestamp(post.created_utc, tz=timezone.utc)
            if post_date >= end_date:
                continue  # today's post; keep scanning backwards
            if post_date < start_date:
                break  # past yesterday; stop walking the listing

            # One retry on rate limiting, as the message promises; the
            # original `continue` silently dropped the post instead.
            for attempt in range(2):
                try:
                    writer.writerow(_post_row(post, post_date))
                except praw.exceptions.RedditAPIException:
                    print("Rate limit exceeded. Waiting for 10 seconds and retrying...")
                    time.sleep(10)  # Wait for 10 seconds
                else:
                    posts_scraped += 1
                    print(f"Posts scraped for {start_date.strftime('%Y-%m-%d')}: {posts_scraped}", end='\r')
                    break

    print(f"\nData extraction completed for {start_date.strftime('%Y-%m-%d')}.")


if __name__ == "__main__":
    main()