import csv
import time
from datetime import datetime, timedelta, timezone

import praw

# REDDIT API CREDS HERE
reddit = praw.Reddit(client_id='', client_secret='', user_agent='YOUR_USER_AGENT')
subreddit = reddit.subreddit('SUBREDDIT')  # PUT SUBREDDIT NAME HERE

# Define the time range (yesterday, UTC): the half-open window
# [start_date, end_date). Timezone-aware datetimes are used throughout
# because datetime.utcnow() is deprecated and naive/aware comparisons fail.
end_date = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
start_date = end_date - timedelta(days=1)

csv_filename = f"reddit_posts_{start_date.strftime('%Y-%m-%d')}.csv"

with open(csv_filename, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=[
        'title', 'author', 'created_utc', 'created_datetime', 'num_comments',
        'score', 'url', 'upvote_ratio', 'is_original_content', 'over_18',
        'spoiler', 'locked', 'stickied', 'selftext'
    ])
    writer.writeheader()

    posts_scraped = 0

    # /new is reverse-chronological, so we can stop as soon as we see a post
    # older than the window instead of walking the whole (~1000-post) listing.
    for post in subreddit.new(limit=None):
        post_date = datetime.fromtimestamp(post.created_utc, tz=timezone.utc)
        if post_date < start_date:
            break
        if post_date >= end_date:
            continue  # newer than yesterday; keep scanning backwards

        # Building the row dict touches lazy PRAW attributes and may hit the
        # API, so retry (bounded) on a rate-limit error instead of dropping
        # the post as the old code did while claiming to retry.
        for attempt in range(3):
            try:
                writer.writerow({
                    'title': post.title,
                    'author': post.author.name if post.author else '[deleted]',
                    'created_utc': post.created_utc,
                    'created_datetime': post_date.strftime("%Y-%m-%d %H:%M:%S"),
                    'num_comments': post.num_comments,
                    'score': post.score,
                    'url': post.url,
                    'upvote_ratio': post.upvote_ratio,
                    'is_original_content': post.is_original_content,
                    'over_18': post.over_18,
                    'spoiler': post.spoiler,
                    'locked': post.locked,
                    'stickied': post.stickied,
                    'selftext': post.selftext
                })
            except praw.exceptions.RedditAPIException:
                print(f"Rate limit exceeded. Waiting for 10 seconds and retrying...")
                time.sleep(10)  # Wait for 10 seconds before retrying this post
            else:
                # Row written: count it and print progress in place
                posts_scraped += 1
                print(f"Posts scraped for {start_date.strftime('%Y-%m-%d')}: {posts_scraped}", end='\r')
                break

print(f"\nData extraction completed for {start_date.strftime('%Y-%m-%d')}.")