import csv
import time
from datetime import datetime, timedelta, timezone

import praw

# Column order for the output CSV; shared by the header and every row.
FIELDNAMES = [
    'title', 'author', 'created_utc', 'created_datetime', 'num_comments', 'score', 'url',
    'upvote_ratio', 'is_original_content', 'over_18', 'spoiler', 'locked', 'stickied', 'selftext',
]


def _yesterday_utc():
    """Return (start, end) aware UTC datetimes bounding yesterday.

    `end` is today at 00:00:00 UTC; `start` is 24 hours earlier, so the
    half-open interval [start, end) covers exactly yesterday.
    """
    # datetime.utcnow() is deprecated and naive; use an aware UTC clock.
    end = datetime.now(timezone.utc).replace(hour=0, minute=0, second=0, microsecond=0)
    return end - timedelta(days=1), end


def _post_row(post, post_date):
    """Flatten a PRAW submission into a dict keyed by FIELDNAMES.

    `post_date` is the submission's creation time as an aware UTC datetime
    (passed in so it is computed only once per post).
    """
    return {
        'title': post.title,
        # post.author is None for deleted accounts.
        'author': post.author.name if post.author else '[deleted]',
        'created_utc': post.created_utc,
        'created_datetime': post_date.strftime("%Y-%m-%d %H:%M:%S"),
        'num_comments': post.num_comments,
        'score': post.score,
        'url': post.url,
        'upvote_ratio': post.upvote_ratio,
        'is_original_content': post.is_original_content,
        'over_18': post.over_18,
        'spoiler': post.spoiler,
        'locked': post.locked,
        'stickied': post.stickied,
        'selftext': post.selftext,
    }


def main():
    """Scrape yesterday's posts from a subreddit into a dated CSV file."""
    # REDDIT API CREDS HERE
    reddit = praw.Reddit(client_id='',
                         client_secret='',
                         user_agent='YOUR_USER_AGENT')
    subreddit = reddit.subreddit('SUBREDDIT')  # PUT SUBREDDIT NAME HERE

    # Define the time range (yesterday), as a half-open UTC interval.
    start_date, end_date = _yesterday_utc()

    csv_filename = f"reddit_posts_{start_date.strftime('%Y-%m-%d')}.csv"
    with open(csv_filename, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=FIELDNAMES)
        writer.writeheader()

        posts_scraped = 0
        # subreddit.new() yields newest-first, so once we see a post older
        # than start_date nothing later in the listing can match.
        for post in subreddit.new(limit=None):
            post_date = datetime.fromtimestamp(post.created_utc, tz=timezone.utc)
            if post_date >= end_date:
                continue  # today's post; keep scanning backwards
            if post_date < start_date:
                break  # past yesterday; stop walking the listing

            # One retry on rate limiting, as the message promises; the
            # original `continue` silently dropped the post instead.
            for attempt in range(2):
                try:
                    writer.writerow(_post_row(post, post_date))
                except praw.exceptions.RedditAPIException:
                    print("Rate limit exceeded. Waiting for 10 seconds and retrying...")
                    time.sleep(10)  # Wait for 10 seconds
                else:
                    posts_scraped += 1
                    print(f"Posts scraped for {start_date.strftime('%Y-%m-%d')}: {posts_scraped}", end='\r')
                    break

    print(f"\nData extraction completed for {start_date.strftime('%Y-%m-%d')}.")


if __name__ == "__main__":
    main()