Baby NSA: The College Years
Thanks for making it to SOURCE Boston! The slides for my talk are available below, and you’ll find my research notes further on down. I’ll update this page with video once it becomes available.
The research notes you see here are a work-in-progress listing of NSA tools and programs along with a brief description of each. As of now, it has around 120 entries, but I hope to keep adding to that (doing this research is surprisingly fun).
Want to grab all of the primary documents? Use this Python script:
""" Automatically download every document in the EFF's NSA documents collection
available from https://www.eff.org/nsa-spying/nsadocs """
""" You'll need to install the robobrowser package """
import os
import csv
from time import sleep
from robobrowser import RoboBrowser
browser = RoboBrowser(history=True, user_agent="NSA Document Crawler", parser='html.parser')
browser.open('https://www.eff.org/nsa-spying/nsadocs')
documents = browser.select('#panel-case-page table.views-table tbody tr')
# set up a folder to put the docs in
folderpath = os.path.abspath(os.path.join(os.getcwd(), 'eff_nsa_docs'))
try:
os.mkdir(folderpath)
except FileExistsError:
# directory already exists
pass
# set up a CSV file to contain the metadata
filename = 'eff_nsa_docs.csv'
with open(filename, 'w', newline='') as csvfile:
csvwriter = csv.writer(csvfile)
# write header
csvwriter.writerow(['date','title','source','link'])
total = len(documents)
this = 0 # the index of the currently-downloading document
failed = 0 # total documents which failed to download
print("Found {} documents.".format(total))
for document in documents:
attempts = 0
this += 1
while True:
try:
fields = document.select("td")
date = fields[0].text.strip()
link = fields[1].select('a')[0]['href']
title = fields[1].select('a')[0].text
source = fields[2].select('a')[0].text
csvwriter.writerow([date,title,source,link])
browser.follow_link(fields[1].select('a')[0])
link_to_file = browser.select('span.file a')[0]
filename = link_to_file.text
file_url = link_to_file['href']
print('({}/{}) Downloading {} ({})...'.format(this, total, title, filename))
filepath = os.path.join(folderpath, filename)
if os.path.isfile(filepath):
print("[*] File already exists!")
break
else:
sleep(2)
request = browser.session.get(file_url, stream=True)
with open(filepath, "wb") as pdf_file:
pdf_file.write(request.content)
break
except IndexError:
attempts += 1
if attempts < 3:
print("[!] Failed! Retrying...")
else:
print("[!] Failed too many times. Unable to download {}".format(filename))
failed += 1
break
print("Successfully downloaded {} of {} documents.".format(total-failed, total))