This repository has been archived by the owner on Oct 16, 2024. It is now read-only.

Snippet Removal Part 1— TSV generation to download_storages #1932

Draft · wants to merge 3 commits into base: develop
6 changes: 3 additions & 3 deletions capstone/capweb/templates/about.md
@@ -41,9 +41,9 @@ Cases published after 1922 do not include headnotes.
Here are some tsv-formatted spreadsheets with specific counts from our collection, and links to view those cases in the
API:

- * [Case Count by Reporter Series]({% static "downloads/cases_by_reporter.tsv" %})
- * [Case Count by Jurisdiction]({% static "downloads/cases_by_jurisdiction.tsv" %})
- * [Case Count by Decision Date]({% static "downloads/cases_by_decision_date.tsv" %})
+ * [Case Count by Reporter Series]({% url "download-files" "cases_by_reporter.tsv" %})
+ * [Case Count by Jurisdiction]({% url "download-files" "cases_by_jurisdiction.tsv" %})
+ * [Case Count by Decision Date]({% url "download-files" "cases_by_decision_date.tsv" %})

# Digitization Process {: class="subtitle" data-toc-label='Digitization' }

7 changes: 6 additions & 1 deletion capstone/fabfile.py
@@ -42,7 +42,7 @@

import capdb.tasks as tasks
from scripts import set_up_postgres, data_migrations, \
-    validate_private_volumes as validate_private_volumes_script, export, update_snippets
+    validate_private_volumes as validate_private_volumes_script, export, update_snippets, update_download_tsv
from scripts.helpers import copy_file, volume_barcode_from_folder, up_to_date_volumes, storage_lookup


@@ -842,9 +842,14 @@ def run_edit_script(script=None, dry_run='true', **kwargs):
    else:
        method(dry_run=dry_run, **kwargs)

@task
def update_files():
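    # regenerate the downloadable TSV reports (cases by jurisdiction, reporter, and decision date)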
    update_download_tsv.update_all()

@task
def update_all_snippets():
    update_snippets.update_all()
    update_files()

@task
def update_search_snippets():
32 changes: 0 additions & 32 deletions capstone/scripts/tests/test_snippet_updaters.py
@@ -14,38 +14,6 @@ def test_map_numbers(case_factory, jurisdiction):
    assert parsed[jurisdiction.slug]['volume_count'] == 3
    assert parsed[jurisdiction.slug]['page_count'] == 15

@pytest.mark.django_db(databases=['capdb'])
def test_cases_by_decision_date(case_factory):
    dates = ["2000", "2000-04", "2000-04", "2000-04-15"]
    _ = [case_factory(decision_date_original=d) for d in dates]
    update_snippets.cases_by_decision_date_tsv()
    cases_by_decision_date = Snippet.objects.get(label='cases_by_decision_date')
    assert cases_by_decision_date.contents == (
        '"2000"\t4\t"https://api.case.test:8000/v1/cases/?decision_date__gte=2000&decision_date__lte=2000-12-31"\r\n'
        '"2000-04"\t3\t"https://api.case.test:8000/v1/cases/?decision_date__gte=2000-04&decision_date__lte=2000-04-31"\r\n'
        '"2000-04-15"\t1\t"https://api.case.test:8000/v1/cases/?decision_date__gte=2000-04-15&decision_date__lte=2000-04-15"\r\n'
    )

@pytest.mark.django_db(databases=['capdb'])
def test_cases_by_jurisdiction(jurisdiction, case_factory):
    [case_factory(jurisdiction=jurisdiction) for i in range(3)]
    update_snippets.cases_by_jurisdiction_tsv()
    cases_by_jurisdiction = Snippet.objects.get(label='cases_by_jurisdiction')
    rows = cases_by_jurisdiction.contents.split("\r\n")[:-1]
    assert len(rows) == 1
    assert rows[0].split("\t")[1] == '"%s"' % jurisdiction.name_long
    assert rows[0].split("\t")[2] == '3'

@pytest.mark.django_db(databases=['capdb'])
def test_cases_by_reporter(reporter, case_factory):
    [case_factory(reporter=reporter) for i in range(3)]
    update_snippets.cases_by_reporter_tsv()
    cases_by_reporter = Snippet.objects.get(label='cases_by_reporter')
    rows = cases_by_reporter.contents.split("\r\n")[:-1]
    assert len(rows) == 1
    assert rows[0].split("\t")[1] == '"%s"' % reporter.full_name
    assert rows[0].split("\t")[2] == '3'

@pytest.mark.django_db(databases=['capdb'])
def test_search_jurisdiction_list(jurisdiction):
    update_snippets.search_jurisdiction_list()
59 changes: 59 additions & 0 deletions capstone/scripts/tests/test_tsv_updaters.py
@@ -0,0 +1,59 @@
import pytest

from django.core.files.storage import FileSystemStorage

from scripts import update_download_tsv
from capweb.templatetags.api_url import api_url


@pytest.mark.django_db(databases=['capdb'])
def test_cases_by_decision_date(case_factory, client, tmp_path, monkeypatch):
    dates = ["2000", "2000-04", "2000-04", "2000-04-15"]
    _ = [case_factory(decision_date_original=d) for d in dates]
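    # point the module-level download storage at a temp dir; a second
    # FileSystemStorage on the same location reads back what the code wrote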
    monkeypatch.setattr("scripts.update_download_tsv.download_files_storage", FileSystemStorage(location=str(tmp_path)))
    fs_storage = FileSystemStorage(location=str(tmp_path))

    update_download_tsv.cases_by_decision_date_tsv()

    file_contents = fs_storage.open(tmp_path / 'cases_by_decision_date.tsv').read()
    correct_contents = bytes('"2000"\t4\t"https://api.case.test:8000/v1/cases/?decision_date__gte=2000&decision_date__lte=2000-12-31"\r\n'
                             '"2000-04"\t3\t"https://api.case.test:8000/v1/cases/?decision_date__gte=2000-04&decision_date__lte=2000-04-31"\r\n'
                             '"2000-04-15"\t1\t"https://api.case.test:8000/v1/cases/?decision_date__gte=2000-04-15&decision_date__lte=2000-04-15"\r\n', encoding='utf-8')
    assert file_contents == correct_contents
@pytest.mark.django_db(databases=['capdb'])
def test_cases_by_jurisdiction(jurisdiction, case_factory, client, tmp_path, monkeypatch):
    [case_factory(jurisdiction=jurisdiction) for i in range(3)]
    monkeypatch.setattr("scripts.update_download_tsv.download_files_storage", FileSystemStorage(location=str(tmp_path)))
    fs_storage = FileSystemStorage(location=str(tmp_path))

    update_download_tsv.cases_by_jurisdiction_tsv()

    file_contents = fs_storage.open(tmp_path / 'cases_by_jurisdiction.tsv').read()
    correct_contents = bytes('"{}"\t"{}"\t{}\t"{}"\t"{}"\r\n'.format(
        jurisdiction.name,
        jurisdiction.name_long,
        3,
        "{}?jurisdiction={}".format(api_url('cases-list'), jurisdiction.slug),
        "{}{}".format(api_url('jurisdiction-list'), jurisdiction.pk)), encoding='utf-8')
    assert file_contents == correct_contents


@pytest.mark.django_db(databases=['capdb'])
def test_cases_by_reporter(reporter, case_factory, client, tmp_path, monkeypatch):
    [case_factory(reporter=reporter) for i in range(3)]
    monkeypatch.setattr("scripts.update_download_tsv.download_files_storage", FileSystemStorage(location=str(tmp_path)))
    fs_storage = FileSystemStorage(location=str(tmp_path))

    update_download_tsv.cases_by_reporter_tsv()

    file_contents = fs_storage.open(tmp_path / 'cases_by_reporter.tsv').read()
    correct_contents = bytes('"{}"\t"{}"\t{}\t"{}"\t"{}"\r\n'.format(
        reporter.short_name,
        reporter.full_name,
        3,
        "{}?reporter={}".format(api_url('cases-list'), reporter.pk),
        "{}{}".format(api_url('reporter-list'), reporter.pk)
    ), encoding='utf-8')
    assert correct_contents in file_contents

115 changes: 115 additions & 0 deletions capstone/scripts/update_download_tsv.py
@@ -0,0 +1,115 @@
import io
import csv
from collections import defaultdict

from django.db.models import Count, Q
from capdb.models import Reporter, Jurisdiction, CaseMetadata
from capweb.templatetags.api_url import api_url
from tqdm import tqdm
from capdb.storages import download_files_storage

def update_all():
    cases_by_jurisdiction_tsv()
    cases_by_reporter_tsv()
    cases_by_decision_date_tsv()


def cases_by_decision_date_tsv(directory=""):
"""
count of all cases, grouped by decision date
"""
by_date = (CaseMetadata.objects
.in_scope()
.values('decision_date_original')
.annotate(Count('decision_date_original'))
.order_by('decision_date_original'))
label="cases_by_decision_date"
snippet_format="tsv"
output = io.StringIO()
writer = csv.writer(output, delimiter='\t', quoting=csv.QUOTE_NONNUMERIC)

# count dates
date_counter = defaultdict(int)
for group in tqdm(by_date):
date = group['decision_date_original']
count = group['decision_date_original__count']
# count year
date_counter[date[:4]] += count
# count year-month
if len(date) > 4:
date_counter[date[:7]] += count
# count year-month-day
if len(date) > 7:
date_counter[date] += count

# write dates
cases_url = api_url('cases-list')
for date, count in date_counter.items():
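        # pad to an inclusive upper bound: "2000" -> "2000-12-31", "2000-04" -> "2000-04-31"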
        max_date = date + "0000-12-31"[len(date):]
        writer.writerow([
            date,
            count,
            f"{cases_url}?decision_date__gte={date}&decision_date__lte={max_date}",
        ])

    write_update(label, snippet_format, output.getvalue(), directory=directory)


def cases_by_jurisdiction_tsv(directory=""):
"""
iterate through all reporters, tally each case, output TSV
"""
label="cases_by_jurisdiction"
snippet_format="tsv"
output = io.StringIO()
writer = csv.writer(output, delimiter='\t',quoting=csv.QUOTE_NONNUMERIC)
for jurisdiction in tqdm(Jurisdiction.objects.order_by('name').annotate(case_count=Count('case_metadatas', filter=Q(case_metadatas__in_scope=True)))):
if jurisdiction.case_count == 0:
continue
writer.writerow(
[
jurisdiction.name,
jurisdiction.name_long,
jurisdiction.case_count,
"{}?jurisdiction={}".format(api_url('cases-list'), jurisdiction.slug),
"{}{}".format(api_url('jurisdiction-list'), jurisdiction.pk)
]
)

write_update(label, snippet_format, output.getvalue(), directory=directory)


def cases_by_reporter_tsv(directory=""):
"""
iterate through all jurisdictions, tally each case, output TSV
"""
label="cases_by_reporter"
snippet_format="tsv"
output = io.StringIO()
writer = csv.writer(output, delimiter='\t',quoting=csv.QUOTE_NONNUMERIC)
for reporter in tqdm(Reporter.objects.order_by('full_name').annotate(case_count=Count('case_metadatas', filter=Q(case_metadatas__in_scope=True)))):
if reporter.case_count == 0:
continue
writer.writerow(
[
reporter.short_name,
reporter.full_name,
reporter.case_count,
"{}?reporter={}".format(api_url('cases-list'), reporter.pk),
"{}{}".format(api_url('reporter-list'), reporter.pk)
]
)

write_update(label, snippet_format, output.getvalue(), directory=directory)


def write_update(label, snippet_format, contents, directory):
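    """Write `contents` as "<label>.<snippet_format>" (optionally under `directory`) to download_files_storage."""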

    file_name = "{}.{}".format(label, snippet_format)
    full_path = "{}{}{}".format(directory, '/' if directory else '', file_name)

    if directory and not download_files_storage.exists(directory):
        download_files_storage.mkdir(directory, parents=True)

    with download_files_storage.open(full_path, 'w') as d:
        d.write(contents)
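
For orientation, a minimal sketch of how the new module would be exercised end to end. This assumes `download_files_storage` reads back the same paths `write_update` writes; the module, function, and file names are taken from the diff above.

```python
# Sketch only: regenerate the three download TSVs, then read one back
# through the same storage backend that write_update() targets.
from scripts import update_download_tsv
from capdb.storages import download_files_storage

update_download_tsv.update_all()  # writes cases_by_*.tsv via write_update()

with download_files_storage.open("cases_by_reporter.tsv", "r") as f:
    print(f.read())
```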
101 changes: 1 addition & 100 deletions capstone/scripts/update_snippets.py
@@ -1,113 +1,14 @@
import io
import csv
from collections import defaultdict

from django.db import connections
from django.db.models import Count, Q
-from capdb.models import Reporter, Jurisdiction, CaseMetadata, Snippet, Court
+from capdb.models import Reporter, Jurisdiction, Snippet, Court
import json
from capweb.templatetags.api_url import api_url
from tqdm import tqdm


def update_all():
    update_map_numbers()
    cases_by_jurisdiction_tsv()
    cases_by_reporter_tsv()
    cases_by_decision_date_tsv()
    search_reporter_list()
    search_court_list()
    court_abbrev_list()
    search_jurisdiction_list()

def cases_by_decision_date_tsv():
    """
    count of all cases, grouped by decision date
    """
    by_date = (CaseMetadata.objects
               .in_scope()
               .values('decision_date_original')
               .annotate(Count('decision_date_original'))
               .order_by('decision_date_original'))
    label="cases_by_decision_date"
    snippet_format="text/tab-separated-values"
    output = io.StringIO()
    writer = csv.writer(output, delimiter='\t', quoting=csv.QUOTE_NONNUMERIC)

    # count dates
    date_counter = defaultdict(int)
    for group in tqdm(by_date):
        print(group)
        date = group['decision_date_original']
        count = group['decision_date_original__count']
        # count year
        date_counter[date[:4]] += count
        # count year-month
        if len(date) > 4:
            date_counter[date[:7]] += count
        # count year-month-day
        if len(date) > 7:
            date_counter[date] += count

    # write dates
    cases_url = api_url('cases-list')
    for date, count in date_counter.items():
        max_date = date + "0000-12-31"[len(date):]
        writer.writerow([
            date,
            count,
            f"{cases_url}?decision_date__gte={date}&decision_date__lte={max_date}",
        ])

    write_update(label, snippet_format, output.getvalue())

def cases_by_jurisdiction_tsv():
    """
    iterate through all reporters, tally each case, output TSV
    """
    label="cases_by_jurisdiction"
    snippet_format="text/tab-separated-values"
    output = io.StringIO()
    writer = csv.writer(output, delimiter='\t',quoting=csv.QUOTE_NONNUMERIC)
    for jurisdiction in tqdm(Jurisdiction.objects.order_by('name').annotate(case_count=Count('case_metadatas', filter=Q(case_metadatas__in_scope=True)))):
        if jurisdiction.case_count == 0:
            continue
        writer.writerow(
            [
                jurisdiction.name,
                jurisdiction.name_long,
                jurisdiction.case_count,
                "{}?jurisdiction={}".format(api_url('cases-list'), jurisdiction.slug),
                "{}{}".format(api_url('jurisdiction-list'), jurisdiction.pk)
            ]
        )

    write_update(label, snippet_format, output.getvalue())


def cases_by_reporter_tsv():
    """
    iterate through all jurisdictions, tally each case, output TSV
    """
    label="cases_by_reporter"
    snippet_format="text/tab-separated-values"
    output = io.StringIO()
    writer = csv.writer(output, delimiter='\t',quoting=csv.QUOTE_NONNUMERIC)
    for reporter in tqdm(Reporter.objects.order_by('full_name').annotate(case_count=Count('case_metadatas', filter=Q(case_metadatas__in_scope=True)))):
        if reporter.case_count == 0:
            continue
        writer.writerow(
            [
                reporter.short_name,
                reporter.full_name,
                reporter.case_count,
                "{}?reporter={}".format(api_url('cases-list'), reporter.pk),
                "{}{}".format(api_url('reporter-list'), reporter.pk)
            ]
        )

    write_update(label, snippet_format, output.getvalue())

def update_map_numbers():
""" Write map_numbers snippet. """
label = "map_numbers"