Convert generate-NOTICE.py to Python 3, fix name.

Python module names should be lower case and not use hyphens (the
former is a convention, the latter is a requirement for importable
modules).

Also updates the shell script to always use Python 3 so we don't need
to maintain Python 2 compatibility.

Test: repo upload, in both a Python 2 and a Python 3 virtualenv
Bug: None
Change-Id: I486e54a12686b4e528dc6c9c47af5c7a52a7b790
diff --git a/libc/tools/generate-NOTICE.py b/libc/tools/generate_notice.py
similarity index 60%
rename from libc/tools/generate-NOTICE.py
rename to libc/tools/generate_notice.py
index b6deb9c..e0e6b32 100755
--- a/libc/tools/generate-NOTICE.py
+++ b/libc/tools/generate_notice.py
@@ -1,28 +1,29 @@
 #!/usr/bin/env python
-# Run with directory arguments from any directory, with no special setup required.
+# Run with directory arguments from any directory, with no special setup
+# required.
 
-import ftplib
-import hashlib
 import os
+from pathlib import Path
 import re
-import shutil
-import string
-import subprocess
 import sys
-import tarfile
-import tempfile
+from typing import Sequence
 
 VERBOSE = False
 
+copyrights = set()
+
+
 def warn(s):
     sys.stderr.write("warning: %s\n" % s)
 
+
 def warn_verbose(s):
     if VERBOSE:
         warn(s)
 
-def is_interesting(path):
-    path = path.lower()
+
+def is_interesting(path_str: str) -> bool:
+    path = Path(path_str.lower())
     uninteresting_extensions = [
         ".bp",
         ".map",
@@ -33,12 +34,13 @@
         ".swp",
         ".txt",
     ]
-    if os.path.splitext(path)[1] in uninteresting_extensions:
+    if path.suffix in uninteresting_extensions:
         return False
-    if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/pylintrc"):
+    if path.name in {"notice", "readme", "pylintrc"}:
         return False
     return True
 
+
 def is_auto_generated(content):
     if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
         return True
@@ -46,14 +48,40 @@
         return True
     return False
 
-copyrights = set()
 
-def extract_copyright_at(lines, i):
-    hash = lines[i].startswith("#")
+def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
+    endings = [
+        " $FreeBSD: ",
+        "$Citrus$",
+        "$FreeBSD$",
+        "*/",
+        "From: @(#)",
+        # OpenBSD likes to say where stuff originally came from:
+        "Original version ID:",
+        "\t$Citrus: ",
+        "\t$NetBSD: ",
+        "\t$OpenBSD: ",
+        "\t@(#)",
+        "\tcitrus Id: ",
+        "\tfrom: @(#)",
+        "from OpenBSD:",
+    ]
+    if first_line_was_hash and not line:
+        return True
+
+    for ending in endings:
+        if ending in line:
+            return True
+
+    return False
+
+
+def extract_copyright_at(lines: Sequence[str], i: int) -> int:
+    first_line_was_hash = lines[i].startswith("#")
 
     # Do we need to back up to find the start of the copyright header?
     start = i
-    if not hash:
+    if not first_line_was_hash:
         while start > 0:
             if "/*" in lines[start - 1]:
                 break
@@ -62,20 +90,7 @@
     # Read comment lines until we hit something that terminates a
     # copyright header.
     while i < len(lines):
-        if "*/" in lines[i]:
-            break
-        if hash and len(lines[i]) == 0:
-            break
-        if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
-            break
-        if "\tcitrus Id: " in lines[i]:
-            break
-        if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
-            break
-        if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
-            break
-        # OpenBSD likes to say where stuff originally came from:
-        if "Original version ID:" in lines[i]:
+        if is_copyright_end(lines[i], first_line_was_hash):
             break
         i += 1
 
@@ -83,7 +98,10 @@
 
     # Trim trailing cruft.
     while end > 0:
-        if lines[end - 1] != " *" and lines[end - 1] != " * ====================================================":
+        line = lines[end - 1]
+        if line not in {
+                " *", " * ===================================================="
+        }:
             break
         end -= 1
 
@@ -92,7 +110,7 @@
     for line in lines[start:end]:
         line = line.replace("\t", "    ")
         line = line.replace("/* ", "")
-        line = re.sub("^ \* ", "", line)
+        line = re.sub(r"^ \* ", "", line)
         line = line.replace("** ", "")
         line = line.replace("# ", "")
         if "SPDX-License-Identifier:" in line:
@@ -102,7 +120,7 @@
         line = line.replace("--Copyright--", "")
         line = line.rstrip()
         # These come last and take care of "blank" comment lines.
-        if line == "#" or line == " *" or line == "**" or line == "-":
+        if line in {"#", " *", "**", "-"}:
             line = ""
         clean_lines.append(line)
 
@@ -112,19 +130,18 @@
     while clean_lines[len(clean_lines) - 1] == "":
         clean_lines = clean_lines[0:(len(clean_lines) - 1)]
 
-    copyright = "\n".join(clean_lines)
-    copyrights.add(copyright)
+    copyrights.add("\n".join(clean_lines))
 
     return i
 
 
-def do_file(path):
-    with open(path, "r") as the_file:
-        try:
-            content = open(path, "r").read().decode("utf-8")
-        except UnicodeDecodeError:
-            warn("bad UTF-8 in %s" % path)
-            content = open(path, "r").read().decode("iso-8859-1")
+def do_file(path: str) -> None:
+    raw = Path(path).read_bytes()
+    try:
+        content = raw.decode("utf-8")
+    except UnicodeDecodeError:
+        warn("bad UTF-8 in %s" % path)
+        content = raw.decode("iso-8859-1")
 
     lines = content.split("\n")
 
@@ -140,10 +157,12 @@
         if "public domain" in content.lower():
             warn_verbose("ignoring public domain file %s" % path)
             return
-        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
+        warn('no copyright notice found in "%s" (%d lines)' %
+             (path, len(lines)))
         return
 
-    # Manually iterate because extract_copyright_at tells us how many lines to skip.
+    # Manually iterate because extract_copyright_at tells us how many lines to
+    # skip.
     i = 0
     while i < len(lines):
         if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
@@ -152,7 +171,7 @@
             i += 1
 
 
-def do_dir(path):
+def do_dir(arg):
     for directory, sub_directories, filenames in os.walk(arg):
         if ".git" in sub_directories:
             sub_directories.remove(".git")
@@ -164,20 +183,23 @@
                 do_file(path)
 
 
-args = sys.argv[1:]
-if len(args) == 0:
-    args = [ "." ]
+def main() -> None:
+    args = sys.argv[1:]
+    if len(args) == 0:
+        args = ["."]
 
-for arg in args:
-    if os.path.isdir(arg):
-        do_dir(arg)
-    else:
-        do_file(arg)
+    for arg in args:
+        if os.path.isdir(arg):
+            do_dir(arg)
+        else:
+            do_file(arg)
 
-for copyright in sorted(copyrights):
-    print copyright.encode("utf-8")
-    print
-    print "-------------------------------------------------------------------"
-    print
+    for notice in sorted(copyrights):
+        print(notice)
+        print()
+        print("-" * 67)
+        print()
 
-sys.exit(0)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/update_notice.sh b/tools/update_notice.sh
index a309bc2..302974f 100755
--- a/tools/update_notice.sh
+++ b/tools/update_notice.sh
@@ -1,7 +1,11 @@
 #!/bin/bash
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 cd $DIR/..
-./libc/tools/generate-NOTICE.py libc libm > libc/NOTICE
+python3 ./libc/tools/generate_notice.py libc libm > libc/NOTICE
+if [ $? -ne 0 ]; then
+  >&2 echo NOTICE file generation failed
+  exit 1
+fi
 
 git diff --exit-code HEAD libc/NOTICE
 exit $?