Convert generate-NOTICE.py to Python 3, rename to generate_notice.py.
Python module names should be lower case and not use hyphens (the
former is a convention, the latter is a requirement for importable
modules).
Also updates the shell script to always use Python 3 so we don't need
to maintain Python 2 compatibility.
Test: repo upload, in both a Python 2 and a Python 3 virtualenv
Bug: None
Change-Id: I486e54a12686b4e528dc6c9c47af5c7a52a7b790
diff --git a/libc/tools/generate-NOTICE.py b/libc/tools/generate_notice.py
similarity index 60%
rename from libc/tools/generate-NOTICE.py
rename to libc/tools/generate_notice.py
index b6deb9c..e0e6b32 100755
--- a/libc/tools/generate-NOTICE.py
+++ b/libc/tools/generate_notice.py
@@ -1,28 +1,29 @@
#!/usr/bin/env python
-# Run with directory arguments from any directory, with no special setup required.
+# Run with directory arguments from any directory, with no special setup
+# required.
-import ftplib
-import hashlib
import os
+from pathlib import Path
import re
-import shutil
-import string
-import subprocess
import sys
-import tarfile
-import tempfile
+from typing import Sequence
VERBOSE = False
+copyrights = set()
+
+
def warn(s):
sys.stderr.write("warning: %s\n" % s)
+
def warn_verbose(s):
if VERBOSE:
warn(s)
-def is_interesting(path):
- path = path.lower()
+
+def is_interesting(path_str: str) -> bool:
+ path = Path(path_str.lower())
uninteresting_extensions = [
".bp",
".map",
@@ -33,12 +34,13 @@
".swp",
".txt",
]
- if os.path.splitext(path)[1] in uninteresting_extensions:
+ if path.suffix in uninteresting_extensions:
return False
- if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/pylintrc"):
+ if path.name in {"notice", "readme", "pylintrc"}:
return False
return True
+
def is_auto_generated(content):
if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
return True
@@ -46,14 +48,40 @@
return True
return False
-copyrights = set()
-def extract_copyright_at(lines, i):
- hash = lines[i].startswith("#")
+def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
+ endings = [
+ " $FreeBSD: ",
+ "$Citrus$",
+ "$FreeBSD$",
+ "*/",
+ "From: @(#)",
+ # OpenBSD likes to say where stuff originally came from:
+ "Original version ID:",
+ "\t$Citrus: ",
+ "\t$NetBSD: ",
+ "\t$OpenBSD: ",
+ "\t@(#)",
+ "\tcitrus Id: ",
+ "\tfrom: @(#)",
+ "from OpenBSD:",
+ ]
+ if first_line_was_hash and not line:
+ return True
+
+ for ending in endings:
+ if ending in line:
+ return True
+
+ return False
+
+
+def extract_copyright_at(lines: Sequence[str], i: int) -> int:
+ first_line_was_hash = lines[i].startswith("#")
# Do we need to back up to find the start of the copyright header?
start = i
- if not hash:
+ if not first_line_was_hash:
while start > 0:
if "/*" in lines[start - 1]:
break
@@ -62,20 +90,7 @@
# Read comment lines until we hit something that terminates a
# copyright header.
while i < len(lines):
- if "*/" in lines[i]:
- break
- if hash and len(lines[i]) == 0:
- break
- if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
- break
- if "\tcitrus Id: " in lines[i]:
- break
- if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
- break
- if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
- break
- # OpenBSD likes to say where stuff originally came from:
- if "Original version ID:" in lines[i]:
+ if is_copyright_end(lines[i], first_line_was_hash):
break
i += 1
@@ -83,7 +98,10 @@
# Trim trailing cruft.
while end > 0:
- if lines[end - 1] != " *" and lines[end - 1] != " * ====================================================":
+ line = lines[end - 1]
+ if line not in {
+ " *", " * ===================================================="
+ }:
break
end -= 1
@@ -92,7 +110,7 @@
for line in lines[start:end]:
line = line.replace("\t", " ")
line = line.replace("/* ", "")
- line = re.sub("^ \* ", "", line)
+ line = re.sub(r"^ \* ", "", line)
line = line.replace("** ", "")
line = line.replace("# ", "")
if "SPDX-License-Identifier:" in line:
@@ -102,7 +120,7 @@
line = line.replace("--Copyright--", "")
line = line.rstrip()
# These come last and take care of "blank" comment lines.
- if line == "#" or line == " *" or line == "**" or line == "-":
+ if line in {"#", " *", "**", "-"}:
line = ""
clean_lines.append(line)
@@ -112,19 +130,18 @@
while clean_lines[len(clean_lines) - 1] == "":
clean_lines = clean_lines[0:(len(clean_lines) - 1)]
- copyright = "\n".join(clean_lines)
- copyrights.add(copyright)
+ copyrights.add("\n".join(clean_lines))
return i
-def do_file(path):
- with open(path, "r") as the_file:
- try:
- content = open(path, "r").read().decode("utf-8")
- except UnicodeDecodeError:
- warn("bad UTF-8 in %s" % path)
- content = open(path, "r").read().decode("iso-8859-1")
+def do_file(path: str) -> None:
+ raw = Path(path).read_bytes()
+ try:
+ content = raw.decode("utf-8")
+ except UnicodeDecodeError:
+ warn("bad UTF-8 in %s" % path)
+ content = raw.decode("iso-8859-1")
lines = content.split("\n")
@@ -140,10 +157,12 @@
if "public domain" in content.lower():
warn_verbose("ignoring public domain file %s" % path)
return
- warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
+ warn('no copyright notice found in "%s" (%d lines)' %
+ (path, len(lines)))
return
- # Manually iterate because extract_copyright_at tells us how many lines to skip.
+ # Manually iterate because extract_copyright_at tells us how many lines to
+ # skip.
i = 0
while i < len(lines):
if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
@@ -152,7 +171,7 @@
i += 1
-def do_dir(path):
+def do_dir(arg):
for directory, sub_directories, filenames in os.walk(arg):
if ".git" in sub_directories:
sub_directories.remove(".git")
@@ -164,20 +183,23 @@
do_file(path)
-args = sys.argv[1:]
-if len(args) == 0:
- args = [ "." ]
+def main() -> None:
+ args = sys.argv[1:]
+ if len(args) == 0:
+ args = ["."]
-for arg in args:
- if os.path.isdir(arg):
- do_dir(arg)
- else:
- do_file(arg)
+ for arg in args:
+ if os.path.isdir(arg):
+ do_dir(arg)
+ else:
+ do_file(arg)
-for copyright in sorted(copyrights):
- print copyright.encode("utf-8")
- print
- print "-------------------------------------------------------------------"
- print
+ for notice in sorted(copyrights):
+ print(notice)
+ print()
+ print("-" * 67)
+ print()
-sys.exit(0)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/update_notice.sh b/tools/update_notice.sh
index a309bc2..302974f 100755
--- a/tools/update_notice.sh
+++ b/tools/update_notice.sh
@@ -1,7 +1,11 @@
#!/bin/bash
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd $DIR/..
-./libc/tools/generate-NOTICE.py libc libm > libc/NOTICE
+python3 ./libc/tools/generate_notice.py libc libm > libc/NOTICE
+if [ $? -ne 0 ]; then
+ >&2 echo NOTICE file generation failed
+ exit 1
+fi
git diff --exit-code HEAD libc/NOTICE
exit $?