Skip to content

Commit

Permalink
Added parse_changelog_md() method to keep a record of all the logs …
Browse files Browse the repository at this point in the history
…that are already added to the changelog file

Signed-off-by: Ayush Joshi <[email protected]>
  • Loading branch information
joshiayush committed Nov 17, 2023
1 parent 0e96d8e commit 5bc1c3a
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 131 deletions.
2 changes: 1 addition & 1 deletion tools/ai_logs/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .changelog import AIGitLog
from .changelog import Changelog
303 changes: 193 additions & 110 deletions tools/ai_logs/changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,132 +11,215 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A changelog generator for `ai`."""

from __future__ import annotations

from typing import Union

import re
import os
import git
import copy
import pathlib

from datetime import date

class AIGitLog(git.Git):
class _AIChangeLogParser:
    """Groups raw changelog lines by their leading change-type keyword.

    A line belongs to a change type when its first whitespace-separated word
    equals that type, compared case-insensitively.
    """

    # Change types in the order `parse()` reports them.
    _LOG_TYPES = (
        'added',
        'changed',
        'deprecated',
        'removed',
        'fixed',
        'security',
        'yanked',
    )

    def __init__(self, changelogs: list[str]):
        """Stores the log lines to be grouped.

        Args:
            changelogs: Individual log lines, one entry per line.
        """
        self.changelogs = changelogs

    def _filter_out_logs(self,
                         log_type: str,
                         *,
                         capitalize: bool = False) -> tuple[str, ...]:
        """Returns the log lines whose first word is `log_type`.

        Args:
            log_type: Change type to select; compared case-insensitively.
            capitalize: When true, upper-cases the first character of every
                selected line and leaves the rest of the line untouched.

        Returns:
            The selected lines, in their original order.
        """
        wanted = log_type.lower()
        selected = []
        for changelog in self.changelogs:
            words = changelog.split()
            # Splitting text on '\n' always yields at least one empty string
            # for empty or newline-terminated input; such lines have no first
            # word and must be skipped instead of raising IndexError.
            if not words or words[0].lower() != wanted:
                continue
            if capitalize:
                changelog = changelog[0].upper() + changelog[1:]
            selected.append(changelog)
        return tuple(selected)

    def parse(self) -> dict[str, tuple[str, ...]]:
        """Groups the stored log lines by change type.

        Each group is also stored on the instance under the attribute named
        after its change type (`self.added`, `self.changed`, ...).

        Returns:
            A dictionary mapping every change type to a tuple of its
            capitalized log lines; types without logs map to an empty tuple.
        """
        parsed = {}
        for log_type in self._LOG_TYPES:
            logs = self._filter_out_logs(log_type, capitalize=True)
            setattr(self, log_type, logs)
            parsed[log_type] = logs
        return parsed

class Changelog(git.Git):
"""Generates changelogs for `ai`."""
git_log_fmt = '%s ([#%h](https://www.github.com/joshiayush/ai/commit/%h))'

def __init__(self, working_dir: Union[str, pathlib.Path]):
    """Initializes the git wrapper and an empty changelog record.

    Args:
        working_dir: Path forwarded unchanged to the parent initializer.
    """
    super().__init__(working_dir)
    # The date is taken once, at construction time.
    self.date = date.today()
    self.changelog = {}

def _format_git_logs(self) -> str:
    """Formats the collected git logs as markdown for the changelog file.

    Reads `self.changelog` (first-level key -> change type -> log entries)
    and `self.date`, which is used in every second-level heading.

    Returns:
        The markdown text: the `# Changelog` title, one `## <key> — <date>`
        heading per first-level key and, below it, a `### <Type>` bullet list
        for every change type that has entries. Entries are sorted so the
        output does not depend on set iteration order.
    """
    blocks = ['# Changelog']
    for key, logs_by_type in self.changelog.items():
        # First-level keys name the group of followed paths the logs were
        # generated for. The ' — ' separator is required: without it the
        # heading cannot be matched by the pattern `parse_changelog_md` uses
        # to read the file back.
        blocks.append(f'## {key} — {self.date}')
        for git_log_t, logs in logs_by_type.items():
            if not logs:
                continue
            blocks.append(f'### {git_log_t.capitalize()}')
            blocks.append('\n'.join(f'- {log}' for log in sorted(logs)))
    # Every block, including the last one, is followed by a blank line.
    return '\n\n'.join(blocks) + '\n\n'

def _get_git_logs(self, pretty: str,
follow: Union[str, pathlib.Path]) -> dict[str, set]:
"""Returns a dictionary containing the log records for every type of change.
def _get_changelog_splitted(self,
pretty: str,
follow: str = None) -> list[str]:
The log records are returned in a form of set to later subtract already
published logs from it.
Args:
pretty: A prettified string format for the log statement.
follow: A path for which the logs are to be generated. For example, if
given `docs/`, it means follow the changes made to this directory only.
Returns:
A dictionary containing the log records for every type of change.
"""
if follow:
return self.log(f'--pretty={pretty}', '--follow', follow).split('\n')
return self.log(f'--pretty={pretty}').split('\n')

def get_changelog(self, pretty: str, follow: str = None) -> dict[str, tuple]:
    """Returns the git log lines grouped by change type.

    Args:
        pretty: Log format forwarded to `_get_changelog_splitted`.
        follow: Optional path forwarded to `_get_changelog_splitted`.

    Returns:
        The dictionary produced by `_AIChangeLogParser.parse()`.
    """
    raw_logs = self._get_changelog_splitted(pretty, follow)
    return AIGitLog._AIChangeLogParser(raw_logs).parse()

def _prettify(self, changelog: list[str]) -> str:
    """Renders grouped changelog entries as markdown sections.

    Args:
        changelog: Mapping from change type to its log entries; it is indexed
            with all seven change types, so each of them must be present.

    Returns:
        One `### <Type>` section per change type, joined by single newlines.
        A change type without entries contributes an empty string, so only
        its separating newline remains in the output.
    """
    headings = (
        'added',
        'changed',
        'deprecated',
        'fixed',
        'removed',
        'security',
        'yanked',
    )
    sections = []
    for heading in headings:
        logs = changelog[heading]
        if not logs:
            sections.append('')
            continue
        bullets = '\n'.join(f'- {log}' for log in logs)
        sections.append(f'### {heading.capitalize()}\n\n{bullets}')
    return '\n'.join(sections)

def parse_changelogs_for_all(self,
follow_path: tuple[str]) -> dict[str, tuple]:
merged_changelog_dict = {
'added': tuple(),
'changed': tuple(),
'deprecated': tuple(),
'fixed': tuple(),
'removed': tuple(),
'security': tuple(),
'yanked': tuple(),
}
for follow in follow_path:
changelogs = self.get_changelog(
'%s ([#%h](https://www.github.com/joshiayush/ai/commit/%h))',
follow=follow
git_logs = self.log(f'--pretty={pretty}', '--follow', follow)
else:
git_logs = self.log(f'--pretty={pretty}')
git_logs = git_logs.split('\n')

git_logs_dict = dict()
for git_log_t in (
'added',
'changed',
'deprecated',
'fixed',
'removed',
'security',
'yanked',
):
git_logs_dict.update(
{
f'{git_log_t}':
set(
filter(
lambda changelog: changelog is not None,
filter(
lambda changelog: changelog.capitalize()
if git_log_t == changelog.split()[0].lower() else None,
git_logs
)
)
)
}
)
for key in merged_changelog_dict:
merged_changelog_dict[key] = (
*merged_changelog_dict[key], *changelogs[key]
)

for key in merged_changelog_dict:
merged_changelog_dict[key] = tuple(set(merged_changelog_dict[key]))
return self._prettify(merged_changelog_dict)
return git_logs_dict

def read_changelog_md(self, fpath: Union[str, pathlib.Path]) -> str:
"""Returns the contents of the changelog file; omitting the first line.
The first line is just `# CHANGELOG` which does not contribute to the actual
values that needs to be parsed before updating the changelog file.
Args:
fpath: Path to the changelog file.
def write_changelog(
self, fpath: Union[str, pathlib.Path], changelog: str
Returns:
The changelogs.
"""
changelog = ''
with open(fpath, mode='r', encoding='utf-8') as f:
changelog = ''.join(f.readlines()[1:])
return changelog

def write_changelog_md(
self, fpath: Union[str, pathlib.Path],
follow: tuple[dict[str, tuple[Union[str, pathlib.Path]]]]
) -> None:
old_changelogs = None
if os.access(os.fspath(fpath), os.F_OK):
with open(fpath, mode='r', encoding='utf-8') as f:
old_changelogs = ''.join(f.readlines()[1:])

changelog = f'# CHANGELOG\n\n{changelog}'
with open(fpath, mode='w', encoding='utf-8') as f:
f.write(changelog)

if old_changelogs:
with open(fpath, mode='a', encoding='utf-8') as f:
f.writelines(['\n', '\n', old_changelogs])
"""Writes the newly added logs to the changelog file.
Args:
fpath: Writable changelog file path.
follow: A path for which the logs are to be generated. For example, if
given `docs/`, it means follow the changes made to this directory only.
"""
for f in follow:
for k in f:
for fp in f[k]:
git_logs = self._get_git_logs(self.git_log_fmt, follow=fp)
if k not in self.changelog:
self.changelog[k] = git_logs
else:
for git_log_t in git_logs:
self.changelog[k][git_log_t].update(git_logs[git_log_t])

old_changelog = self.read_changelog_md(fpath)
old_changelog = self.parse_changelog_md(old_changelog)
tmp_changelog = dict()
for k in old_changelog:
tmp_changelog[k] = dict()
for date in old_changelog[k]:
for git_log_t in old_changelog[k][date]:
if git_log_t not in tmp_changelog[k]:
tmp_changelog[k][git_log_t] = old_changelog[k][date][git_log_t]
else:
tmp_changelog[k][git_log_t].update(
old_changelog[k][date][git_log_t]
)
old_changelog = copy.deepcopy(tmp_changelog)
del tmp_changelog

if old_changelog:
for k in self.changelog:
for git_log_t in self.changelog[k]:
if git_log_t in self.changelog[k] and git_log_t in old_changelog[k]:
self.changelog[k][git_log_t].difference(
old_changelog[k][git_log_t]
)

# with open(fpath, mode='r', encoding='utf-8') as f:
# old_changelog = ''.join(f.readlines()[1:])

# with open(fpath, mode='w', encoding='utf-8') as f:
# f.write(self._format_git_logs())

# with open(fpath, mode='a', encoding='utf-8') as f:
# f.write(old_changelog)

def parse_changelog_md(
    self, changelog: str
) -> dict[str, dict[str, dict[str, set[str]]]]:
    """Parses changelog markdown into a record of all the logs it contains.

    Three kinds of lines are recognised:
      * `## <follow> — <YYYY-MM-DD>` starts the logs of one follow key for
        one date,
      * `### <Section>` starts a change-type section inside it,
      * `- <entry>` adds an entry to the current section.
    Every other line is ignored.

    Args:
        changelog: The changelog text; it is split into lines on '\n'.

    Returns:
        A dictionary shaped as
        `{follow: {date: {section (lower-cased): {entry, ...}}}}`.
    """
    release_pattern = re.compile(r'##\s(\w+)\s—\s(\d{4}-\d{2}-\d{2})')
    section_pattern = re.compile(r'###\s(\w+)')
    entry_pattern = re.compile(r'-\s(.*)')

    changelog_dict = {}
    cur_release = None  # Sections of the `## ...` heading being read.
    cur_entries = None  # Entry set of the `### ...` section being read.

    for line in changelog.split('\n'):
        release_match = release_pattern.match(line)
        if release_match:
            follow, released_on = release_match.groups()
            # setdefault keeps entries already collected when the same
            # follow/date heading appears more than once.
            cur_release = changelog_dict.setdefault(follow, {}).setdefault(
                released_on, {}
            )
            cur_entries = None
            continue

        section_match = section_pattern.match(line)
        if section_match:
            if cur_release is None:
                # A section before any `## ...` heading has nowhere to be
                # recorded; ignore it together with its entries.
                cur_entries = None
            else:
                cur_entries = cur_release.setdefault(
                    section_match.group(1).lower(), set()
                )
            continue

        entry_match = entry_pattern.match(line)
        if entry_match and cur_entries is not None:
            cur_entries.add(entry_match.group(1))

    return changelog_dict
41 changes: 21 additions & 20 deletions tools/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,31 +54,32 @@ def GenerateAIDocs() -> int:
return error_code


# Repository paths passed to `parse_changelogs_for_all` when the "ai" and
# "docs" parts of the changelog are generated.
_BASE_AI_MODULE_PATH = ('ai/',)
_BASE_AI_DOCS_PATH = ('docs/', 'notebooks/', 'templates/')
# Each mapping pairs a changelog key with the repository paths whose git
# history is followed for it; both are passed as `follow` to
# `write_changelog_md`.
_BASE_AI_MODULE_PATH = {
    'ai': ('ai/',),
}
_BASE_AI_DOCS_PATH = {
    'docs': ('docs/', 'notebooks/', 'templates/'),
}


def GenerateAILogs() -> int:
error_code = 0

logger = ai_logs.AIGitLog(_BASE_AI_REPO)
today = date.today()

changelog = ''
changelog += f'## ai — {today}'
changelog += '\n\n'
changelog += logger.parse_changelogs_for_all(_BASE_AI_MODULE_PATH)

changelog += f'## docs — {today}'
changelog += '\n\n'
changelog += logger.parse_changelogs_for_all(_BASE_AI_DOCS_PATH)
"""Completes the action of command "--generate-logs" by generating changelog
file for the changes made in the "ai" or "docs" submodules.
Returns:
Error code of `PermissionError`.
"""
error_code = 0
changelog = ai_logs.Changelog(_BASE_AI_REPO)
try:
logger.write_changelog('CHANGELOG.md', changelog.strip())
changelog.write_changelog_md(
'CHANGELOG.md', follow=(
_BASE_AI_MODULE_PATH,
_BASE_AI_DOCS_PATH,
)
)
except PermissionError as exc:
error_code = exc.errno
return error_code
Expand Down

0 comments on commit 5bc1c3a

Please sign in to comment.