For a project, I am trying to retrieve every commit and, for each updated file, store the entire file (without the commit/diff syntax, just the vanilla file) together with the lines that were updated. I am using the GitLab API in Python. While I can get the updated lines, I struggle to retrieve the file's complete contents at the time of the commit.
Here is a snippet showing how I try to retrieve the files. The issue lies only in `__get_file_content`; everything else works as I intend it to.
def __get_file_content(self, project, commit_id, file_path, encoding=None):
    """Return the contents of ``file_path`` as stored at ``commit_id``.

    Args:
        project: python-gitlab project whose ``files`` manager is queried.
        commit_id: Ref handed to the repository-files API; callers in this
            file pass a commit id.
        file_path: Repository-relative path of the file to fetch.
        encoding: Optional text encoding. ``ProjectFile.decode()`` only
            undoes the base64 transport encoding and yields ``bytes``; pass
            e.g. ``"utf-8"`` to receive the plain file text as ``str``.
            The default ``None`` returns the bytes unchanged.

    Returns:
        The file contents (``bytes``, or ``str`` when ``encoding`` is
        given), or ``None`` when the file could not be fetched or decoded.
    """
    try:
        # Get the file content from a specific commit
        project_file = project.files.get(file_path=file_path, ref=commit_id)
        raw_content = project_file.decode()
        if encoding is None:
            return raw_content
        return raw_content.decode(encoding)
    except Exception as e:
        # Deliberately best-effort: one unreadable file (missing at this
        # ref, binary content, API error) must not abort the whole run.
        print(f"Error fetching file content: {e}, {file_path}, {commit_id}, {project}")
        return None
def generate_commit_dict(self, commits, project):
    """Collect, per commit, the changed files with their content and diff info.

    Args:
        commits: Iterable of python-gitlab commit objects.
        project: python-gitlab project the commits belong to; used to fetch
            each file's content at the commit.

    Returns:
        A dict keyed by the commit's ``short_id``. Each value holds
        ``"commit_time"`` (the commit's ``created_at``) and ``"commits"``,
        a list with one entry per diffed file containing ``'file'``,
        ``'type'``, ``'change'`` (file content, or ``None`` when it is
        unavailable) and ``'updated_lines'``.
    """
    # Prepare commit diffs into dict to save time when iterating
    commit_diffs = {}
    for commit in commits:
        changed_files = []
        for diff in commit.diff(get_all=True):
            new_path = diff['new_path']
            if diff.get('deleted_file'):
                # A file removed by this commit no longer exists at this
                # ref, so requesting it could only fail; skip the API call.
                file_content = None
            else:
                # Use the full SHA as ref: unlike the abbreviated id it can
                # never be ambiguous. The short id remains the dict key.
                file_content = self.__get_file_content(project, commit.id, new_path)
            updated_lines = self.__get_commit_diff_contents(diff['diff'])
            changed_files.append({
                'file': new_path,
                'type': self.get_file_extension(new_path),
                'change': file_content,
                'updated_lines': updated_lines,
            })
        commit_diffs[commit.short_id] = {
            "commit_time": commit.created_at,
            "commits": changed_files,
        }
    return commit_diffs
# Look up the project through `gl`, converting project_id to an integer first.
project = gl.projects.get(int(project_id))
# get_commits is defined outside this snippet; presumably it returns the
# commits made by gitlab_user in the project -- TODO confirm.
commits = get_commits(gitlab_user, project)
# NOTE(review): generate_commit_dict is declared with a leading `self`
# parameter but is called here as a plain function -- confirm how it is bound.
user_commit_contents = generate_commit_dict(commits, project)
Is the `repo.git.show('{}:{}'.format(commit.hexsha, entry.path))` function not available for the project object? Please correct me if I am missing something.