Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Clean up
all_dates = all_dates.drop(0, axis=1)
all_dates = all_dates.replace(np.nan, 0)
all_dates = all_dates.astype(int)
return all_dates
# --- Run the script ---
# Make sure the output folder exists. `exist_ok=True` tolerates a directory
# that is already there while still surfacing genuine failures (permissions,
# a path component that is a regular file, ...) instead of silently ignoring
# every OSError.
os.makedirs("build/images", exist_ok=True)

# Per-project metadata table read from the local CSV file.
meta = pd.read_csv('.project_info.csv')
db = GithubDatabase()

# Keep the last two '/'-separated components of every project entry.
projects = [ii.split('/')[-2:] for ii in db.projects]

# Run parameters.
groupby = 'weekday'
start = '2017-03-01'
stop = '2017-03-13'

# Start with empty accumulators.
exceptions = []
all_dates = []
# For every (user, project) pair, look up the matching metadata row(s) by
# the `github_org` column and read the `words` and `branch` settings.
for user, project in tqdm(projects):
try:
this_meta = meta.query('github_org == "{}/{}"'.format(user, project))
# Take the value from the first matching row; a float value is replaced by
# None (presumably NaN from an empty CSV cell -- TODO confirm).
words = this_meta['words'].values[0]
words = None if isinstance(words, float) else words
# NOTE(review): both branches of this conditional yield a value equal to
# `words`, so this line has no effect as written.
words = '' if words == '' else words
# Same float-to-None mapping for the branch name.
branch = this_meta['branch'].values[0]
branch = None if isinstance(branch, float) else branch
# Select the entries of `projects` named by the values of `rename`
# (presumably DataFrame columns -- TODO confirm).
projects = projects[list(rename.values())]
# Pass every 'url' entry through `validate_url`.
projects['url'] = projects['url'].apply(validate_url)
def is_doc(row):
    """Report whether an issue row looks documentation-related.

    Each keyword is tested with ``in`` against both ``row['title']`` and
    ``row['label_names']``. The row counts as documentation-related when at
    least one of those tests succeeds. Matching is case-sensitive.

    Parameters
    ----------
    row : mapping
        Must provide ``'title'`` and ``'label_names'`` entries that support
        the ``in`` operator.

    Returns
    -------
    bool
        True if any keyword was found in the title or in the labels.
    """
    keywords = ('doc', 'documentation', 'docathon')
    # Both fields are probed for every keyword (no short-circuiting), so a
    # field that does not support `in` raises regardless of earlier matches.
    n_hits = sum(
        (keyword in row['title']) + (keyword in row['label_names'])
        for keyword in keywords
    )
    return n_hits > 0
# Start with an empty `doc_issues` entry for every project.
projects['doc_issues'] = None
db = GithubDatabase()
for ix, project in tqdm(projects.iterrows()):
# Skip rows whose `github_org` value is not a string.
if not isinstance(project['github_org'], str):
continue
# The last two '/'-separated components are taken as (org, repo).
org, repo = project['github_org'].split('/')[-2:]
proj = db.load(org, repo)
if proj.issues is None:
continue
# Keep only open issues whose `pull_request` field is null.
issues = proj.issues.query('state == "open"')
issues = issues[pd.isnull(issues['pull_request'])]
if len(issues) == 0:
print('{}: No open issues w/o a PR'.format(repo))
continue
# Flag each remaining issue using `is_doc`, applied row by row.
# NOTE(review): `issues` is the result of a filtering selection; assigning a
# new column to it may trigger pandas' SettingWithCopyWarning -- confirm.
issues['is_doc'] = issues.apply(is_doc, axis=1)
# Build url/title records from the issue rows.
doc_issues = [{'url': issue['html_url'],
'title': issue['title']}
for ix, issue in issues.iterrows()
changes = []
for ifile in resp['files']:
found = 0
for word in search_words:
ifilename = ifile['filename'].lower()
if word in ifilename and not any(bword in ifilename for bword in DOC_CHEATING_WORDS):
found += 1
found = found > 0
changes.append({'filename': ifile['filename'],
'changes': ifile['changes'],
'additions': ifile['additions'],
'deletions': ifile['deletions'],
'found': found})
return pd.DataFrame(changes)
db = GithubDatabase()
# Per-project metadata table read from the local CSV file.
meta = pd.read_csv('.project_info.csv')

# Collect one diff table per project; projects without diffs are skipped.
all_diffs = []
for proj in db.projects:
    print(proj)
    # Keep the last two '/'-separated components so that an entry with extra
    # leading path segments still unpacks into (user, project) instead of
    # raising ValueError.
    user, project = proj.split('/')[-2:]
    this_meta = meta.query('github_org == "{}/{}"'.format(user, project))
    # Uses the first matching row; raises IndexError if no row matches.
    branch = this_meta['branch'].values[0]
    # A float value is replaced by None (presumably NaN from an empty CSV
    # cell -- TODO confirm).
    branch = None if isinstance(branch, float) else branch
    diffs = find_commit_diffs(user, project, branch=branch)
    if diffs is None or len(diffs) == 0:
        continue
    # Record which project and user these diffs belong to.
    diffs['project'] = project
    diffs['user'] = user
    all_diffs.append(diffs)