Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment of a function body (the enclosing `def` is not visible here).
# It tallies, per calendar day, how many commits were made in total ('all')
# and how many were flagged as doc-related ('doc'), and returns that frame.
# Define full date range
# One entry per day from `start` to `stop`, localized to US/Pacific.
all_dates = pd.date_range(start, stop, freq='D').tz_localize('US/Pacific')
# Seed the frame with a single placeholder column of zeros (default label 0);
# that column is dropped again before either return below.
all_dates = pd.DataFrame(np.zeros(all_dates.shape[0], dtype=int),
index=all_dates)
# Remove commits from the past we don't want
# Both bounds are exclusive. NOTE(review): `date` is presumably the name of
# the datetime index (resample is called on it below) -- confirm.
commits = commits.query('date > @start and date < @stop')
if len(commits) == 0:
# In case no commits for this project
# Return the full date range with both tallies set to zero.
all_dates = all_dates.drop(0, axis=1).astype(int)
all_dates['all'] = 0
all_dates['doc'] = 0
return all_dates
# Flag each commit by passing its message, plus `search_queries` as an extra
# argument, to find_word_in_string (defined outside this fragment).
# NOTE(review): `commits` is the result of .query() above, so this assignment
# may raise pandas' SettingWithCopyWarning -- confirm whether a copy is needed.
commits.loc[:, 'is_doc'] = commits['message'].apply(
find_word_in_string, args=(search_queries,))
# Tally the total number vs. doc-related commits
# Per-day sum of the flag gives the doc commits; per-day count gives all commits.
commits_doc = commits['is_doc'].resample('D').sum()
commits_all = commits['is_doc'].resample('D').count()
# Copy the daily tallies into the full date range one day at a time; days
# that never get assigned are left as NaN and zero-filled in the clean-up.
# NOTE(review): assumes the resampled day labels match entries of the
# US/Pacific index built above -- confirm the timezone of `commits`.
for date, val in commits_all.items():
all_dates.loc[date, 'all'] = val
for date, val in commits_doc.items():
all_dates.loc[date, 'doc'] = val
# Clean up
# Drop the placeholder column, turn missing days into 0, and cast to int.
all_dates = all_dates.drop(0, axis=1)
all_dates = all_dates.replace(np.nan, 0)
all_dates = all_dates.astype(int)
return all_dates
# Fragment from inside a loop/try whose headers are not visible here (see the
# `except` and `continue` below). It collects one (user, date, is_doc) record
# per commit found in a user's PushEvent rows, then writes all records to CSV.
# Flatten every commit of every PushEvent row into two parallel tuples: the
# commit message and the index value of the row it came from (used as date).
# NOTE(review): with no commits the unpacking of zip(*[]) raises ValueError;
# presumably that is meant to land in the `except` below -- confirm.
messages, dates = zip(*[(jj['message'], idate)
for idate, ii in user_db.PushEvent.iterrows()
for jj in ii['payload']['commits']])
# Make the tuple mutable so each timestamp can be replaced in place.
dates = list(dates)
for ii, idate in enumerate(dates):
# Timestamps without a timezone are treated as UTC before conversion.
if idate.tzinfo is None:
idate = idate.tz_localize('UTC')
idate = idate.tz_convert('US/Pacific')
dates[ii] = idate
dates = np.array(dates)
messages = np.array(messages)
# Keep commits with start < date <= end; multiplying the two boolean arrays
# acts as an element-wise AND.
mask = (dates > start) * (dates <= end)
messages = messages[mask]
dates = dates[mask]
for message, date in zip(messages, dates):
# find_word_in_string is defined outside this fragment; its result is
# stored as the record's is_doc value.
is_doc = find_word_in_string(message, search_queries)
activity.append((user, date, is_doc))
except Exception as e:
# Best-effort: remember the failure, add a placeholder row with NaN date and
# flag for this user, and move on to the next iteration of the enclosing loop.
exceptions.append((user, e))
activity.append((user, np.nan, np.nan))
continue
# Report every (user, exception) pair collected above, one per line.
print('Exceptions: ', '\n'.join([str(ii) for ii in exceptions]))
# Build the final table, index it by commit date, and write it to disk.
activity = pd.DataFrame(activity, columns=['user', 'date', 'is_doc'])
activity = activity.set_index('date')
activity.to_csv('.user_totals.csv')