Skip to content

Commit e2c349a

Browse files
Merge branch 'dev-1.0.2'
2 parents 4fd6259 + 57c5549 commit e2c349a

File tree

11 files changed

+121
-48
lines changed

11 files changed

+121
-48
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rusty_hogs"
3-
version = "1.0.1"
3+
version = "1.0.2"
44
authors = ["Scott Cutler <[email protected]>"]
55
edition = "2018"
66
description = "This project provides a set of scanners that will use regular expressions to try and detect the presence of sensitive information such as API keys, passwords, and personal information. It includes a set of regular expressions by default, but will also accept a JSON object containing your custom regular expressions."

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ Download and unzip the [latest ZIP](https://github.com/newrelic/rusty-hog/releas
3939
on the releases tab. Then, run each binary with `-h` to see the usage.
4040

4141
```shell script
42-
wget https://github.com/newrelic/rusty-hog/releases/download/v1.0.1/rustyhogs-musl_darwin_1.0.1.zip
43-
unzip rustyhogs-musl_darwin_1.0.1.zip
42+
wget https://github.com/newrelic/rusty-hog/releases/download/v1.0.2/rustyhogs-musl_darwin_1.0.2.zip
43+
unzip rustyhogs-musl_darwin_1.0.2.zip
4444
darwin_releases/choctaw_hog -h
4545
```
4646

scripts/gh_org_scanner.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
def f(x):
2626
filename = os.path.join(tempdir, str(uuid.uuid4()))
2727
# expects choctaw_hog in your path
28-
s = subprocess.run(["choctaw_hog", "--outputfile", filename, "--regex", "trufflehog_rules.json", x.ssh_url],
28+
s = subprocess.run(["choctaw_hog", "--outputfile", filename, x.ssh_url],
2929
capture_output=True)
3030
return {"repo": x.name, "results": filename}
3131

@@ -46,13 +46,14 @@ def f(x):
4646
result_list = json.load(f)
4747
for finding in result_list:
4848
writer.writerow([result['repo'],
49-
result['reason'],
49+
finding['reason'],
5050
str(finding['stringsFound']),
5151
finding['path'],
5252
finding['commit'],
5353
finding['commitHash'],
5454
finding['date']])
5555
except:
5656
pass
57+
os.remove(result['results'])
5758

5859
print("Output written to output.csv")

scripts/ghe_secret_monitor.py

Lines changed: 47 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,16 @@
1010
import uuid
1111
import logging
1212
import sys
13+
import random
14+
import urllib.parse
1315

14-
if len(sys.argv) == 2 and sys.argv[1].startswith("--log="):
15-
loglevel = sys.argv[1][6:]
16-
else:
17-
loglevel = "WARNING"
16+
loglevel = "WARNING"
17+
sample = False
18+
for arg in sys.argv:
19+
if arg.startswith("--sample="):
20+
sample = int(arg[9:])
21+
if arg.startswith("--log="):
22+
loglevel = arg[6:]
1823

1924
numeric_level = getattr(logging, loglevel.upper(), None)
2025
if not isinstance(numeric_level, int):
@@ -30,16 +35,20 @@
3035
CHOCTAW_HOG_PATH = os.environ["CHOCTAW_HOG_PATH"]
3136

3237
# initialize GitHub object and list of all repos
38+
logging.info("Trying to authenticate to Github...")
3339
g = Github(base_url=f"https://{GHE_DOMAIN}/api/v3", login_or_token=GHE_REPO_TOKEN, per_page=100)
3440
repos = g.get_repos()
41+
if sample:
42+
logging.info(f"sample size set to {sample}, retrieving list of repos...")
43+
repos = random.sample(list(repos), sample)
3544

3645
# use the datetime library to get an object representing 24 hours ago
3746
today = datetime.today()
3847
twentyfourhoursago = today - timedelta(hours=24)
3948

4049
# start the first main set of work: translate our list of repo objects to a dict of { git_url : since_commit_hash }
4150
repo_dict = {}
42-
51+
logging.info("Getting a list of all commits since 24 hours ago for each repo...")
4352
for repo in repos:
4453
commits = []
4554
try:
@@ -54,8 +63,9 @@
5463
logging.debug("no SSH URL")
5564
continue
5665
logging.info(f"({repo.ssh_url}, {commits[-1].sha}")
57-
repo_dict[repo.ssh_url] = (commits[-1].sha, f"{repo.html_url}/commit/")
66+
repo_dict[repo.ssh_url] = (commits[-1].sha, repo.html_url)
5867

68+
logging.info("Completed Github API requests...")
5969
repo_dict = dict(
6070
filter(lambda x: x[1], repo_dict.items())
6171
) # and filter out key/value pairs with None as a value
@@ -66,25 +76,24 @@
6676
# git url as the key and the filename containing the results as the value
6777
tempdir = tempfile.gettempdir()
6878

69-
79+
logging.info("Starting choctaw hog scan of all commits over the last 24 hours...")
7080
def scan_repo(x):
7181
filename = os.path.join(tempdir, str(uuid.uuid4()))
72-
s = subprocess.run(
73-
[
74-
CHOCTAW_HOG_PATH,
75-
"--outputfile",
76-
filename,
77-
"--since_commit",
78-
x[1][0],
79-
"--sshkeypath",
80-
SSH_KEY_PATH,
81-
x[0],
82-
],
83-
capture_output=True,
84-
)
82+
cmdline = [
83+
CHOCTAW_HOG_PATH,
84+
"--outputfile",
85+
filename,
86+
"--since_commit",
87+
x[1][0],
88+
"--sshkeypath",
89+
SSH_KEY_PATH,
90+
x[0],
91+
]
92+
logging.info(f"Running choctaw hog: {str(cmdline)}")
93+
s = subprocess.run(cmdline, capture_output=True)
94+
logging.info(f"choctaw hog output: {s.stdout} {s.stderr}")
8595
return {"repo": x[0], "results": filename, "url": x[1][1]}
8696

87-
8897
output = []
8998

9099
# increase this number to the number of cores you have - runs great on a c5n.4xlarge with 14
@@ -95,27 +104,42 @@ def scan_repo(x):
95104
logging.debug(output)
96105

97106
# the last block of work, iterate through each JSON file from choctaw_hog and put the results in Insights
107+
logging.info("Collecting choctaw hog output into a single python list...")
98108
output_array = []
99109
for result_dict in output:
100110
try:
101111
f = open(result_dict["results"], "r")
102112
except:
113+
# TODO: add better error handling here. the file won't exist if we couldn't
114+
# access the git repo
115+
logging.warning("failed to open " + result_dict["results"])
103116
continue
104117

105118
with f:
106119
result_list = json.load(f)
107120
for finding in result_list:
121+
fileurl = ""
122+
if finding["new_line_num"] != 0:
123+
fileurl = f"{result_dict['url']}/blob/{finding['commitHash']}/{finding['path']}#L{finding['new_line_num']}"
124+
else:
125+
fileurl = f"{result_dict['url']}/blob/{finding['parent_commit_hash']}/{finding['path']}#L{finding['old_line_num']}"
108126
output_array.append(
109127
{
110128
"eventType": "ghe_secret_monitor",
111129
"commitHash": finding["commitHash"],
112130
"reason": finding["reason"],
113131
"path": finding["path"],
114132
"repo": result_dict["repo"],
115-
"url": result_dict["url"] + finding["commitHash"]
133+
"url": f"{result_dict['url']}/commit/{finding['commitHash']}/{finding['path']}",
134+
"fileurl": fileurl,
135+
"old_line_num": finding["old_line_num"],
136+
"new_line_num": finding["new_line_num"],
137+
"parent_commitHash": finding["parent_commit_hash"]
116138
}
117139
)
118140

141+
os.remove(result_dict["results"])
142+
119143
url = "https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
120144
headers = {
121145
"Content-Type": "application/json",
@@ -125,6 +149,6 @@ def scan_repo(x):
125149
post = gzip.compress(json.dumps(output_array).encode("utf-8"))
126150
logging.info(f"len(output_array) = {len(output_array)}")
127151
logging.debug(output_array)
152+
logging.info("Submitting data to New Relic Insights...")
128153
r = requests.post(url, data=post, headers=headers)
129154
logging.info(f"insights status code: {r.status_code}")
130-

scripts/ghe_secret_monitor.service

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@ Wants=ghe_secret_monitor.timer
44

55
[Service]
66
Type=oneshot
7-
ExecStart=/usr/bin/git pull origin
87
ExecStart=/home/ec2-user/anaconda3/bin/python scripts/ghe_secret_monitor.py
9-
WorkingDirectory=/home/ec2-user/rusty-hog
8+
WorkingDirectory=/home/ec2-user/rusty_hog_1.0.2
109
User=ec2-user
1110
Group=ec2-user
1211
Environment='PATH=/root/anaconda3/bin:/root/anaconda3/condabin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/root/.local/bin:/root/bin:/root/.local/bin:/root/bin'
@@ -19,4 +18,4 @@ Environment=CHOCTAW_HOG_PATH=musl_releases/choctaw_hog
1918

2019

2120
[Install]
22-
WantedBy=multi-user.target
21+
WantedBy=multi-user.target

scripts/jira_secret_scanner.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,12 @@
3232

3333
issues = []
3434
r = requests.get(url, headers=headers)
35-
result = r.json()
35+
result = None
36+
try:
37+
result = r.json()
38+
except:
39+
print(f"JIRA error: {r.text}")
40+
sys.exit(1)
3641
total = result['total']
3742
issues.extend(result['issues'])
3843
while len(issues) < total:
@@ -46,6 +51,7 @@
4651
gdoc_re = re.compile(r'https://docs.google.com/[^\s|\]]+', re.IGNORECASE)
4752
links = defaultdict(set)
4853

54+
logging.info("Reading issue descriptions...")
4955
for issue in issues:
5056
description = issue['fields']['description']
5157
if not description:
@@ -54,6 +60,7 @@
5460
for match in matches:
5561
links[issue['key']].add(match)
5662

63+
logging.info("Retrieving issue comments...")
5764
for issue in issues:
5865
url = f"https://newrelic.atlassian.net/rest/api/2/issue/{issue['key']}/comment"
5966
r = requests.get(url, headers=headers)
@@ -67,20 +74,32 @@
6774
gdoc_id_re = re.compile(r'https://docs.google.com/\w+/d/([a-zA-Z0-9-_]+)/?.*',re.IGNORECASE)
6875
output = []
6976

77+
logging.info("Running ankamali hog on each Google Drive link found in Jira...")
7078
for x in links.items():
79+
logging.debug(f"x: {str(x)}")
7180
filename = os.path.join(tempdir, str(uuid.uuid4()))
7281
results = []
7382
for gdoc_link in x[1]:
74-
gdocid = gdoc_id_re.match(gdoc_link).group(1)
83+
logging.debug(f"gdoc_link: {gdoc_link}")
84+
logging.debug(f"gdoc_id_re.match(gdoc_link): {str(gdoc_id_re.match(gdoc_link))}")
85+
gdoc_id_match = gdoc_id_re.match(gdoc_link)
86+
if not gdoc_id_match:
87+
continue
88+
gdocid = gdoc_id_match.group(1)
7589
s = subprocess.run(
7690
[
7791
ANKAMALI_HOG_PATH,
7892
"--outputfile",
7993
filename,
8094
gdocid
8195
],
82-
capture_output=True,
96+
capture_output=True
8397
)
98+
logging.debug(f"ankamali hog output: {s.stdout}")
99+
if s.returncode != 0:
100+
logging.warning(f"ankamali hog exited with a non-zero status code: {s.stdout} {s.stderr}")
101+
# TODO: add better error handling here. some will fail because you don't have
102+
# permission to the doc. others will fail because you setup your token wrong.
84103
results.append({"gdoc_link": gdoc_link, "results": filename, "key": x[0]})
85104
output.extend(results)
86105

@@ -93,7 +112,9 @@
93112
try:
94113
f = open(result_dict["results"], "r")
95114
except:
96-
logging.debug("failed to open " + result_dict["results"])
115+
# TODO: add better error handling here. the file won't exist if we couldn't
116+
# access the file
117+
logging.warning("failed to open " + result_dict["results"])
97118
continue
98119

99120
with f:
@@ -108,8 +129,9 @@
108129
"reason": finding["reason"]
109130
}
110131
)
132+
os.remove(result_dict["results"])
111133

112-
url = "https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
134+
url = f"https://insights-collector.newrelic.com/v1/accounts/{INSIGHTS_ACCT_ID}/events"
113135
headers = {
114136
"Content-Type": "application/json",
115137
"X-Insert-Key": INSIGHTS_INSERT_KEY,
@@ -119,4 +141,4 @@
119141
logging.info(f"len(output_array) = {len(output_array)}")
120142
logging.debug(output_array)
121143
r = requests.post(url, data=post, headers=headers)
122-
logging.info(f"insights status code: {r.status_code}")
144+
logging.info(f"insights status code: {r.status_code}")

scripts/jira_secret_scanner.service

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,16 @@ Wants=jira_secret_scanner.timer
44

55
[Service]
66
Type=oneshot
7-
ExecStart=/usr/bin/git pull origin
87
ExecStart=/home/ec2-user/anaconda3/bin/python jira_secret_scanner.py
9-
WorkingDirectory=/home/ec2-user/secret_scanner
8+
WorkingDirectory=/home/ec2-user/rusty_hog_1.0.2
109
User=ec2-user
1110
Group=ec2-user
1211
Environment='PATH=/root/anaconda3/bin:/root/anaconda3/condabin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/root/.local/bin:/root/bin:/root/.local/bin:/root/bin'
1312
Environment=INSIGHTS_INSERT_KEY=<redacted for git>
1413
Environment=INSIGHTS_ACCT_ID=<redacted for git>
1514
Environment=JIRA_TOKEN=<redacted for git>
16-
Environment=ANKAMALI_HOG_PATH=./ankamali_hog
15+
Environment=ANKAMALI_HOG_PATH=musl_releases/ankamali_hog
1716

1817

1918
[Install]
20-
WantedBy=multi-user.target
19+
WantedBy=multi-user.target

src/bin/ankamali_hog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ use rusty_hogs::{SecretScanner, SecretScannerBuilder};
4444
/// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
4545
fn main() {
4646
let matches = clap_app!(ankamali_hog =>
47-
(version: "1.0.1")
47+
(version: "1.0.2")
4848
(author: "Scott Cutler <[email protected]>")
4949
(about: "Google Drive secret scanner in Rust.")
5050
(@arg REGEX: --regex +takes_value "Sets a custom regex JSON file")

src/bin/berkshire_hog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ use std::iter::FromIterator;
4545
/// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
4646
fn main() {
4747
let matches = clap_app!(berkshire_hog =>
48-
(version: "1.0.1")
48+
(version: "1.0.2")
4949
(author: "Scott Cutler <[email protected]>")
5050
(about: "S3 secret hunter in Rust. Avoid bandwidth costs, run this within a VPC!")
5151
(@arg REGEX: --regex +takes_value "Sets a custom regex JSON file")

src/bin/choctaw_hog.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use rusty_hogs::{SecretScanner, SecretScannerBuilder};
4949
/// Main entry function that uses the [clap crate](https://docs.rs/clap/2.33.0/clap/)
5050
fn main() {
5151
let matches = clap_app!(choctaw_hog =>
52-
(version: "1.0.1")
52+
(version: "1.0.2")
5353
(author: "Scott Cutler <[email protected]>")
5454
(about: "Git secret scanner in Rust")
5555
(@arg REGEX: -r --regex +takes_value "Sets a custom regex JSON file")

0 commit comments

Comments
 (0)