Skip to main content

Masking PII Data in Grafana LOKI

It's important to ensure that your log aggregation tools do not store any sensitive PII data. In this article, we will learn how to mask sensitive data before it is sent to Loki.

Let's start by creating a Python program that generates mock PII data for the lab environment.

Python program to generate PII logs

  • Random Password
  • Social Security Number
  • Credit Card
  • Email
  • IP4
Important
Do the following before running this Python program:
- [x] Change the log file name to the desired file path
- [x] Change the sleep time to generate logs more or less frequently
import logging
import random
import string
import time
import re

# Logging setup: records go to LOG_FILE_PATH, formatted as key=value pairs.
LOG_FILE_PATH = '/var/log/test.log'  # Change this to your desired log file path
LOG_RECORD_FORMAT = 'ts=%(asctime)s level=%(levelname)s caller=%(module)s.%(funcName)s msg="%(message)s"'

logging.basicConfig(
    filename=LOG_FILE_PATH,
    level=logging.INFO,
    format=LOG_RECORD_FORMAT,
)

def generate_random_password(length=12):
    """Return a random mock password.

    Args:
        length: Number of characters to generate. Defaults to 12.

    Returns:
        A string of ``length`` characters, each picked with ``random.choice``
        from ASCII letters, digits and punctuation.
    """
    alphabet = string.ascii_letters + string.digits + string.punctuation
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)

def generate_random_ssn():
    """Return a mock Social Security Number formatted as ``NNN-NN-NNNN``.

    All nine digits are random; the value is only shaped like an SSN.
    """
    digits = ''.join([random.choice(string.digits) for _ in range(9)])
    first, middle, last = digits[:3], digits[3:5], digits[5:]
    return '-'.join((first, middle, last))

def generate_random_ip4():
    """Return a mock IPv4 address in dotted notation.

    Each of the four parts is a random integer between 0 and 255 inclusive.
    """
    octets = [random.randint(0, 255) for _ in range(4)]
    return '.'.join(map(str, octets))

def generate_random_email():
    """Return a mock e-mail address.

    The local part is eight random lowercase ASCII letters; the domain is
    picked at random from a small fixed list.
    """
    # The local part is drawn first, then the domain.
    local_part = ''.join([random.choice(string.ascii_lowercase) for _ in range(8)])
    chosen_domain = random.choice(["gmail.com", "yahoo.com", "hotmail.com", "example.com"])
    return local_part + "@" + chosen_domain

def generate_random_creditcard():
    """Return a mock credit card number formatted as ``NNNN-NNNN-NNNN-NNNN``.

    All sixteen digits are random; no checksum is applied.
    """
    digits = ''.join([random.choice(string.digits) for _ in range(16)])
    groups = [digits[start:start + 4] for start in range(0, 16, 4)]
    return '-'.join(groups)

def generate_random_log_message():
    """Build one mock log message, with or without sensitive data.

    With equal probability the result is either a template filled with a
    freshly generated mock value (password, SSN, IPv4 address, e-mail or
    credit card number) or a plain message containing nothing sensitive.
    Each template is paired with the generator that matches its wording, so
    a "password" message always carries a password-shaped value, an "SSN"
    message an SSN-shaped value, and so on.

    Returns:
        tuple[str, bool]: The message text, and a flag that is True when the
        message contains generated sensitive data.
    """
    # (template, generator) pairs; only the generator for the selected
    # template is called, so no unused mock values are produced.
    sensitive_templates = [
        ("User logged in with password: {}", generate_random_password),
        ("Failed login attempt with password: {}", generate_random_password),
        ("User changed password to: {}", generate_random_password),
        ("User registered with password: {}", generate_random_password),
        ("Password reset request for password: {}", generate_random_password),
        ("User accessed SSN: {}", generate_random_ssn),
        ("User accessed IP address: {}", generate_random_ip4),
        ("User sent email to: {}", generate_random_email),
        ("User made payment with credit card: {}", generate_random_creditcard),
    ]
    plain_messages = [
        "User logged out",
        "User profile updated",
        "User account locked due to multiple failed login attempts",
        "Password reset link sent",
        "User registered successfully",
        "User session expired",
        "User account verified",
        "User login attempt from new device",
    ]

    if random.choice([True, False]):
        template, make_value = random.choice(sensitive_templates)
        return template.format(make_value()), True

    return random.choice(plain_messages), False

def generate_logs(interval_seconds=1):
    """Emit mock log records forever, pausing between records.

    Each iteration obtains a message from ``generate_random_log_message`` and
    logs it via ``logging.log`` at a randomly chosen level (INFO, WARNING or
    ERROR). The loop never returns; stop the process to end it.

    Args:
        interval_seconds: Pause between two records, in seconds. Lower it to
            generate logs more frequently, raise it for fewer. Defaults to 1.
    """
    levels = (logging.INFO, logging.WARNING, logging.ERROR)

    while True:
        # The "contains sensitive data" flag is not needed for logging.
        log_message, _ = generate_random_log_message()

        # Log the message at a random level.
        logging.log(random.choice(levels), log_message)

        # Throttle output so the log file grows at a predictable rate.
        time.sleep(interval_seconds)


# Start generating logs only when run as a script, not when imported.
if __name__ == "__main__":
    generate_logs()

Scrape Config for Promtail

  1. Update the promtail configuration with the pipeline stage.
scrape_configs:
  - job_name: test_log
    static_configs:
      - targets:
          - localhost
        labels:
          job: test_log
          # File tailed by Promtail; must match the log file written by the Python program.
          __path__: /var/log/test.log
          # NOTE: ${HOSTNAME} is only substituted when Promtail is started with
          # environment expansion enabled (-config.expand-env=true) - verify for your setup.
          host: ${HOSTNAME}
    pipeline_stages:
      # Mask everything that follows the (lowercase) word "password" on a log line:
      # the text matched by the capture group is replaced with "****".
      - replace:
          expression: "password(.+)"
          replace: "****"
  2. Restart the Promtail service.
  3. Observe the logs in Loki. You will notice that any text after the word "password" has been replaced with ****.