{ "profile": { "first_name": "Jonathan", "middle_name": "D", "last_name": "Kelley", "tagline": "Linux Systems / Python Devops / Site Reliability Engineer", "date_of_birth": "1985-05-02", "place_of_birth": "Dallas", "biography": "I am a 14-year internet technology veteran with a passion for devops and site reliability. I got my start in the late 1990s in high school over IRC and designing web pages on Red Hat Linux 9 (Shrike). \n\nI consider myself an expert at Linux, Kubernetes, Networking, AWS and all the things in between.", "current_location": { "street": "********", "housenumber": 0, "zipcode": "********", "city": "Kalamazoo", "state": "Michigan", "country": "USA", "planet": "Earth" } }, "contact_details": { "email": "jonkelley@gmail.com", "phone": "8179-998-200", "skype": "********", "website": "https://jon-kelley.com/", "keybase": "https://keybase.io/jondkelley", "linkedin": "https://linkedin.com/in/jonathandkelley/" }, "educations": [ { "title": "High School", "specification": "Computer Science (Specialization: A+)", "location": "Fort Worth, TX", "level": "Highschool", "description": "I graduated from high school, where I enjoyed doing science, math, biology and arts. 
I realized in my freshman year of school that I wanted to do something with Linux, so I learned PHP and other technologies.", "start_date": "2014-03-01", "end_date": "2014-05-01", "completed": true } ], "careers": [ { "title": "Activtrak", "specification": "Staff Cloud Engineer", "location": "Kalamazoo, MI", "level": "Full Time", "description": "Providing workforce analytics and productivity monitoring software to help organizations optimize employee performance and improve operational efficiency.", "accomplishments": [ "Handles container security and dependency updates across hundreds of container images using SonarCloud and Snyk.", "Manages development team requests for IAM permissions following the security model of least privilege and service accounts per container or VM.", "Added GCP artifact registry process to CircleCI build & helm scripts for 150 repositories and build pipelines to retire Google Container Registry.", "Right-sizes pod resource limits to help optimize costs around pod resource constraints, in combination with right-sizing GCP GKE (Kubernetes) node pools, saving up to $42,000 a year in cloud compute costs.", "Migrated legacy Google Cloud VPN tunnels bridging VPCs together in 11 GCP projects with next-gen Google Cloud VPN tunnels while maintaining zero downtime with BGP session transition.", "Built a slack chat framework that can be extended for running many automation tasks through slack such as account migrations, with a REST API as well as a GCP pubsub queue.", "Patched existing slack bots to manage MySQL node pools more effectively improving customer experience." 
], "start_date": "2023-01-01", "end_date": null, "link": "http://activtrak.com" }, { "title": "Mezmo", "specification": "Site Reliability Engineer", "location": "Kalamazoo, MI", "level": "Full Time", "description": "Worked on the site reliability engineering team supporting a myriad of log ingestion configurations.", "accomplishments": [ "Added LinkerD service mesh in Kubernetes for ~70 microservices for pod end-to-end encryption.", "Expanded access tools to internal CLI Python client (logdnactl) for better integration into backend systems.", "Regularly contributed to internal tooling using the k8s API and pymongo libraries to manage administrative operations across the product.", "Built a support dashboard for support to manage and integrated Flask/Rebrow Redis blueprints into the app along with Python-eve (REST toolkit) for full-search MongoORM via REST, secured with python-authlib and OpenIDConnect/Okta for RBAC.", "Re-wrote the Ansible integration for LogDNA logging library with new features for customers.", "Added functionality to the support dashboard to look into ElasticSearch field mappings to troubleshoot index limits and indicate growth needs for customers.", "Developed a proxy request tool for webhooks so support could easily debug webhook payloads.", "Rebuilt Ansible-logdna handler in Python (git: logdna/ansible-logdna PR #8) to improve functionality and modernize the framework.", "Helped take senior escalations for k8s agent, logstash, IBM Cloudfoundry, logspout, and other products." ], "start_date": "2020-06-01", "end_date": "2023-01-01", "link": "http://mezmo.com" }, { "title": "Doximity, Inc.", "specification": "DevOps Engineer", "location": "Dallas, TX", "level": "Full Time", "description": "Worked in a cross-discipline DevOps team managing everything from Kubernetes via Terraform infrastructure-as-code to Chef on EC2 instances. 
Used best security practices in a strict HIPAA environment with segmented networks and RBAC/IAM policies on AWS.", "accomplishments": [ "Built Doximity's first platform under k8s on EKS with Istio using Ansible, Terraform, and Helm charts.", "Refactored Terraform across teams using multiple state files with both Jenkins pipelines and Atlantis. Migrated to Terraform module patterns rather than sprawled HCL resources.", "Configured Sensu monitoring scripts for production systems.", "Wrote and managed cookbooks in Chef as well as the dependencies for patch management and better inspect testing/kitchen tests.", "Moved Jenkins jobs from traditional EC2 swarm builders to ECS-based pipelines for better resource utilization/cost savings.", "Built Jenkins automation for Chef jobs using Jenkins Job Builder and later helped implement JCasC and managed job creation using sandboxed Groovy scripts.", "Built an Ansible/Terraform-based deployment system for Kubernetes and Istio onto EKS so the entire Doximity platform could be migrated." 
], "start_date": "2019-05-01", "end_date": "2020-05-01", "link": "http://doximity.com" }, { "title": "BoomTown!", "specification": "DevOps Engineer / Site Reliability Engineer", "location": "Charleston, SC", "level": "Full Time", "description": "Managed a rapidly growing real estate SaaS platform with over 10,000 agent brokers deployed through Wordpress.", "accomplishments": [ "Participated in on-call SRE style rotation for a split Windows/Linux environment.", "Worked with common Amazon AWS toolsets such as ECS, EC2, VPC, ELB, SQS, and Lambda.", "Served as a subject matter expert and mentor for Linux applications and platform tools.", "Managed TeamCity build pipelines along with Jenkins for operational tasks.", "Re-tooled container stack for frontend Wordpress product from a 5-year-old shell magic to docker-compose v3 + python-paver build process (for gulp/yarn/phpunit).", "Upgraded php-fpm to PHP 7 from legacy PHP 5 stack, with weighted load canary testing as proof to launch.", "Worked with Varnishd, Memcached, Wordpress Network Sites, HAProxy, MySQL, and Docker.", "Used Ansible to convert stacks previously hand-hacked into ground-up infrastructure as code." ], "start_date": "2018-04-01", "end_date": "2019-05-01", "link": "http://boomtownroi.com" }, { "title": "Rackspace", "specification": "Linux Systems Engineer II", "location": "San Antonio, TX", "level": "Full Time", "description": "Worked with the Cloud Integrations Team / Rackspace Internal Tools Engineering team. Supported internal billing, cloud identity systems, and internal Rackertools.", "accomplishments": [ "Started conversion of 230 or so Jenkins jobs to store them in YAML and re-deploy with a tool called Jenkins Job Builder. This enabled full Jenkins disaster recovery in <1 hour instead of >2 weeks.", "Migrated tooling from Puppet2 to Puppet3 with PuppetDB, Hiera, puppetserver, directory environments, and wrote a tool to wrap r10k. Implemented a security-compliant PuppetDB database security model. 
This reduced complexity and applied missing patterns.", "Maintained Ansible modules that allowed storing firewall configurations in git. Built a release pipeline to deploy on git push. This created \"working documentation\" and a git log to enable instant rollbacks in production environments.", "Managed deployment of billing, integration, and pubsub environments for the Rackspace Cloud." ], "start_date": "2014-08-01", "end_date": "2017-10-01", "link": "http://rackspace.com" }, { "title": "Rackspace", "specification": "Linux Systems Engineer I", "location": "San Antonio, TX", "level": "Full Time", "description": "Worked with the Cloud Integrations Team / Rackspace Internal Tools Engineering team. Supported internal billing, cloud identity systems, and internal Rackertools.", "accomplishments": [ "Built continuous integration in Jenkins for 5 new products using a Python API to trigger builds on a build server and cycle app deployments.", "Wrote a Python SDK for the build system platform and deployment system so developers could hook, deploy, and release 4 products via REST API from Python CLI, Jenkins, or virtually anything.", "Managed staging, pre-prod, and prod environments for a complex Ruby web app used by 6000+ growing employees 24/7 to manage the administrative backends for the world's 2nd largest public cloud.", "Managed deployments in a one-click (actually 3 clicks) fashion using Salt and yum repositories to distribute software and configurations.", "Moved apps to hosted Mongo service called ObjectRocket to keep in-house operations costs at a minimum.", "Handled deployment and design considerations for next-gen architecture replacing legacy internal controls design from serial to fully async process with a service backend layer to connect to backends/dbs/APIs. The next-gen was Python 3/Tornado with Angular.js web frontends. 
This replaced a traditionally monolithic serial-threaded Ruby 1.8.7 application on Phusion Passenger which bundled business logic and couldn't scale.", "Seeing weak points in ops/qe tests, wrote BDD Gherkin style templates in Python using \"behave\" to test any API that is REST. This was helpful for ops or QA functional end-to-end testing. It's in the projects section called testvAPI." ], "start_date": "2013-04-01", "end_date": "2014-08-01", "link": "http://rackspace.com" }, { "title": "Rackspace", "specification": "Linux Systems Administrator III", "location": "San Antonio, TX", "level": "Full Time", "description": "Worked on the OpenStack/Cloud servers as a systems administrator supporting the Rackspace Cloud Control panel and backend systems.", "accomplishments": [ "Worked closely with Cloud integration teams running deployment and maintenance on large-scale backend infrastructure used for the world's 2nd largest public cloud.", "Diagnosed complex Linux application and system problems.", "Updated and wrote deployment templates and syntax for Puppet/Chef.", "Served as a subject matter expert and senior support escalation for Cloud Load Balancers, Cloud Databases, Cloud Compute, and infrastructure.", "Designed migration plans for the Cloud identity token API of a multi-node environment. 
Moved identity database cluster and load balancer stack into a new subnet while performing schema upgrades and a software upgrade dependent on schema changes with record-setting minimal impacting downtime.", "Took direct action to correct cloud infrastructure issues in an on-call 24/7 environment.", "Identified the root cause for key and critical issues in various application stacks throughout Rackspace Cloud and helped identify and report bugs, issues, or aid in fixing with appropriate product development groups.", "Deployed Tomcat application code releases regularly to staging and production, sometimes under tight deadlines and always with zero downtime.", "Experienced with the entire Rackspace Cloud suite, including parts of OpenStack, as well as writing and deploying Python applications that utilized public and internal APIs.", "Conceptualized, wrote, and built monitoring tools and metrics collection systems to track the health of various products and their subsystems. These were used as an early warning system for service faults in various APIs within Rackspace.", "Served as an escalation point for all Rackspace Cloud products in the Rackspace suite.", "Planned efficient cloud resource consumption in new/existing environments for internal infrastructure.", "Interviewed new candidates for jobs in cross-team discipline." ], "start_date": "2011-03-01", "end_date": "2013-04-01", "link": "http://rackspace.com" }, { "title": "Zixcorp", "specification": "Embedded Device Support Engineer II", "location": "Dallas, TX", "level": "Full Time", "description": "Supported on-premise and SaaS cloud-hosted solutions for HIPAA encryption. Supported Java Tomcat application as well as Postfix mail system. 
Managed network security policies and on-premise datacenter networks.", "accomplishments": [ "Worked in a HIPAA compliant environment dealing with personal data e-mail encryption with well-recognized companies and government agencies.", "Worked in accordance with UK data privacy and export laws related to mail administration.", "Administered an E-Mail appliance based on Postfix that implemented FIPS-compliant e-mail encryption solutions with SSL failover to an HTTPS webpage-gateway for secure email transmission.", "Handled telephone support with customers who located the appliance in their own datacenters, often assisting with network troubleshooting in unfamiliar network environments.", "Troubleshot mail flow issues with different network topologies and layouts with dozens of companies daily." ], "start_date": "2010-02-01", "end_date": "2011-03-01", "link": "http://zixcorp.com" }, { "title": "1-800-Hosting", "specification": "Linux Administrator II", "location": "Dallas, TX", "level": "Full Time", "description": "Performed Linux technical support by phone/tickets. Did professional service migrations, hardware upgrades, cabinet wiring, and performed network maintenance in the datacenter on Cisco Catalyst 6500 series hardware. Supported Cpanel and Plesk control panels.", "accomplishments": [ "Planned and executed primary data-center DNS cut-over to use new BIND stack with Linux HEARTBEAT failover. 
Wrote failover scripts to handle system failure with zero downtime in DNS lookups.", "Performed phone and ticket work to meet customer SLA.", "Managed OS patching, migrations, and upgrades as professional services to customers.", "Worked as DC OPS managing hardware diagnostics, upgrades, and provisioning.", "Worked as DC network operations handling switch upgrades, troubleshooting, DDoS mitigation, or disaster recovery.", "Diagnosed routing and other concerns escalating to network operations when required.", "Upgraded Nagios monitoring to the latest version and scaled for performance.", "Upgraded MRTG graphing system for Catalyst switch monitoring to improve bandwidth graph capability.", "Created SSL monitoring in Nagios for customers who paid for SSL." ], "start_date": "2006-07-01", "end_date": "2010-02-01", "link": "http://800hosting.com" }, { "title": "C I Host", "specification": "Linux Technical Support 1", "location": "Bedford, TX", "level": "Full Time", "description": "Handled generalized Linux phone/ticket-based technical support for a web-hosting firm. Supported Miva Merchant shopping, Cpanel, and Plesk control panels.", "accomplishments": [ "Handled Linux Admin tasks for helping customers install and configure software within the hosting environment.", "Configured Merchant and other shopping utilities for customers who ordered the software.", "Worked in the data center across the street when staffing was low, assisting colo clients as well as responding to NOC escalations." 
], "start_date": "2006-04-01", "end_date": "2006-07-01" } ], "skills_hypervisors": [ { "title": "AWS EC2", "level": 5, "endorsement": "" }, { "title": "XenServer", "level": 5, "endorsement": "" }, { "title": "VMware ESXi", "level": 5, "endorsement": "" }, { "title": "KVM", "level": 5, "endorsement": "" }, { "title": "OpenStack", "level": 5, "endorsement": "" } ], "skills_devops_tools": [ { "title": "Terraform/HCL", "level": 5, "endorsement": "" }, { "title": "docker-compose", "level": 5, "endorsement": "" }, { "title": "Vagrant", "level": 5, "endorsement": "" }, { "title": "Puppet", "level": 5, "endorsement": "" }, { "title": "SaltStack", "level": 4, "endorsement": "" }, { "title": "Chef", "level": 3, "endorsement": "" } ], "skills_ci_cd": [ { "title": "Jenkins", "level": 5, "endorsement": "" }, { "title": "Bamboo", "level": 5, "endorsement": "" }, { "title": "TeamCity", "level": 4, "endorsement": "" }, { "title": "CircleCI", "level": 3, "endorsement": "" }, { "title": "Github Actions", "level": 2, "endorsement": "" }, { "title": "Gitlab", "level": 2, "endorsement": "" } ], "skills": [ { "title": "Python", "level": 5, "endorsement": "" }, { "title": "Ansible", "level": 5, "endorsement": "" }, { "title": "Kubernetes", "level": 5, "endorsement": "" }, { "title": "REST Frameworks", "level": 5, "endorsement": "" }, { "title": "HTML(5)", "level": 5, "endorsement": "" }, { "title": "CSS(3)", "level": 5, "endorsement": "" }, { "title": "Observium", "level": 5, "endorsement": "" }, { "title": "MongoDB", "level": 5, "endorsement": "" }, { "title": "MySQL", "level": 5, "endorsement": "" }, { "title": "Postgres", "level": 5, "endorsement": "" }, { "title": "AWS DynamoDB", "level": 5, "endorsement": "" }, { "title": "Docker", "level": 5, "endorsement": "" }, { "title": "Linux", "level": 5, "endorsement": "" }, { "title": "Nginx", "level": 5, "endorsement": "" }, { "title": "Apache", "level": 5, "endorsement": "" }, { "title": "JSON", "level": 5, "endorsement": "" }, { 
"title": "XML", "level": 5, "endorsement": "" }, { "title": "Bootstrap Framework", "level": 5, "endorsement": "" }, { "title": "SMTP", "level": 5, "endorsement": "" }, { "title": "DNS", "level": 5, "endorsement": "" }, { "title": "Wordpress", "level": 5, "endorsement": "" }, { "title": "Git (SCM)", "level": 4, "endorsement": "" }, { "title": "Postfix", "level": 4, "endorsement": "" }, { "title": "Nagios", "level": 4, "endorsement": "" }, { "title": "Tomcat", "level": 4, "endorsement": "" }, { "title": "ElasticSearch", "level": 4, "endorsement": "" }, { "title": "Redis", "level": 4, "endorsement": "" }, { "title": "Zabbix", "level": 4, "endorsement": "" }, { "title": "Networking", "level": 4, "endorsement": "" }, { "title": "Ruby", "level": 3, "endorsement": "" }, { "title": "Sensu", "level": 3, "endorsement": "" }, { "title": "PHP", "level": 3, "endorsement": "" }, { "title": "JQuery", "level": 3, "endorsement": "" }, { "title": "C/C++", "level": 2, "endorsement": "" }, { "title": "Java", "level": 2, "endorsement": "" }, { "title": "Golang", "level": 2, "endorsement": "" }, { "title": "Oracle", "level": 2, "endorsement": "" }, { "title": "Javascript", "level": 2, "endorsement": "" } ], "languages": [ { "title": "English", "level": 5, "endorsement": "Daily use" } ], "tools": [], "interests": [], "publications": [ { "title": "Building my last resume, ever, in Kubernetes.", "image": "https://i.imgur.com/QPqUbnW.png", "tags": "#resume #python #flask #pandoc #kubernetes #sidecar #docker-compose", "link": "https://www.linkedin.com/pulse/building-my-last-resume-ever-jonathan-kelley/", "slug": "building-my-last-resume-ever", "description": "An article about this website, my resume!" 
}, { "title": "Reconnecting after Postgres failover, introductory guide for application developers.", "image": "https://i.imgur.com/WtR5iHd.png", "tags": "#postgres #python #failover", "link": "https://www.linkedin.com/pulse/reconnecting-after-postgres-failover-guide-developer-jonathan-kelley/", "slug": "reconnecting-after-postgres-failover-guide", "description": "A brief article targeted at application developers on how to use reconnection-based connection strings using libpq." }, { "title": "Error Handling from backends to the frontend!", "image": "https://i.imgur.com/b8sDiqi.jpg", "tags": "#exceptions #stacktrace #frontend #backend", "link": "https://www.linkedin.com/pulse/error-handling-tips-from-backend-your-frontend-jonathan-kelley/", "slug": "error-handling-tips-from-backend-your-frontend", "description": "The history of error handling in computing, and how a modern developer can handle frontend and backend errors better." }, { "title": "How to design subnets the right way", "image": "https://i.imgur.com/Dp1BRVR.png", "tags": "#subnet #networking #vpc #vlan #dnat #snat", "link": "https://www.linkedin.com/pulse/how-design-subnets-right-way-jonathan-kelley/", "slug": "how-design-subnets-right-way", "description": "Regardless of whether you are running networks in a physical datacenter or across VPCs in the cloud, you only get one chance. Learn how to do it right the first time, every time." } ], "projects": [ { "title": "demo-jenkins-config-as-code", "description": "A showcase of Jenkins configurations and job creation out of the box. 
Using groovy and the configuration as code plugin, you can create fully viable Jenkins instances on docker.", "tagline": "Jenkins configurations and job creation right out of the box.", "tags": "Jenkins, Groovy, Configuration-as-code", "link": "https://github.com/jondkelley/demo-jenkins-config-as-code" }, { "title": "python_resume", "description": "A showcase of Python/Flask/Jinja2/HTML(5)/Bootstrap/JQuery used to both generate jon-kelley.com and my hardcopy resume.", "tagline": "The backend code for this website!", "tags": "Python, Flask, Bootstrap, Jinja2, Resume", "link": "https://github.com/jondkelley/python_resume" }, { "title": "logdna_line_export_cli", "description": "This command will recursively fetch all logs from the logdna export API. This is useful to get logs beyond the 10,000 line limit as the API does not natively provide pagination.", "tagline": "A paginator for LogDNA log export", "tags": "Python, Recursion, Pagination", "link": "https://github.com/jondkelley/logdna_line_export_cli" }, { "title": "rebrow_modernized", "description": "Built for the Python developer who needs to look into a Redis store. Allows for inspection and deletion of keys and follows PubSub messages. Also displays some runtime and configuration information.", "tagline": "Redis \"workbench\" style tool in Python/Flask.", "tags": "Python, Flask, Bootstrap, Redis", "link": "https://github.com/jondkelley/rebrow-modernized" }, { "title": "carphunter", "description": "Simple tool to locate Cisco IP/ARP and display results. 
Useful for small to medium datacenters (1-100 network devices)", "tagline": "An ARP cache and search tool for multi-device Cisco networks.", "tags": "Python, Flask, Bootstrap, Redis", "link": "https://github.com/jondkelley/carphunter" }, { "title": "zabbix_api_monitor", "description": "This was created to monitor HTTP / REST Endpoints under Zabbix.", "tagline": "A monitoring plugin for Zabbix.", "tags": "Python, REST, Monitoring, Zabbix, HTTP", "link": "https://github.com/jonkelleyatrackspace/zabbix_url_monitor" }, { "title": "canakit1104-api", "description": "This was created to expose a REST API for a relay board microcontroller so we could build light-based alerting systems while at Rackspace.", "tagline": "REST API to drive electronic relays.", "tags": "Python, REST, Monitoring, Electronics, Relay", "link": "https://github.com/jonkelleyatrackspace/canakit1104-api" }, { "title": "testVapi", "description": "Based on the python behave features this project emulates a cucumber style syntax to run HTTP API tests and is able to forward the messages to ELK stack. This uses a language called Gherkin to make plain-english QA testing a breeze.", "tagline": "Creative python suite which runs QA tests using plain English syntax.", "tags": "Python, Elk, Behave, Cucumber, Gherkin", "link": "https://github.com/jonkelleyatrackspace/testvapi" }, { "title": "vagrantfile-skel", "description": "I made this repo because I keep encountering quick environments setup / breakfix I need from time to time.", "tagline": "Common Vagrantfiles I tend to be using", "tags": null, "link": "https://github.com/jondkelley/vagrantfile-skel" } ] }