Part2 question
parent
88a5340fea
commit
e21718c89d
|
|
@ -0,0 +1,287 @@
|
||||||
|
# Replicate the environment
|
||||||
|
|
||||||
|
Python script to replicate grep -r.
|
||||||
|
|
||||||
|
## Python venv
|
||||||
|
|
||||||
|
```sh
|
||||||
|
python3 -m venv --prompt part2 ~/part2/.venv
|
||||||
|
source ~/part2/.venv/bin/activate
|
||||||
|
python --version # Python 3.10.6
|
||||||
|
pip install --upgrade pip
|
||||||
|
```
|
||||||
|
|
||||||
|
## enter/exit venv
|
||||||
|
|
||||||
|
```sh
|
||||||
|
source $HOME/part2/.venv/bin/activate
|
||||||
|
deactivate
|
||||||
|
```
|
||||||
|
|
||||||
|
## Install python dependencies
|
||||||
|
|
||||||
|
```sh
|
||||||
|
nano -cw pip_requirements.txt
|
||||||
|
|
||||||
|
binaryornot==0.4.4
|
||||||
|
chardet==5.0.0
|
||||||
|
|
||||||
|
python -m pip install -r pip_requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
# Foreword
|
||||||
|
|
||||||
|
There are no classes, threads or data structures in these scripts, as these would have taken me much longer as a non-native coder.
|
||||||
|
It's all functions, simple lists, de facto standard libraries and raw printing to the terminal, so it shouldn't be painfully slow.
|
||||||
|
|
||||||
|
# 1st attempt script
|
||||||
|
|
||||||
|
- This script explored the loop mechanism and parsing the RE from the CLI. It did not handle errors well, and the results against grep were not very consistent.
|
||||||
|
|
||||||
|
```
|
||||||
|
python test1.py '.*pass.*' '/home/openstack/stack /home/openstack/.ssh /home/openstack/openstack_prep.sh' | wc -l
|
||||||
|
29
|
||||||
|
|
||||||
|
grep -r '.*pass.*' /home/openstack/stack /home/openstack/.ssh /home/openstack/openstack_prep.sh | wc -l
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: binary file matches
|
||||||
|
29
|
||||||
|
```
|
||||||
|
|
||||||
|
Script was named `test1.py`
|
||||||
|
|
||||||
|
```python
|
||||||
|
import argparse
|
||||||
|
import pathlib
|
||||||
|
import re
|
||||||
|
from binaryornot.check import is_binary
|
||||||
|
|
||||||
|
# Command-line interface: a regular expression followed by ONE quoted,
# whitespace-separated list of paths, e.g.
#   python test1.py '.*pass.*' '/path/one /path/two'
parser = argparse.ArgumentParser()
parser.add_argument("pattern", help="[Regex]")
parser.add_argument("files", help="[File File ...]")

# Parse argv once and reuse the namespace instead of parsing it twice.
args = parser.parse_args()
pattern = re.compile(args.pattern)
filesPath = args.files

# str.split() with no argument splits on runs of whitespace and drops empty
# strings. The previous re.split('\s+', ...) produced '' for leading/trailing
# whitespace, and pathlib.Path('') is the current directory, so a stray space
# silently added "." to the search.
pathList = filesPath.split()

# behavioural vars
filesRecursive = 1  # truthy: findFiles descends into sub-directories
hideBin = 1  # truthy: findPattern stays silent about skipped binary files
|
||||||
|
|
||||||
|
def validInput(path):
    """Return the entries of *path* that exist on disk, in their original order.

    Args:
        path: Iterable of path strings taken from the command line.

    Returns:
        A new list holding only the entries for which
        ``pathlib.Path(entry).exists()`` is true.

    Each missing entry is reported on stderr rather than stdout, so the
    diagnostic is never counted as a match when the output is piped
    (for example into ``wc -l``).
    """
    # Local import: this script's import block does not bring in sys.
    import sys

    paths = []
    for item in path:
        if pathlib.Path(item).exists():
            paths.append(item)
        else:
            sys.stderr.write(' %s: No such file or directory\n' % (item))
    return paths
|
||||||
|
|
||||||
|
def findFiles(path, recursive):
    """Collect the absolute paths of the regular files found at or under *path*.

    Args:
        path: File or directory to inspect (string or ``pathlib.Path``).
        recursive: Truthy to descend into sub-directories; falsy to list only
            the files directly inside *path*.

    Returns:
        A list of absolute path strings. The list is empty when *path* is
        neither a file nor a directory.

    A symbolic link passed in as *path* is followed, but links met while
    listing a directory are skipped. This mirrors ``grep -r``, which follows
    symlinks only when they are named on the command line, and it prevents
    duplicate results and runaway recursion on link cycles.
    """
    root = pathlib.Path(path)
    if root.is_file():
        return [str(root.absolute())]

    files = []
    if root.is_dir():
        for item in root.iterdir():
            if item.is_symlink():
                continue
            if item.is_dir():
                if recursive:
                    # extend() grows the list in place instead of rebuilding
                    # it on every level of recursion.
                    files.extend(findFiles(item.absolute(), recursive))
            elif item.is_file():
                files.append(str(item.absolute()))
    return files
|
||||||
|
|
||||||
|
def findPattern(files):
    """Print every line of the given text files that matches ``pattern``.

    Args:
        files: Iterable of file path strings.

    Each matching line is printed to stdout as ``<file>: <line>`` with the
    surrounding whitespace stripped from the line. Files that ``is_binary``
    reports as binary are not searched; they are mentioned only when the
    module-level ``hideBin`` is falsy. ``pattern`` is the module-level
    compiled regular expression.
    """
    for file in files:
        if is_binary(file):
            if not hideBin:
                print('%s: binary file excluded' % (file))
            continue
        # The context manager closes each handle as soon as the file has been
        # scanned; a bare open() left that to the garbage collector.
        with open(file) as handle:
            for line in handle:
                if pattern.search(line) is not None:
                    print('%s: %s' % (file, line.strip()))
|
||||||
|
|
||||||
|
def main():
    """Validate the command-line paths, gather their files and print matches."""
    fileList = []
    for item in validInput(pathList):
        # extend() adds to the list in place rather than building a new list
        # for every command-line path.
        fileList.extend(findFiles(item, filesRecursive))
    findPattern(fileList)


if __name__ == "__main__":
    main()
|
||||||
|
```
|
||||||
|
|
||||||
|
# 2nd attempt script
|
||||||
|
|
||||||
|
- This script was intended to handle missing paths, permission issues and odd files such as binaries gracefully.
|
||||||
|
- The recursive looping mechanism was changed to try to emulate the order of grep's results output.
|
||||||
|
- Errors are written to stderr so that the `wc -l` line counts of the script and of grep stay comparable.
|
||||||
|
- grep's regular-expression engine (POSIX basic regular expressions by default) differs from Python's `re` module, so this script will likely show differences as more complex queries are built. For reasons of time and complexity this was not explored. Grep handles greedy matches much more gracefully.
|
||||||
|
|
||||||
|
Script was named `test2.py`
|
||||||
|
|
||||||
|
```python
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
import pathlib
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Command-line interface: a regular expression followed by ONE quoted,
# whitespace-separated list of paths, e.g.
#   python test2.py '.*pass.*' '/path/one /path/two'
parser = argparse.ArgumentParser()
parser.add_argument("pattern", help="[Regex]")
parser.add_argument("files", help="[File File ...]")

# Parse argv once and reuse the namespace instead of parsing it twice.
args = parser.parse_args()
pattern = re.compile(args.pattern)
filesPath = args.files

# str.split() with no argument splits on runs of whitespace and drops empty
# strings. The previous re.split('\s+', ...) produced '' for leading/trailing
# whitespace, and pathlib.Path('') is the current directory, so a stray space
# silently added "." to the search.
pathList = filesPath.split()

fileList = []  # absolute paths of the files to search; filled by findFiles
filesRecursive = 1  # truthy: findDirs descends into sub-directories
|
||||||
|
|
||||||
|
def findDirs(path, recursive):
    """Walk the directory *path* and register every regular file found in it.

    Args:
        path: Directory to list (string or ``pathlib.Path``).
        recursive: Truthy to descend into sub-directories as well.

    Files are handed to ``findFiles``, which appends them to the module-level
    ``fileList``; nothing is returned. An OS error raised while handling one
    entry is written to stderr, prefixed with *path*, and the walk carries on
    with the next entry. An error raised while listing *path* itself is not
    handled here and propagates to the caller.
    """
    for item in pathlib.Path(path).iterdir():
        try:
            # grep -r follows symbolic links only when they are named on the
            # command line. Skipping links met during the walk avoids visiting
            # the same file through several names and looping on link cycles.
            if item.is_symlink():
                continue
            if item.is_file():
                findFiles(str(item.absolute()), recursive)
            elif recursive and item.is_dir():
                findDirs(item, recursive)
        except OSError as err:
            sys.stderr.write('%s: %s \n' % (path, err))
|
||||||
|
|
||||||
|
def findFiles(path, recursive):
    """Register *path* for searching, or walk it when it is a directory.

    Args:
        path: File or directory path (string or ``pathlib.Path``).
        recursive: Passed through unchanged to ``findDirs``.

    A regular file is appended, as an absolute path string, to the
    module-level ``fileList``. A directory is handed to ``findDirs``. A path
    that does not exist is reported on stderr. Any exception raised by either
    step is written to stderr, prefixed with *path*, rather than propagated.
    Nothing is returned.
    """
    target = pathlib.Path(path)

    # Guard clause: nothing else to do for a path that is not there.
    if not target.exists():
        sys.stderr.write('%s: No such file or directory \n' % (path))
        return

    try:
        if target.is_file():
            fileList.append(str(target.absolute()))
    except (Exception) as file_err:
        sys.stderr.write('%s: %s \n' % (path, file_err))

    try:
        if target.is_dir():
            findDirs(path, recursive)
    except (Exception) as dir_err:
        sys.stderr.write('%s: %s \n' % (path, dir_err))
|
||||||
|
|
||||||
|
def findPattern(files):
    """Print every line of the given files that matches ``pattern``.

    Args:
        files: Iterable of file path strings.

    Each matching line is printed to stdout as ``<file>: <line>`` with the
    surrounding whitespace stripped from the line. ``pattern`` is the
    module-level compiled regular expression. A file that cannot be opened,
    read or decoded as text is reported on stderr as ``<file>: <error>`` and
    the search moves on to the next file; lines already printed for that file
    stay printed.
    """
    for file in files:
        try:
            with open(file) as f:
                for line in f:
                    if pattern.search(line) is not None:
                        # generally this would be the place to change
                        # messaging to match grep format and add indicators
                        # such as line numbers
                        print('%s: %s' % (file, line.strip()))
        except (OSError, UnicodeError) as err:
            # PermissionError is an OSError, so one handler covers both of
            # the previous, identical except clauses.
            sys.stderr.write('%s: %s \n' % (file, err))
|
||||||
|
|
||||||
|
def main():
    """Gather the files under every command-line path, then search them."""
    # fileList is only mutated in place by findFiles and never rebound here,
    # so it can be read without a global declaration.
    for start in pathList:
        findFiles(start, filesRecursive)
    findPattern(fileList)


if __name__ == "__main__":
    main()
|
||||||
|
```
|
||||||
|
|
||||||
|
## Results
|
||||||
|
|
||||||
|
Matching comparisons.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
## 1
|
||||||
|
python test2.py '.*pass.*' '/home/openstack/stack /home/openstack/.ssh /home/openstack/openstack_prep.sh' | wc -l
|
||||||
|
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: 'utf-8' codec can't decode byte 0xc0 in position 24: invalid start byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: 'utf-8' codec can't decode byte 0xc8 in position 40: invalid continuation byte
|
||||||
|
29
|
||||||
|
|
||||||
|
grep -r '.*pass.*' /home/openstack/stack /home/openstack/.ssh /home/openstack/openstack_prep.sh | wc -l
|
||||||
|
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: binary file matches
|
||||||
|
29
|
||||||
|
|
||||||
|
## 2
|
||||||
|
|
||||||
|
python test2.py '^.*auth.*"Password0"$' '/home/openstack/stack /home/.ssh' | wc -l
|
||||||
|
|
||||||
|
/home/.ssh: No such file or directory
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: 'utf-8' codec can't decode byte 0xc0 in position 24: invalid start byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: 'utf-8' codec can't decode byte 0xc8 in position 40: invalid continuation byte
|
||||||
|
2
|
||||||
|
|
||||||
|
grep -r '^.*auth.*"Password0"$' /home/openstack/stack /home/.ssh | wc -l
|
||||||
|
|
||||||
|
grep: /home/.ssh: No such file or directory
|
||||||
|
2
|
||||||
|
|
||||||
|
## 3
|
||||||
|
|
||||||
|
python test2.py Password. /home/openstack/stack | wc -l
|
||||||
|
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: 'utf-8' codec can't decode byte 0xc0 in position 24: invalid start byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: 'utf-8' codec can't decode byte 0xc8 in position 40: invalid continuation byte
|
||||||
|
9
|
||||||
|
|
||||||
|
grep -r Password. /home/openstack/stack | wc -l
|
||||||
|
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: binary file matches
|
||||||
|
9
|
||||||
|
|
||||||
|
## 4
|
||||||
|
|
||||||
|
python test2.py '^.*word' /home/openstack/stack | wc -l
|
||||||
|
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: 'utf-8' codec can't decode byte 0xc0 in position 24: invalid start byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: 'utf-8' codec can't decode byte 0xe0 in position 24: invalid continuation byte
|
||||||
|
/home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: 'utf-8' codec can't decode byte 0xc8 in position 40: invalid continuation byte
|
||||||
|
17
|
||||||
|
|
||||||
|
grep -r '^.*word' /home/openstack/stack | wc -l
|
||||||
|
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/local/2.2.3/linux_amd64/terraform-provider-local_v2.2.3_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/null/3.2.1/linux_amd64/terraform-provider-null_v3.2.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/hashicorp/time/0.9.1/linux_amd64/terraform-provider-time_v0.9.1_x5: binary file matches
|
||||||
|
grep: /home/openstack/stack/.terraform/providers/registry.terraform.io/terraform-provider-openstack/openstack/1.48.0/linux_amd64/terraform-provider-openstack_v1.48.0: binary file matches
|
||||||
|
17
|
||||||
|
```
|
||||||
|
|
||||||
|
- The parity between the grep and pathlib results ends here; the script starts to unravel, requiring much more logic to emulate grep.
|
||||||
|
- Grep can search some binary files that the script cannot read, whilst the script reports individual files as permission denied where grep does not descend into the containing directory at all, knowing that it cannot read the children.
|
||||||
|
- There are many behavioural differences that would require isolation then logic building to fully emulate results for grep.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
## 5
|
||||||
|
|
||||||
|
python test2.py '^.*a' /etc | wc -l
|
||||||
|
|
||||||
|
/etc/ssl: [Errno 13] Permission denied: '/etc/ssl/private'
|
||||||
|
...
|
||||||
|
/etc/ssh/ssh_host_ecdsa_key: [Errno 13] Permission denied: '/etc/ssh/ssh_host_ecdsa_key'
|
||||||
|
37248
|
||||||
|
|
||||||
|
grep -r '^.*a' /etc | wc -l
|
||||||
|
|
||||||
|
grep: /etc/gshadow: Permission denied
|
||||||
|
...
|
||||||
|
grep: /etc/ssh/ssh_host_ecdsa_key: Permission denied
|
||||||
|
26294
|
||||||
|
|
||||||
|
```
|
||||||
Loading…
Reference in New Issue