husseinelsaadi commited on
Commit
e69177d
·
2 Parent(s): aea35a7 9f95384

Merge branch 'main' of https://huggingface.co/spaces/husseinelsaadi/Codingo

Browse files
.venv/bin/Activate.ps1 DELETED
@@ -1,247 +0,0 @@
1
- <#
2
- .Synopsis
3
- Activate a Python virtual environment for the current PowerShell session.
4
-
5
- .Description
6
- Pushes the python executable for a virtual environment to the front of the
7
- $Env:PATH environment variable and sets the prompt to signify that you are
8
- in a Python virtual environment. Makes use of the command line switches as
9
- well as the `pyvenv.cfg` file values present in the virtual environment.
10
-
11
- .Parameter VenvDir
12
- Path to the directory that contains the virtual environment to activate. The
13
- default value for this is the parent of the directory that the Activate.ps1
14
- script is located within.
15
-
16
- .Parameter Prompt
17
- The prompt prefix to display when this virtual environment is activated. By
18
- default, this prompt is the name of the virtual environment folder (VenvDir)
19
- surrounded by parentheses and followed by a single space (ie. '(.venv) ').
20
-
21
- .Example
22
- Activate.ps1
23
- Activates the Python virtual environment that contains the Activate.ps1 script.
24
-
25
- .Example
26
- Activate.ps1 -Verbose
27
- Activates the Python virtual environment that contains the Activate.ps1 script,
28
- and shows extra information about the activation as it executes.
29
-
30
- .Example
31
- Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
32
- Activates the Python virtual environment located in the specified location.
33
-
34
- .Example
35
- Activate.ps1 -Prompt "MyPython"
36
- Activates the Python virtual environment that contains the Activate.ps1 script,
37
- and prefixes the current prompt with the specified string (surrounded in
38
- parentheses) while the virtual environment is active.
39
-
40
- .Notes
41
- On Windows, it may be required to enable this Activate.ps1 script by setting the
42
- execution policy for the user. You can do this by issuing the following PowerShell
43
- command:
44
-
45
- PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
46
-
47
- For more information on Execution Policies:
48
- https://go.microsoft.com/fwlink/?LinkID=135170
49
-
50
- #>
51
- Param(
52
- [Parameter(Mandatory = $false)]
53
- [String]
54
- $VenvDir,
55
- [Parameter(Mandatory = $false)]
56
- [String]
57
- $Prompt
58
- )
59
-
60
- <# Function declarations --------------------------------------------------- #>
61
-
62
- <#
63
- .Synopsis
64
- Remove all shell session elements added by the Activate script, including the
65
- addition of the virtual environment's Python executable from the beginning of
66
- the PATH variable.
67
-
68
- .Parameter NonDestructive
69
- If present, do not remove this function from the global namespace for the
70
- session.
71
-
72
- #>
73
- function global:deactivate ([switch]$NonDestructive) {
74
- # Revert to original values
75
-
76
- # The prior prompt:
77
- if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
78
- Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
79
- Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
80
- }
81
-
82
- # The prior PYTHONHOME:
83
- if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
84
- Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
85
- Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
86
- }
87
-
88
- # The prior PATH:
89
- if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
90
- Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
91
- Remove-Item -Path Env:_OLD_VIRTUAL_PATH
92
- }
93
-
94
- # Just remove the VIRTUAL_ENV altogether:
95
- if (Test-Path -Path Env:VIRTUAL_ENV) {
96
- Remove-Item -Path env:VIRTUAL_ENV
97
- }
98
-
99
- # Just remove VIRTUAL_ENV_PROMPT altogether.
100
- if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
101
- Remove-Item -Path env:VIRTUAL_ENV_PROMPT
102
- }
103
-
104
- # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
105
- if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
106
- Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
107
- }
108
-
109
- # Leave deactivate function in the global namespace if requested:
110
- if (-not $NonDestructive) {
111
- Remove-Item -Path function:deactivate
112
- }
113
- }
114
-
115
- <#
116
- .Description
117
- Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
118
- given folder, and returns them in a map.
119
-
120
- For each line in the pyvenv.cfg file, if that line can be parsed into exactly
121
- two strings separated by `=` (with any amount of whitespace surrounding the =)
122
- then it is considered a `key = value` line. The left hand string is the key,
123
- the right hand is the value.
124
-
125
- If the value starts with a `'` or a `"` then the first and last character is
126
- stripped from the value before being captured.
127
-
128
- .Parameter ConfigDir
129
- Path to the directory that contains the `pyvenv.cfg` file.
130
- #>
131
- function Get-PyVenvConfig(
132
- [String]
133
- $ConfigDir
134
- ) {
135
- Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
136
-
137
- # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
138
- $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
139
-
140
- # An empty map will be returned if no config file is found.
141
- $pyvenvConfig = @{ }
142
-
143
- if ($pyvenvConfigPath) {
144
-
145
- Write-Verbose "File exists, parse `key = value` lines"
146
- $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
147
-
148
- $pyvenvConfigContent | ForEach-Object {
149
- $keyval = $PSItem -split "\s*=\s*", 2
150
- if ($keyval[0] -and $keyval[1]) {
151
- $val = $keyval[1]
152
-
153
- # Remove extraneous quotations around a string value.
154
- if ("'""".Contains($val.Substring(0, 1))) {
155
- $val = $val.Substring(1, $val.Length - 2)
156
- }
157
-
158
- $pyvenvConfig[$keyval[0]] = $val
159
- Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
160
- }
161
- }
162
- }
163
- return $pyvenvConfig
164
- }
165
-
166
-
167
- <# Begin Activate script --------------------------------------------------- #>
168
-
169
- # Determine the containing directory of this script
170
- $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
171
- $VenvExecDir = Get-Item -Path $VenvExecPath
172
-
173
- Write-Verbose "Activation script is located in path: '$VenvExecPath'"
174
- Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
175
- Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
176
-
177
- # Set values required in priority: CmdLine, ConfigFile, Default
178
- # First, get the location of the virtual environment, it might not be
179
- # VenvExecDir if specified on the command line.
180
- if ($VenvDir) {
181
- Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
182
- }
183
- else {
184
- Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
185
- $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
186
- Write-Verbose "VenvDir=$VenvDir"
187
- }
188
-
189
- # Next, read the `pyvenv.cfg` file to determine any required value such
190
- # as `prompt`.
191
- $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
192
-
193
- # Next, set the prompt from the command line, or the config file, or
194
- # just use the name of the virtual environment folder.
195
- if ($Prompt) {
196
- Write-Verbose "Prompt specified as argument, using '$Prompt'"
197
- }
198
- else {
199
- Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
200
- if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
201
- Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
202
- $Prompt = $pyvenvCfg['prompt'];
203
- }
204
- else {
205
- Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
206
- Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
207
- $Prompt = Split-Path -Path $venvDir -Leaf
208
- }
209
- }
210
-
211
- Write-Verbose "Prompt = '$Prompt'"
212
- Write-Verbose "VenvDir='$VenvDir'"
213
-
214
- # Deactivate any currently active virtual environment, but leave the
215
- # deactivate function in place.
216
- deactivate -nondestructive
217
-
218
- # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
219
- # that there is an activated venv.
220
- $env:VIRTUAL_ENV = $VenvDir
221
-
222
- if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
223
-
224
- Write-Verbose "Setting prompt to '$Prompt'"
225
-
226
- # Set the prompt to include the env name
227
- # Make sure _OLD_VIRTUAL_PROMPT is global
228
- function global:_OLD_VIRTUAL_PROMPT { "" }
229
- Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
230
- New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
231
-
232
- function global:prompt {
233
- Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
234
- _OLD_VIRTUAL_PROMPT
235
- }
236
- $env:VIRTUAL_ENV_PROMPT = $Prompt
237
- }
238
-
239
- # Clear PYTHONHOME
240
- if (Test-Path -Path Env:PYTHONHOME) {
241
- Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
242
- Remove-Item -Path Env:PYTHONHOME
243
- }
244
-
245
- # Add the venv to the PATH
246
- Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
247
- $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/activate DELETED
@@ -1,70 +0,0 @@
1
- # This file must be used with "source bin/activate" *from bash*
2
- # You cannot run it directly
3
-
4
- deactivate () {
5
- # reset old environment variables
6
- if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
7
- PATH="${_OLD_VIRTUAL_PATH:-}"
8
- export PATH
9
- unset _OLD_VIRTUAL_PATH
10
- fi
11
- if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
12
- PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
13
- export PYTHONHOME
14
- unset _OLD_VIRTUAL_PYTHONHOME
15
- fi
16
-
17
- # Call hash to forget past commands. Without forgetting
18
- # past commands the $PATH changes we made may not be respected
19
- hash -r 2> /dev/null
20
-
21
- if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
22
- PS1="${_OLD_VIRTUAL_PS1:-}"
23
- export PS1
24
- unset _OLD_VIRTUAL_PS1
25
- fi
26
-
27
- unset VIRTUAL_ENV
28
- unset VIRTUAL_ENV_PROMPT
29
- if [ ! "${1:-}" = "nondestructive" ] ; then
30
- # Self destruct!
31
- unset -f deactivate
32
- fi
33
- }
34
-
35
- # unset irrelevant variables
36
- deactivate nondestructive
37
-
38
- # on Windows, a path can contain colons and backslashes and has to be converted:
39
- if [ "${OSTYPE:-}" = "cygwin" ] || [ "${OSTYPE:-}" = "msys" ] ; then
40
- # transform D:\path\to\venv to /d/path/to/venv on MSYS
41
- # and to /cygdrive/d/path/to/venv on Cygwin
42
- export VIRTUAL_ENV=$(cygpath "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv")
43
- else
44
- # use the path as-is
45
- export VIRTUAL_ENV="/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv"
46
- fi
47
-
48
- _OLD_VIRTUAL_PATH="$PATH"
49
- PATH="$VIRTUAL_ENV/bin:$PATH"
50
- export PATH
51
-
52
- # unset PYTHONHOME if set
53
- # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
54
- # could use `if (set -u; : $PYTHONHOME) ;` in bash
55
- if [ -n "${PYTHONHOME:-}" ] ; then
56
- _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
57
- unset PYTHONHOME
58
- fi
59
-
60
- if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
61
- _OLD_VIRTUAL_PS1="${PS1:-}"
62
- PS1="(.venv) ${PS1:-}"
63
- export PS1
64
- VIRTUAL_ENV_PROMPT="(.venv) "
65
- export VIRTUAL_ENV_PROMPT
66
- fi
67
-
68
- # Call hash to forget past commands. Without forgetting
69
- # past commands the $PATH changes we made may not be respected
70
- hash -r 2> /dev/null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/activate.csh DELETED
@@ -1,27 +0,0 @@
1
- # This file must be used with "source bin/activate.csh" *from csh*.
2
- # You cannot run it directly.
3
-
4
- # Created by Davide Di Blasi <[email protected]>.
5
- # Ported to Python 3.3 venv by Andrew Svetlov <[email protected]>
6
-
7
- alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
8
-
9
- # Unset irrelevant variables.
10
- deactivate nondestructive
11
-
12
- setenv VIRTUAL_ENV "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv"
13
-
14
- set _OLD_VIRTUAL_PATH="$PATH"
15
- setenv PATH "$VIRTUAL_ENV/bin:$PATH"
16
-
17
-
18
- set _OLD_VIRTUAL_PROMPT="$prompt"
19
-
20
- if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
21
- set prompt = "(.venv) $prompt"
22
- setenv VIRTUAL_ENV_PROMPT "(.venv) "
23
- endif
24
-
25
- alias pydoc python -m pydoc
26
-
27
- rehash
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/activate.fish DELETED
@@ -1,69 +0,0 @@
1
- # This file must be used with "source <venv>/bin/activate.fish" *from fish*
2
- # (https://fishshell.com/). You cannot run it directly.
3
-
4
- function deactivate -d "Exit virtual environment and return to normal shell environment"
5
- # reset old environment variables
6
- if test -n "$_OLD_VIRTUAL_PATH"
7
- set -gx PATH $_OLD_VIRTUAL_PATH
8
- set -e _OLD_VIRTUAL_PATH
9
- end
10
- if test -n "$_OLD_VIRTUAL_PYTHONHOME"
11
- set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
12
- set -e _OLD_VIRTUAL_PYTHONHOME
13
- end
14
-
15
- if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
16
- set -e _OLD_FISH_PROMPT_OVERRIDE
17
- # prevents error when using nested fish instances (Issue #93858)
18
- if functions -q _old_fish_prompt
19
- functions -e fish_prompt
20
- functions -c _old_fish_prompt fish_prompt
21
- functions -e _old_fish_prompt
22
- end
23
- end
24
-
25
- set -e VIRTUAL_ENV
26
- set -e VIRTUAL_ENV_PROMPT
27
- if test "$argv[1]" != "nondestructive"
28
- # Self-destruct!
29
- functions -e deactivate
30
- end
31
- end
32
-
33
- # Unset irrelevant variables.
34
- deactivate nondestructive
35
-
36
- set -gx VIRTUAL_ENV "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv"
37
-
38
- set -gx _OLD_VIRTUAL_PATH $PATH
39
- set -gx PATH "$VIRTUAL_ENV/bin" $PATH
40
-
41
- # Unset PYTHONHOME if set.
42
- if set -q PYTHONHOME
43
- set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
44
- set -e PYTHONHOME
45
- end
46
-
47
- if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
48
- # fish uses a function instead of an env var to generate the prompt.
49
-
50
- # Save the current fish_prompt function as the function _old_fish_prompt.
51
- functions -c fish_prompt _old_fish_prompt
52
-
53
- # With the original prompt function renamed, we can override with our own.
54
- function fish_prompt
55
- # Save the return status of the last command.
56
- set -l old_status $status
57
-
58
- # Output the venv prompt; color taken from the blue of the Python logo.
59
- printf "%s%s%s" (set_color 4B8BBE) "(.venv) " (set_color normal)
60
-
61
- # Restore the return status of the previous command.
62
- echo "exit $old_status" | .
63
- # Output the original/"old" prompt.
64
- _old_fish_prompt
65
- end
66
-
67
- set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
68
- set -gx VIRTUAL_ENV_PROMPT "(.venv) "
69
- end
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/chardetect DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from chardet.cli.chardetect import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/docx2txt DELETED
@@ -1,10 +0,0 @@
1
- #!/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12
2
-
3
- import docx2txt
4
-
5
- if __name__ == '__main__':
6
- import sys
7
- args = docx2txt.process_args()
8
- text = docx2txt.process(args.docx, args.img_dir)
9
- output = getattr(sys.stdout, 'buffer', sys.stdout)
10
- output.write(text.encode('utf-8'))
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/dumppdf.py DELETED
@@ -1,480 +0,0 @@
1
- #!/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12
2
- """Extract pdf structure in XML format"""
3
-
4
- import logging
5
- import os.path
6
- import re
7
- import sys
8
- from argparse import ArgumentParser
9
- from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, Union, cast
10
-
11
- import pdfminer
12
- from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
13
- from pdfminer.pdfexceptions import (
14
- PDFIOError,
15
- PDFObjectNotFound,
16
- PDFTypeError,
17
- PDFValueError,
18
- )
19
- from pdfminer.pdfpage import PDFPage
20
- from pdfminer.pdfparser import PDFParser
21
- from pdfminer.pdftypes import PDFObjRef, PDFStream, resolve1, stream_value
22
- from pdfminer.psparser import LIT, PSKeyword, PSLiteral
23
- from pdfminer.utils import isnumber
24
-
25
- logging.basicConfig()
26
- logger = logging.getLogger(__name__)
27
-
28
- ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
29
-
30
-
31
- def escape(s: Union[str, bytes]) -> str:
32
- if isinstance(s, bytes):
33
- us = str(s, "latin-1")
34
- else:
35
- us = s
36
- return ESC_PAT.sub(lambda m: "&#%d;" % ord(m.group(0)), us)
37
-
38
-
39
- def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
40
- if obj is None:
41
- out.write("<null />")
42
- return
43
-
44
- if isinstance(obj, dict):
45
- out.write('<dict size="%d">\n' % len(obj))
46
- for k, v in obj.items():
47
- out.write("<key>%s</key>\n" % k)
48
- out.write("<value>")
49
- dumpxml(out, v)
50
- out.write("</value>\n")
51
- out.write("</dict>")
52
- return
53
-
54
- if isinstance(obj, list):
55
- out.write('<list size="%d">\n' % len(obj))
56
- for v in obj:
57
- dumpxml(out, v)
58
- out.write("\n")
59
- out.write("</list>")
60
- return
61
-
62
- if isinstance(obj, (str, bytes)):
63
- out.write('<string size="%d">%s</string>' % (len(obj), escape(obj)))
64
- return
65
-
66
- if isinstance(obj, PDFStream):
67
- if codec == "raw":
68
- # Bug: writing bytes to text I/O. This will raise TypeError.
69
- out.write(obj.get_rawdata()) # type: ignore [arg-type]
70
- elif codec == "binary":
71
- # Bug: writing bytes to text I/O. This will raise TypeError.
72
- out.write(obj.get_data()) # type: ignore [arg-type]
73
- else:
74
- out.write("<stream>\n<props>\n")
75
- dumpxml(out, obj.attrs)
76
- out.write("\n</props>\n")
77
- if codec == "text":
78
- data = obj.get_data()
79
- out.write('<data size="%d">%s</data>\n' % (len(data), escape(data)))
80
- out.write("</stream>")
81
- return
82
-
83
- if isinstance(obj, PDFObjRef):
84
- out.write('<ref id="%d" />' % obj.objid)
85
- return
86
-
87
- if isinstance(obj, PSKeyword):
88
- # Likely bug: obj.name is bytes, not str
89
- out.write("<keyword>%s</keyword>" % obj.name) # type: ignore [str-bytes-safe]
90
- return
91
-
92
- if isinstance(obj, PSLiteral):
93
- # Likely bug: obj.name may be bytes, not str
94
- out.write("<literal>%s</literal>" % obj.name) # type: ignore [str-bytes-safe]
95
- return
96
-
97
- if isnumber(obj):
98
- out.write("<number>%s</number>" % obj)
99
- return
100
-
101
- raise PDFTypeError(obj)
102
-
103
-
104
- def dumptrailers(
105
- out: TextIO,
106
- doc: PDFDocument,
107
- show_fallback_xref: bool = False,
108
- ) -> None:
109
- for xref in doc.xrefs:
110
- if not isinstance(xref, PDFXRefFallback) or show_fallback_xref:
111
- out.write("<trailer>\n")
112
- dumpxml(out, xref.get_trailer())
113
- out.write("\n</trailer>\n\n")
114
- no_xrefs = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs)
115
- if no_xrefs and not show_fallback_xref:
116
- msg = (
117
- "This PDF does not have an xref. Use --show-fallback-xref if "
118
- "you want to display the content of a fallback xref that "
119
- "contains all objects."
120
- )
121
- logger.warning(msg)
122
-
123
-
124
- def dumpallobjs(
125
- out: TextIO,
126
- doc: PDFDocument,
127
- codec: Optional[str] = None,
128
- show_fallback_xref: bool = False,
129
- ) -> None:
130
- visited = set()
131
- out.write("<pdf>")
132
- for xref in doc.xrefs:
133
- for objid in xref.get_objids():
134
- if objid in visited:
135
- continue
136
- visited.add(objid)
137
- try:
138
- obj = doc.getobj(objid)
139
- if obj is None:
140
- continue
141
- out.write('<object id="%d">\n' % objid)
142
- dumpxml(out, obj, codec=codec)
143
- out.write("\n</object>\n\n")
144
- except PDFObjectNotFound as e:
145
- print("not found: %r" % e)
146
- dumptrailers(out, doc, show_fallback_xref)
147
- out.write("</pdf>")
148
-
149
-
150
- def dumpoutline(
151
- outfp: TextIO,
152
- fname: str,
153
- objids: Any,
154
- pagenos: Container[int],
155
- password: str = "",
156
- dumpall: bool = False,
157
- codec: Optional[str] = None,
158
- extractdir: Optional[str] = None,
159
- ) -> None:
160
- fp = open(fname, "rb")
161
- parser = PDFParser(fp)
162
- doc = PDFDocument(parser, password)
163
- pages = {
164
- page.pageid: pageno
165
- for (pageno, page) in enumerate(PDFPage.create_pages(doc), 1)
166
- }
167
-
168
- def resolve_dest(dest: object) -> Any:
169
- if isinstance(dest, (str, bytes)):
170
- dest = resolve1(doc.get_dest(dest))
171
- elif isinstance(dest, PSLiteral):
172
- dest = resolve1(doc.get_dest(dest.name))
173
- if isinstance(dest, dict):
174
- dest = dest["D"]
175
- if isinstance(dest, PDFObjRef):
176
- dest = dest.resolve()
177
- return dest
178
-
179
- try:
180
- outlines = doc.get_outlines()
181
- outfp.write("<outlines>\n")
182
- for level, title, dest, a, se in outlines:
183
- pageno = None
184
- if dest:
185
- dest = resolve_dest(dest)
186
- pageno = pages[dest[0].objid]
187
- elif a:
188
- action = a
189
- if isinstance(action, dict):
190
- subtype = action.get("S")
191
- if subtype and repr(subtype) == "/'GoTo'" and action.get("D"):
192
- dest = resolve_dest(action["D"])
193
- pageno = pages[dest[0].objid]
194
- s = escape(title)
195
- outfp.write(f'<outline level="{level!r}" title="{s}">\n')
196
- if dest is not None:
197
- outfp.write("<dest>")
198
- dumpxml(outfp, dest)
199
- outfp.write("</dest>\n")
200
- if pageno is not None:
201
- outfp.write("<pageno>%r</pageno>\n" % pageno)
202
- outfp.write("</outline>\n")
203
- outfp.write("</outlines>\n")
204
- except PDFNoOutlines:
205
- pass
206
- parser.close()
207
- fp.close()
208
-
209
-
210
- LITERAL_FILESPEC = LIT("Filespec")
211
- LITERAL_EMBEDDEDFILE = LIT("EmbeddedFile")
212
-
213
-
214
- def extractembedded(fname: str, password: str, extractdir: str) -> None:
215
- def extract1(objid: int, obj: Dict[str, Any]) -> None:
216
- filename = os.path.basename(obj.get("UF") or cast(bytes, obj.get("F")).decode())
217
- fileref = obj["EF"].get("UF") or obj["EF"].get("F")
218
- fileobj = doc.getobj(fileref.objid)
219
- if not isinstance(fileobj, PDFStream):
220
- error_msg = (
221
- "unable to process PDF: reference for %r is not a "
222
- "PDFStream" % filename
223
- )
224
- raise PDFValueError(error_msg)
225
- if fileobj.get("Type") is not LITERAL_EMBEDDEDFILE:
226
- raise PDFValueError(
227
- "unable to process PDF: reference for %r "
228
- "is not an EmbeddedFile" % (filename),
229
- )
230
- path = os.path.join(extractdir, "%.6d-%s" % (objid, filename))
231
- if os.path.exists(path):
232
- raise PDFIOError("file exists: %r" % path)
233
- print("extracting: %r" % path)
234
- os.makedirs(os.path.dirname(path), exist_ok=True)
235
- out = open(path, "wb")
236
- out.write(fileobj.get_data())
237
- out.close()
238
-
239
- with open(fname, "rb") as fp:
240
- parser = PDFParser(fp)
241
- doc = PDFDocument(parser, password)
242
- extracted_objids = set()
243
- for xref in doc.xrefs:
244
- for objid in xref.get_objids():
245
- obj = doc.getobj(objid)
246
- if (
247
- objid not in extracted_objids
248
- and isinstance(obj, dict)
249
- and obj.get("Type") is LITERAL_FILESPEC
250
- ):
251
- extracted_objids.add(objid)
252
- extract1(objid, obj)
253
-
254
-
255
- def dumppdf(
256
- outfp: TextIO,
257
- fname: str,
258
- objids: Iterable[int],
259
- pagenos: Container[int],
260
- password: str = "",
261
- dumpall: bool = False,
262
- codec: Optional[str] = None,
263
- extractdir: Optional[str] = None,
264
- show_fallback_xref: bool = False,
265
- ) -> None:
266
- fp = open(fname, "rb")
267
- parser = PDFParser(fp)
268
- doc = PDFDocument(parser, password)
269
- if objids:
270
- for objid in objids:
271
- obj = doc.getobj(objid)
272
- dumpxml(outfp, obj, codec=codec)
273
- if pagenos:
274
- for pageno, page in enumerate(PDFPage.create_pages(doc)):
275
- if pageno in pagenos:
276
- if codec:
277
- for obj in page.contents:
278
- obj = stream_value(obj)
279
- dumpxml(outfp, obj, codec=codec)
280
- else:
281
- dumpxml(outfp, page.attrs)
282
- if dumpall:
283
- dumpallobjs(outfp, doc, codec, show_fallback_xref)
284
- if (not objids) and (not pagenos) and (not dumpall):
285
- dumptrailers(outfp, doc, show_fallback_xref)
286
- fp.close()
287
- if codec not in ("raw", "binary"):
288
- outfp.write("\n")
289
-
290
-
291
- def create_parser() -> ArgumentParser:
292
- parser = ArgumentParser(description=__doc__, add_help=True)
293
- parser.add_argument(
294
- "files",
295
- type=str,
296
- default=None,
297
- nargs="+",
298
- help="One or more paths to PDF files.",
299
- )
300
-
301
- parser.add_argument(
302
- "--version",
303
- "-v",
304
- action="version",
305
- version=f"pdfminer.six v{pdfminer.__version__}",
306
- )
307
- parser.add_argument(
308
- "--debug",
309
- "-d",
310
- default=False,
311
- action="store_true",
312
- help="Use debug logging level.",
313
- )
314
- procedure_parser = parser.add_mutually_exclusive_group()
315
- procedure_parser.add_argument(
316
- "--extract-toc",
317
- "-T",
318
- default=False,
319
- action="store_true",
320
- help="Extract structure of outline",
321
- )
322
- procedure_parser.add_argument(
323
- "--extract-embedded",
324
- "-E",
325
- type=str,
326
- help="Extract embedded files",
327
- )
328
-
329
- parse_params = parser.add_argument_group(
330
- "Parser",
331
- description="Used during PDF parsing",
332
- )
333
- parse_params.add_argument(
334
- "--page-numbers",
335
- type=int,
336
- default=None,
337
- nargs="+",
338
- help="A space-seperated list of page numbers to parse.",
339
- )
340
- parse_params.add_argument(
341
- "--pagenos",
342
- "-p",
343
- type=str,
344
- help="A comma-separated list of page numbers to parse. Included for "
345
- "legacy applications, use --page-numbers for more idiomatic "
346
- "argument entry.",
347
- )
348
- parse_params.add_argument(
349
- "--objects",
350
- "-i",
351
- type=str,
352
- help="Comma separated list of object numbers to extract",
353
- )
354
- parse_params.add_argument(
355
- "--all",
356
- "-a",
357
- default=False,
358
- action="store_true",
359
- help="If the structure of all objects should be extracted",
360
- )
361
- parse_params.add_argument(
362
- "--show-fallback-xref",
363
- action="store_true",
364
- help="Additionally show the fallback xref. Use this if the PDF "
365
- "has zero or only invalid xref's. This setting is ignored if "
366
- "--extract-toc or --extract-embedded is used.",
367
- )
368
- parse_params.add_argument(
369
- "--password",
370
- "-P",
371
- type=str,
372
- default="",
373
- help="The password to use for decrypting PDF file.",
374
- )
375
-
376
- output_params = parser.add_argument_group(
377
- "Output",
378
- description="Used during output generation.",
379
- )
380
- output_params.add_argument(
381
- "--outfile",
382
- "-o",
383
- type=str,
384
- default="-",
385
- help='Path to file where output is written. Or "-" (default) to '
386
- "write to stdout.",
387
- )
388
- codec_parser = output_params.add_mutually_exclusive_group()
389
- codec_parser.add_argument(
390
- "--raw-stream",
391
- "-r",
392
- default=False,
393
- action="store_true",
394
- help="Write stream objects without encoding",
395
- )
396
- codec_parser.add_argument(
397
- "--binary-stream",
398
- "-b",
399
- default=False,
400
- action="store_true",
401
- help="Write stream objects with binary encoding",
402
- )
403
- codec_parser.add_argument(
404
- "--text-stream",
405
- "-t",
406
- default=False,
407
- action="store_true",
408
- help="Write stream objects as plain text",
409
- )
410
-
411
- return parser
412
-
413
-
414
- def main(argv: Optional[List[str]] = None) -> None:
415
- parser = create_parser()
416
- args = parser.parse_args(args=argv)
417
-
418
- if args.debug:
419
- logging.getLogger().setLevel(logging.DEBUG)
420
-
421
- if args.outfile == "-":
422
- outfp = sys.stdout
423
- else:
424
- outfp = open(args.outfile, "w")
425
-
426
- if args.objects:
427
- objids = [int(x) for x in args.objects.split(",")]
428
- else:
429
- objids = []
430
-
431
- if args.page_numbers:
432
- pagenos = {x - 1 for x in args.page_numbers}
433
- elif args.pagenos:
434
- pagenos = {int(x) - 1 for x in args.pagenos.split(",")}
435
- else:
436
- pagenos = set()
437
-
438
- password = args.password
439
-
440
- if args.raw_stream:
441
- codec: Optional[str] = "raw"
442
- elif args.binary_stream:
443
- codec = "binary"
444
- elif args.text_stream:
445
- codec = "text"
446
- else:
447
- codec = None
448
-
449
- for fname in args.files:
450
- if args.extract_toc:
451
- dumpoutline(
452
- outfp,
453
- fname,
454
- objids,
455
- pagenos,
456
- password=password,
457
- dumpall=args.all,
458
- codec=codec,
459
- extractdir=None,
460
- )
461
- elif args.extract_embedded:
462
- extractembedded(fname, password=password, extractdir=args.extract_embedded)
463
- else:
464
- dumppdf(
465
- outfp,
466
- fname,
467
- objids,
468
- pagenos,
469
- password=password,
470
- dumpall=args.all,
471
- codec=codec,
472
- extractdir=None,
473
- show_fallback_xref=args.show_fallback_xref,
474
- )
475
-
476
- outfp.close()
477
-
478
-
479
- if __name__ == "__main__":
480
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/email_validator DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from email_validator.__main__ import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/f2py DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from numpy.f2py.f2py2e import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/flask DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from flask.cli import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/huggingface-cli DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from huggingface_hub.commands.huggingface_cli import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/jsonschema DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from jsonschema.cli import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/markdown-it DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from markdown_it.cli.parse import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/nltk DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from nltk.cli import cli
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(cli())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/normalizer DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from charset_normalizer import cli
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(cli.cli_detect())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/numpy-config DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from numpy._configtool import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pdf2txt.py DELETED
@@ -1,323 +0,0 @@
1
- #!/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12
2
- """A command line tool for extracting text and images from PDF and
3
- output it to plain text, html, xml or tags.
4
- """
5
-
6
- import argparse
7
- import logging
8
- import sys
9
- from typing import Any, Container, Iterable, List, Optional
10
-
11
- import pdfminer.high_level
12
- from pdfminer.layout import LAParams
13
- from pdfminer.pdfexceptions import PDFValueError
14
- from pdfminer.utils import AnyIO
15
-
16
- logging.basicConfig()
17
-
18
- OUTPUT_TYPES = ((".htm", "html"), (".html", "html"), (".xml", "xml"), (".tag", "tag"))
19
-
20
-
21
- def float_or_disabled(x: str) -> Optional[float]:
22
- if x.lower().strip() == "disabled":
23
- return None
24
- try:
25
- return float(x)
26
- except ValueError:
27
- raise argparse.ArgumentTypeError(f"invalid float value: {x}")
28
-
29
-
30
- def extract_text(
31
- files: Iterable[str] = [],
32
- outfile: str = "-",
33
- laparams: Optional[LAParams] = None,
34
- output_type: str = "text",
35
- codec: str = "utf-8",
36
- strip_control: bool = False,
37
- maxpages: int = 0,
38
- page_numbers: Optional[Container[int]] = None,
39
- password: str = "",
40
- scale: float = 1.0,
41
- rotation: int = 0,
42
- layoutmode: str = "normal",
43
- output_dir: Optional[str] = None,
44
- debug: bool = False,
45
- disable_caching: bool = False,
46
- **kwargs: Any,
47
- ) -> AnyIO:
48
- if not files:
49
- raise PDFValueError("Must provide files to work upon!")
50
-
51
- if output_type == "text" and outfile != "-":
52
- for override, alttype in OUTPUT_TYPES:
53
- if outfile.endswith(override):
54
- output_type = alttype
55
-
56
- if outfile == "-":
57
- outfp: AnyIO = sys.stdout
58
- if sys.stdout.encoding is not None:
59
- codec = "utf-8"
60
- else:
61
- outfp = open(outfile, "wb")
62
-
63
- for fname in files:
64
- with open(fname, "rb") as fp:
65
- pdfminer.high_level.extract_text_to_fp(fp, **locals())
66
- return outfp
67
-
68
-
69
- def create_parser() -> argparse.ArgumentParser:
70
- parser = argparse.ArgumentParser(description=__doc__, add_help=True)
71
- parser.add_argument(
72
- "files",
73
- type=str,
74
- default=None,
75
- nargs="+",
76
- help="One or more paths to PDF files.",
77
- )
78
-
79
- parser.add_argument(
80
- "--version",
81
- "-v",
82
- action="version",
83
- version=f"pdfminer.six v{pdfminer.__version__}",
84
- )
85
- parser.add_argument(
86
- "--debug",
87
- "-d",
88
- default=False,
89
- action="store_true",
90
- help="Use debug logging level.",
91
- )
92
- parser.add_argument(
93
- "--disable-caching",
94
- "-C",
95
- default=False,
96
- action="store_true",
97
- help="If caching or resources, such as fonts, should be disabled.",
98
- )
99
-
100
- parse_params = parser.add_argument_group(
101
- "Parser",
102
- description="Used during PDF parsing",
103
- )
104
- parse_params.add_argument(
105
- "--page-numbers",
106
- type=int,
107
- default=None,
108
- nargs="+",
109
- help="A space-seperated list of page numbers to parse.",
110
- )
111
- parse_params.add_argument(
112
- "--pagenos",
113
- "-p",
114
- type=str,
115
- help="A comma-separated list of page numbers to parse. "
116
- "Included for legacy applications, use --page-numbers "
117
- "for more idiomatic argument entry.",
118
- )
119
- parse_params.add_argument(
120
- "--maxpages",
121
- "-m",
122
- type=int,
123
- default=0,
124
- help="The maximum number of pages to parse.",
125
- )
126
- parse_params.add_argument(
127
- "--password",
128
- "-P",
129
- type=str,
130
- default="",
131
- help="The password to use for decrypting PDF file.",
132
- )
133
- parse_params.add_argument(
134
- "--rotation",
135
- "-R",
136
- default=0,
137
- type=int,
138
- help="The number of degrees to rotate the PDF "
139
- "before other types of processing.",
140
- )
141
-
142
- la_params = LAParams() # will be used for defaults
143
- la_param_group = parser.add_argument_group(
144
- "Layout analysis",
145
- description="Used during layout analysis.",
146
- )
147
- la_param_group.add_argument(
148
- "--no-laparams",
149
- "-n",
150
- default=False,
151
- action="store_true",
152
- help="If layout analysis parameters should be ignored.",
153
- )
154
- la_param_group.add_argument(
155
- "--detect-vertical",
156
- "-V",
157
- default=la_params.detect_vertical,
158
- action="store_true",
159
- help="If vertical text should be considered during layout analysis",
160
- )
161
- la_param_group.add_argument(
162
- "--line-overlap",
163
- type=float,
164
- default=la_params.line_overlap,
165
- help="If two characters have more overlap than this they "
166
- "are considered to be on the same line. The overlap is specified "
167
- "relative to the minimum height of both characters.",
168
- )
169
- la_param_group.add_argument(
170
- "--char-margin",
171
- "-M",
172
- type=float,
173
- default=la_params.char_margin,
174
- help="If two characters are closer together than this margin they "
175
- "are considered to be part of the same line. The margin is "
176
- "specified relative to the width of the character.",
177
- )
178
- la_param_group.add_argument(
179
- "--word-margin",
180
- "-W",
181
- type=float,
182
- default=la_params.word_margin,
183
- help="If two characters on the same line are further apart than this "
184
- "margin then they are considered to be two separate words, and "
185
- "an intermediate space will be added for readability. The margin "
186
- "is specified relative to the width of the character.",
187
- )
188
- la_param_group.add_argument(
189
- "--line-margin",
190
- "-L",
191
- type=float,
192
- default=la_params.line_margin,
193
- help="If two lines are close together they are considered to "
194
- "be part of the same paragraph. The margin is specified "
195
- "relative to the height of a line.",
196
- )
197
- la_param_group.add_argument(
198
- "--boxes-flow",
199
- "-F",
200
- type=float_or_disabled,
201
- default=la_params.boxes_flow,
202
- help="Specifies how much a horizontal and vertical position of a "
203
- "text matters when determining the order of lines. The value "
204
- "should be within the range of -1.0 (only horizontal position "
205
- "matters) to +1.0 (only vertical position matters). You can also "
206
- "pass `disabled` to disable advanced layout analysis, and "
207
- "instead return text based on the position of the bottom left "
208
- "corner of the text box.",
209
- )
210
- la_param_group.add_argument(
211
- "--all-texts",
212
- "-A",
213
- default=la_params.all_texts,
214
- action="store_true",
215
- help="If layout analysis should be performed on text in figures.",
216
- )
217
-
218
- output_params = parser.add_argument_group(
219
- "Output",
220
- description="Used during output generation.",
221
- )
222
- output_params.add_argument(
223
- "--outfile",
224
- "-o",
225
- type=str,
226
- default="-",
227
- help="Path to file where output is written. "
228
- 'Or "-" (default) to write to stdout.',
229
- )
230
- output_params.add_argument(
231
- "--output_type",
232
- "-t",
233
- type=str,
234
- default="text",
235
- help="Type of output to generate {text,html,xml,tag}.",
236
- )
237
- output_params.add_argument(
238
- "--codec",
239
- "-c",
240
- type=str,
241
- default="utf-8",
242
- help="Text encoding to use in output file.",
243
- )
244
- output_params.add_argument(
245
- "--output-dir",
246
- "-O",
247
- default=None,
248
- help="The output directory to put extracted images in. If not given, "
249
- "images are not extracted.",
250
- )
251
- output_params.add_argument(
252
- "--layoutmode",
253
- "-Y",
254
- default="normal",
255
- type=str,
256
- help="Type of layout to use when generating html "
257
- "{normal,exact,loose}. If normal,each line is"
258
- " positioned separately in the html. If exact"
259
- ", each character is positioned separately in"
260
- " the html. If loose, same result as normal "
261
- "but with an additional newline after each "
262
- "text line. Only used when output_type is html.",
263
- )
264
- output_params.add_argument(
265
- "--scale",
266
- "-s",
267
- type=float,
268
- default=1.0,
269
- help="The amount of zoom to use when generating html file. "
270
- "Only used when output_type is html.",
271
- )
272
- output_params.add_argument(
273
- "--strip-control",
274
- "-S",
275
- default=False,
276
- action="store_true",
277
- help="Remove control statement from text. "
278
- "Only used when output_type is xml.",
279
- )
280
-
281
- return parser
282
-
283
-
284
- def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
285
- parsed_args = create_parser().parse_args(args=args)
286
-
287
- # Propagate parsed layout parameters to LAParams object
288
- if parsed_args.no_laparams:
289
- parsed_args.laparams = None
290
- else:
291
- parsed_args.laparams = LAParams(
292
- line_overlap=parsed_args.line_overlap,
293
- char_margin=parsed_args.char_margin,
294
- line_margin=parsed_args.line_margin,
295
- word_margin=parsed_args.word_margin,
296
- boxes_flow=parsed_args.boxes_flow,
297
- detect_vertical=parsed_args.detect_vertical,
298
- all_texts=parsed_args.all_texts,
299
- )
300
-
301
- if parsed_args.page_numbers:
302
- parsed_args.page_numbers = {x - 1 for x in parsed_args.page_numbers}
303
-
304
- if parsed_args.pagenos:
305
- parsed_args.page_numbers = {int(x) - 1 for x in parsed_args.pagenos.split(",")}
306
-
307
- if parsed_args.output_type == "text" and parsed_args.outfile != "-":
308
- for override, alttype in OUTPUT_TYPES:
309
- if parsed_args.outfile.endswith(override):
310
- parsed_args.output_type = alttype
311
-
312
- return parsed_args
313
-
314
-
315
- def main(args: Optional[List[str]] = None) -> int:
316
- parsed_args = parse_args(args)
317
- outfp = extract_text(**vars(parsed_args))
318
- outfp.close()
319
- return 0
320
-
321
-
322
- if __name__ == "__main__":
323
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pip DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from pip._internal.cli.main import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pip3 DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from pip._internal.cli.main import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pip3.12 DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from pip._internal.cli.main import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pygmentize DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from pygments.cmdline import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pymupdf DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from pymupdf.__main__ import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/pyresparser DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from pyresparser.command_line import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/python DELETED
@@ -1 +0,0 @@
1
- python3.12
 
 
.venv/bin/python3 DELETED
@@ -1 +0,0 @@
1
- python3.12
 
 
.venv/bin/python3.12 DELETED
@@ -1 +0,0 @@
1
- /Library/Frameworks/Python.framework/Versions/3.12/bin/python3.12
 
 
.venv/bin/spacy DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from spacy.cli import setup_cli
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(setup_cli())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/tiny-agents DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from huggingface_hub.inference._mcp.cli import app
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(app())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/tqdm DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from tqdm.cli import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/typer DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from typer.cli import main
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(main())
 
 
 
 
 
 
 
 
 
 
 
.venv/bin/weasel DELETED
@@ -1,10 +0,0 @@
1
- #!/bin/sh
2
- '''exec' "/Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv/bin/python3.12" "$0" "$@"
3
- ' '''
4
- # -*- coding: utf-8 -*-
5
- import re
6
- import sys
7
- from weasel.cli import app
8
- if __name__ == '__main__':
9
- sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
10
- sys.exit(app())
 
 
 
 
 
 
 
 
 
 
 
.venv/pyvenv.cfg DELETED
@@ -1,5 +0,0 @@
1
- home = /Library/Frameworks/Python.framework/Versions/3.12/bin
2
- include-system-site-packages = false
3
- version = 3.12.6
4
- executable = /Library/Frameworks/Python.framework/Versions/3.12/bin/python3.12
5
- command = /Library/Frameworks/Python.framework/Versions/3.12/bin/python3 -m venv /Users/husseinelsaadi/Documents/Data Science USAL/Spring 24-25/FYP - Codingo/Codingo/.venv