Dominik Macháček
committed on
Commit
·
e62fba3
1
Parent(s):
a365074
line packet committed
Browse files: originally from ELITR -- TODO -- change comments
- line_packet.py +94 -0
line_packet.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
"""Functions for sending and receiving individual lines of text over a socket.
|
4 |
+
|
5 |
+
Used by marian-server-server.py to communicate with the Marian worker.
|
6 |
+
|
7 |
+
A line is transmitted using one or more fixed-size packets of UTF-8 bytes
|
8 |
+
containing:
|
9 |
+
|
10 |
+
- Zero or more bytes of UTF-8, excluding \n and \0, followed by
|
11 |
+
|
12 |
+
- A \n terminator and one or more \0 bytes, padding the packet to PACKET_SIZE
|
13 |
+
|
14 |
+
"""
|
15 |
+
|
16 |
+
# Size in bytes of each packet on the wire (64 KiB); also used as the
# maximum number of bytes requested per recv() call.
PACKET_SIZE = 64 * 1024
|
17 |
+
|
18 |
+
|
19 |
+
def send_one_line(socket, text):
    r"""Send the first line of 'text' over the given socket.

    The 'text' argument should contain a single line of text (line break
    characters are optional). Line boundaries are determined by Python's
    str.splitlines() function [1]. A NUL character ('\0') also counts as a
    line terminator. If 'text' contains multiple lines then only the first
    will be sent.

    On the wire the line is encoded as UTF-8, terminated with '\n\0' and
    padded with '\0' bytes up to a whole number of PACKET_SIZE-byte packets.
    Each packet is handed to socket.sendall() separately.

    If the send fails then the exception raised by socket.sendall() is
    propagated to the caller.

    [1] https://docs.python.org/3.5/library/stdtypes.html#str.splitlines

    Args:
        socket: a socket object (anything providing sendall(bytes)).
        text: string containing a line of text for transmission.
    """
    # str.replace() returns a new string, so the result has to be rebound;
    # otherwise an embedded NUL would end up inside the transmitted payload.
    text = text.replace('\0', '\n')
    lines = text.splitlines()
    first_line = lines[0] if lines else ''
    # TODO Is there a better way of handling bad input than 'replace'?
    data = first_line.encode('utf-8', errors='replace') + b'\n\0'
    # Pad with NUL bytes so the payload is an exact multiple of PACKET_SIZE.
    data += b'\0' * (-len(data) % PACKET_SIZE)
    for offset in range(0, len(data), PACKET_SIZE):
        socket.sendall(data[offset:offset + PACKET_SIZE])
|
48 |
+
|
49 |
+
|
50 |
+
def receive_one_line(socket):
    r"""Receive a single line of text from the given socket.

    Repeatedly calls socket.recv(PACKET_SIZE), blocking as the socket
    dictates, until a chunk containing a NUL byte ('\0') arrives. Everything
    received so far is then decoded as UTF-8 (invalid sequences are
    replaced), leading and trailing NULs are stripped, and only the text up
    to the first newline is kept.

    NOTE(review): reading stops at the first chunk that contains a NUL, so
    if recv() returns fewer bytes than the sender transmitted, the remainder
    stays in the socket and is seen by the next call -- confirm whether
    callers rely on packet alignment.

    Args:
        socket: a socket object.

    Returns:
        The first received line with a terminating newline appended, or
        None if recv() returned no data (the connection has been closed)
        before a NUL byte was seen.
    """
    chunks = []
    terminated = False
    while not terminated:
        chunk = socket.recv(PACKET_SIZE)
        if not chunk:  # Connection has been closed.
            return None
        chunks.append(chunk)
        terminated = b'\0' in chunk
    # TODO Is there a better way of handling bad input than 'replace'?
    decoded = b''.join(chunks).decode('utf-8', errors='replace')
    first_line, _, _ = decoded.strip('\0').partition('\n')
    return first_line + '\n'
|
80 |
+
|
81 |
+
|
82 |
+
def receive_lines(socket):
    r"""Receive whatever lines are available from one recv() on the socket.

    Performs a single socket.recv(PACKET_SIZE) call. The received bytes are
    decoded as UTF-8 (invalid sequences are replaced), leading and trailing
    NUL ('\0') padding is stripped and the text is split on '\n'. Because
    the text is split rather than parsed line by line, a chunk ending in a
    newline produces a trailing empty string in the result.

    Args:
        socket: a socket object.

    Returns:
        A list of strings (without newline characters) on success; an empty
        list if no data is available on a non-blocking socket or the chunk
        held only NUL padding; None if the connection has been closed.
    """
    try:
        data = socket.recv(PACKET_SIZE)
    except BlockingIOError:
        # Non-blocking socket with nothing to read right now.
        return []
    # recv() reports a closed connection with an empty bytes object, never
    # with None, so test for emptiness rather than identity with None.
    if not data:  # Connection has been closed.
        return None
    # TODO Is there a better way of handling bad input than 'replace'?
    text = data.decode('utf-8', errors='replace').strip('\0')
    if not text:
        # Only padding was received: the peer is still connected, there is
        # simply no line content in this chunk.
        return []
    return text.split('\n')
|