Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

# -*- encoding: utf-8 -*- 

import requests 

import ftplib 

import base64 

from requests.compat import urlparse 

from requests.hooks import dispatch_hook 

from requests import Response, codes 

from io import BytesIO 

import cgi 

import os 

import socket 

 

from requests.exceptions import ConnectionError, ConnectTimeout, ReadTimeout 

from requests.exceptions import RequestException 

from requests.utils import prepend_scheme_if_needed 

 

class FTPSession(requests.Session):
    '''A requests Session that has an FTPAdapter mounted for ftp:// URLs.

    Besides mounting the adapter, the class offers one convenience method
    per FTP verb. Every one of them forwards to self.request() using the
    verb as the request method and returns whatever that call returns.'''

    def __init__(self):
        super(FTPSession, self).__init__()

        # Anything whose URL starts with ftp:// goes through the FTP adapter.
        ftp_adapter = FTPAdapter()
        self.mount('ftp://', ftp_adapter)

    # Convenience wrappers, one per FTP verb.

    def list(self, url, **kwargs):
        '''Issue an FTP LIST against url and hand back the Response.'''
        verb = 'LIST'
        return self.request(verb, url, **kwargs)

    def retr(self, url, **kwargs):
        '''Issue an FTP RETR against url and hand back the Response; the
        retrieved binary data is available through its content field.'''
        verb = 'RETR'
        return self.request(verb, url, **kwargs)

    def stor(self, url, files=None, **kwargs):
        '''Issue an FTP STOR against url and hand back the Response.

        Exactly one file is expected, supplied through the usual Requests
        files argument. The name of the remote file is taken from the
        URL.'''
        verb = 'STOR'
        return self.request(verb, url, files=files, **kwargs)

    def nlst(self, url, **kwargs):
        '''Issue an FTP NLST against url and hand back the Response.'''
        verb = 'NLST'
        return self.request(verb, url, **kwargs)

    def size(self, url, **kwargs):
        '''Issue an FTP SIZE against url and hand back the result of the
        underlying self.request() call.'''
        verb = 'SIZE'
        return self.request(verb, url, **kwargs)

 

 

def monkeypatch_session():
    '''Swap requests.Session for FTPSession.

    Once this has run, anything that looks up requests.Session receives the
    FTP-aware subclass together with its FTP helper methods. Nothing is
    returned.'''
    setattr(requests, 'Session', FTPSession)

 

 

def parse_multipart_files(request):
    '''Given a prepared request, return a file-like object containing the
    original data. This is pretty hacky.

    The multipart body found in request.body is taken apart again with the
    cgi module, using the boundary announced in the Content-Type header,
    and the payload of one part is returned.

    :param request: object exposing a headers mapping (with a multipart
        'Content-Type' entry) and the encoded multipart body.
    :returns: a BytesIO, positioned at offset 0, holding the file data.
    '''
    # Start by grabbing the pdict.
    _, pdict = cgi.parse_header(request.headers['Content-Type'])

    # parse_header() yields text, but on Python 3 cgi.parse_multipart()
    # needs the boundary as bytes. On Python 2 str already is bytes, so the
    # value is left untouched there.
    boundary = pdict.get('boundary')
    if boundary is not None and not isinstance(boundary, bytes):
        pdict['boundary'] = boundary.encode('ascii')

    body = request.body

    # Some Python 3.7 releases fail with a KeyError unless the parameter
    # dictionary carries a CONTENT-LENGTH entry; other versions ignore it or
    # use it as a read limit.
    pdict.setdefault('CONTENT-LENGTH', len(body))

    # Parse the data. Simply take the first file. The temporary buffer is
    # released even if parsing fails.
    buf = BytesIO(body)
    try:
        data = cgi.parse_multipart(buf, pdict)
    finally:
        buf.close()
    _, filedata = data.popitem()

    # The parsed chunks are bytes, so they must be joined with a bytes
    # separator (a text separator raises TypeError on Python 3). A BytesIO
    # created from initial data starts at offset 0.
    return BytesIO(b''.join(filedata))

 

 

def data_callback_factory(variable):
    '''Create a data callback for the FTP library.

    The callback returned here appends every chunk it is given to variable,
    which should be a file-like structure, and keeps a running total of the
    bytes seen so far in variable.content_len.'''
    def callback(data):
        variable.write(data)

        # The first chunk starts the counter, later chunks add to it.
        seen_so_far = getattr(variable, "content_len", 0)
        variable.content_len = seen_so_far + len(data)

    return callback

 

 

def build_text_response(request, data, code):
    '''Create a response for textual data; the encoding passed on to
    build_response() is always 'ascii'.'''
    text_encoding = 'ascii'
    return build_response(request, data, code, text_encoding)

 

 

def build_binary_response(request, data, code):
    '''Create a response for data of unknown encoding; no encoding (None)
    is passed on to build_response().'''
    unknown_encoding = None
    return build_response(request, data, code, unknown_encoding)

 

 

def build_response(request, data, code, encoding):
    '''Builds a response object from the data returned by ftplib, using the
    specified encoding.

    :param request: the request being answered; its url and hooks are used
        and it is attached to the response.
    :param data: seekable file-like object used as the raw body. When it
        has a content_len attribute, that value becomes the Content-Length
        header.
    :param code: reply text that starts with the three-digit status code,
        for example '226 Transfer complete' or just '213'.
    :param encoding: value stored as the response encoding (may be None).
    :returns: the response, after the 'response' hooks have been run.
    :raises ValueError: if code does not start with a number.
    '''
    response = Response()

    response.encoding = encoding

    # Fill in some useful fields.
    response.raw = data
    response.url = request.url
    response.request = request

    # An FTP reply code is always the first three characters of the reply.
    # Slicing, rather than taking the first whitespace-separated token,
    # also copes with multi-line replies, which start with the code
    # followed directly by a hyphen ("226-Transfer complete\n226 Done").
    response.status_code = int(code.strip()[:3])

    if hasattr(data, "content_len"):
        response.headers['Content-Length'] = str(data.content_len)

    # Make sure to seek the file-like raw object back to the start.
    response.raw.seek(0)

    # Run the response hook.
    response = dispatch_hook('response', request.hooks, response)
    return response

 

 

class FTPAdapter(requests.adapters.BaseAdapter):
    '''A Requests Transport Adapter that handles FTP urls.

    Every call to send() opens a fresh ftplib connection (stored on
    self.conn), logs in, runs the handler registered for the request method
    in self.func_table and closes the connection again.'''

    def __init__(self):
        super(FTPAdapter, self).__init__()

        # Build a dictionary keyed off the methods we support in upper case.
        # The values of this dictionary should be the functions we use to
        # send the specific queries.
        self.func_table = {
            'LIST': self.list,
            'RETR': self.retr,
            'STOR': self.stor,
            'NLST': self.nlst,
            'SIZE': self.size,
            'HEAD': self.head,
            'GET': self.get,
        }

    def send(self, request, **kwargs):
        '''Sends a PreparedRequest object over FTP. Returns a response object.

        Recognised keyword arguments are 'timeout' (handed to ftplib's
        connect) and 'proxies' (a mapping from scheme to proxy URL). When a
        proxy is configured for the URL scheme, the request is delegated to
        send_proxy() instead of being sent over FTP.

        :raises ConnectTimeout: socket timeout before the connection was
            established.
        :raises ReadTimeout: socket timeout after the connection was
            established.
        :raises ConnectionError: EOFError or any other socket error.
        :raises RequestException: any ftplib error other than error_perm
            and error_temp, which are turned into responses instead.
        :raises KeyError: the request method has no entry in func_table.
        '''
        # Get the authentication from the prepared request, if any.
        auth = self.get_username_password_from_header(request)

        # Next, get the host and the path.
        scheme, host, port, path = self.get_host_and_path_from_url(request)

        # Sort out the timeout.
        timeout = kwargs.get('timeout', None)

        # Look for a proxy. 'proxies' may be passed explicitly as None, so
        # fall back to an empty mapping in that case as well.
        proxies = kwargs.get('proxies') or {}
        proxy = proxies.get(scheme)

        # If there is a proxy, then we actually want to make a HTTP request
        if proxy:
            return self.send_proxy(request, proxy, **kwargs)

        # Establish the connection and login if needed.
        conn = self.conn = ftplib.FTP()

        # Use a flag to distinguish read vs connection timeouts, and a flat set
        # of except blocks instead of a nested try-except, because python 3
        # exception chaining makes things weird
        connected = False

        try:
            conn.connect(host, port, timeout)
            connected = True

            if auth is not None:
                conn.login(auth[0], auth[1])
            else:
                conn.login()

            # Get the method and attempt to find the function to call.
            resp = self.func_table[request.method](path, request)
        except socket.timeout as e:
            # requests distinguishes between connection timeouts and others
            if connected:
                raise ReadTimeout(e, request=request)
            else:
                raise ConnectTimeout(e, request=request)
        # ftplib raises EOFError if the connection is unexpectedly closed.
        # Convert that or any other socket error to a ConnectionError.
        except (EOFError, socket.error) as e:
            raise ConnectionError(e, request=request)
        # Raised for 5xx errors. FTP uses 550 for both ENOENT and EPERM type
        # errors, so just translate all of these into a http-ish 404
        except ftplib.error_perm as e:
            # The exception message is probably from the server, so if it's
            # non-ascii, who knows what the encoding is. Latin1 has the
            # advantage of not being able to fail.
            resp = build_text_response(
                request,
                BytesIO(str(e).encode('latin1')),
                str(codes.not_found))
        # 4xx reply, translate to a http 503
        except ftplib.error_temp as e:
            resp = build_text_response(
                request,
                BytesIO(str(e).encode('latin1')),
                str(codes.unavailable))
        # error_reply is an unexpected status code, and error_proto is an
        # invalid status code. Error is the generic ftplib error, usually
        # raised when a line is too long. Translate all of them to a generic
        # RequestException
        except (ftplib.error_reply, ftplib.error_proto, ftplib.Error) as e:
            raise RequestException(e, request=request)
        finally:
            # The handlers only close the connection when they succeed, so
            # make sure the socket is released on every error path too.
            # Closing a connection that is already closed is harmless.
            conn.close()

        # Return the response.
        return resp

    def close(self):
        '''Dispose of any internal state.'''
        # Currently this is a no-op.
        pass

    def send_proxy(self, request, proxy, **kwargs):
        '''Send a FTP request through a HTTP proxy.

        The adapter that a new requests Session has mounted for the proxy
        URL sends the request; that adapter is closed afterwards.'''
        # Direct the request through a HTTP adapter instead
        proxy_url = prepend_scheme_if_needed(proxy, 'http')
        s = requests.Session()
        adapter = s.get_adapter(proxy_url)

        try:
            return adapter.send(request, **kwargs)
        finally:
            adapter.close()

    def list(self, path, request):
        '''Executes the FTP LIST command on the given path.

        Changes into path first and returns a text response holding the
        listing.'''
        data = BytesIO()

        # To ensure the BytesIO object gets cleaned up, we need to alias its
        # close method to the release_conn() method. This is a dirty hack, but
        # there you go.
        data.release_conn = data.close

        self.conn.cwd(path)
        code = self.conn.retrbinary('LIST', data_callback_factory(data))

        # When that call has finished executing, we'll have all our data.
        response = build_text_response(request, data, code)

        # Close the connection.
        self.conn.close()

        return response

    def retr(self, path, request):
        '''Executes the FTP RETR command on the given path.

        Returns a binary response holding the retrieved data.'''
        data = BytesIO()

        # To ensure the BytesIO gets cleaned up, we need to alias its close
        # method. See self.list().
        data.release_conn = data.close

        code = self.conn.retrbinary('RETR ' + path, data_callback_factory(data))

        response = build_binary_response(request, data, code)

        # Close the connection.
        self.conn.close()

        return response

    def get(self, path, request):
        '''Executes the FTP RETR command on the given path.

        This is the same as retr except that the FTP server code is
        converted to a HTTP 200.
        '''
        response = self.retr(path, request)

        # Errors are handled in send(), so assume everything is ok if we
        # made it this far
        response.status_code = codes.ok
        return response

    def size(self, path, request):
        '''Executes the FTP SIZE command on the given path.

        Returns a text response whose body is the size in decimal digits
        and whose Content-Length header is that size, or None when the
        server did not report a numeric size.'''
        self.conn.voidcmd('TYPE I')  # SIZE is not usually allowed in ASCII mode

        size = self.conn.size(path)

        if not str(size).isdigit():
            self.conn.close()
            return None

        # Render the number as text. bytes(size) would only do that on
        # Python 2; on Python 3 it allocates a buffer of `size` zero bytes.
        data = BytesIO(str(size).encode('ascii'))

        # To ensure the BytesIO gets cleaned up, we need to alias its close
        # method to the release_conn() method. This is a dirty hack, but there
        # you go.
        data.release_conn = data.close
        data.content_len = size

        response = build_text_response(request, data, '213')

        self.conn.close()

        return response

    def head(self, path, request):
        '''Executes the FTP SIZE command on the given path.

        This is the same as size except that the FTP server code is
        converted to a HTTP 200. Like size, it returns None when no size
        could be determined.
        '''
        response = self.size(path, request)

        # size() returns None when the server gave no usable answer; there
        # is no status code to rewrite in that case.
        if response is not None:
            response.status_code = codes.ok
        return response

    def stor(self, path, request):
        '''Executes the FTP STOR command on the given path.

        Returns a binary response with an empty body.'''
        # First, get the file handle. We assume (bravely)
        # that there is only one file to be sent to a given URL. We also
        # assume that the filename is sent as part of the URL, not as part of
        # the files argument. Both of these assumptions are rarely correct,
        # but they are easy.
        data = parse_multipart_files(request)

        # Split into the path and the filename.
        path, filename = os.path.split(path)

        # Switch directories and upload the data.
        self.conn.cwd(path)
        code = self.conn.storbinary('STOR ' + filename, data)

        # Close the connection and build the response.
        self.conn.close()

        response = build_binary_response(request, BytesIO(), code)

        return response

    def nlst(self, path, request):
        '''Executes the FTP NLST command on the given path.

        Changes into path first and returns a text response holding the
        name list.'''
        data = BytesIO()

        # Alias the close method.
        data.release_conn = data.close

        self.conn.cwd(path)
        code = self.conn.retrbinary('NLST', data_callback_factory(data))

        # When that call has finished executing, we'll have all our data.
        response = build_text_response(request, data, code)

        # Close the connection.
        self.conn.close()

        return response

    def get_username_password_from_header(self, request):
        '''Given a PreparedRequest object, reverse the process of adding HTTP
        Basic auth to obtain the username and password. Allows the FTP adapter
        to piggyback on the basic auth notation without changing the control
        flow.

        :returns: a (username, password) tuple, or None when the request
            has no Authorization header.
        :raises AuthError: the header is not of the form 'Basic <token>'.
        '''
        auth_header = request.headers.get('Authorization')

        if not auth_header:
            # No auth header. Return None.
            return None

        # The basic auth header is of the form 'Basic xyz'. We want the
        # second part. Check that we have the right kind of auth though,
        # and that the second part is actually there.
        encoded_components = auth_header.split()[:2]
        if len(encoded_components) != 2 or encoded_components[0] != 'Basic':
            raise AuthError('Invalid form of Authentication used.')
        encoded = encoded_components[1]

        # Decode the base64 encoded string.
        decoded = base64.b64decode(encoded)

        # The auth string was encoded to bytes by requests using latin1,
        # and will be encoded to bytes by ftplib (in python 3) using
        # latin1. In the meantime, use a str
        decoded = decoded.decode('latin1')

        # The string is of the form 'username:password'. Split on the first
        # colon only, so that a password which itself contains colons is
        # kept intact. A missing colon gives an empty password.
        username, _, password = decoded.partition(':')
        return (username, password)

    def get_host_and_path_from_url(self, request):
        '''Given a PreparedRequest object, split the URL in such a manner as to
        determine the host and the path. This is a separate method to wrap some
        of urlparse's craziness.

        :returns: a (scheme, host, port, path) tuple. One leading slash is
            removed from the path and the port is 0 when the URL names
            none.
        '''
        url = request.url
        parsed = urlparse(url)
        scheme = parsed.scheme
        path = parsed.path

        # If there is a slash on the front of the path, chuck it.
        if path.startswith('/'):
            path = path[1:]

        host = parsed.hostname
        port = parsed.port or 0

        return (scheme, host, port, path)

 

 

class AuthError(Exception):
    '''Signals a problem with authentication.'''