transferwee.py - transferwee - Download/upload file via wetransfer.com
 (HTM) git clone https://github.com/iamleot/transferwee
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
       transferwee.py (12768B)
       ---
            1 #!/usr/bin/env python3
            2 
            3 #
            4 # Copyright (c) 2018-2019 Leonardo Taccari
            5 # All rights reserved.
            6 # 
            7 # Redistribution and use in source and binary forms, with or without
            8 # modification, are permitted provided that the following conditions
            9 # are met:
           10 # 
           11 # 1. Redistributions of source code must retain the above copyright
           12 #    notice, this list of conditions and the following disclaimer.
           13 # 2. Redistributions in binary form must reproduce the above copyright
           14 #    notice, this list of conditions and the following disclaimer in the
           15 #    documentation and/or other materials provided with the distribution.
           16 # 
           17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
           18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
           19 # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
           20 # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
           21 # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
           22 # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
           23 # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
           24 # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
           25 # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
           26 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
           27 # POSSIBILITY OF SUCH DAMAGE.
           28 #
           29 
           30 
           31 """
           32 Download/upload files via wetransfer.com
           33 
           34 transferwee is a script/module to download/upload files via wetransfer.com.
           35 
           36 It exposes `download' and `upload' subcommands, respectively used to download
           37 files from a `we.tl' or `wetransfer.com/downloads' URLs and upload files that
           38 will be shared via emails or link.
           39 """
           40 
           41 from sys import exit
           42 from typing import List
           43 import os.path
           44 import re
           45 import urllib.parse
           46 import zlib
           47 
           48 import requests
           49 
           50 
           51 WETRANSFER_API_URL = 'https://wetransfer.com/api/v4/transfers'
           52 WETRANSFER_DOWNLOAD_URL = WETRANSFER_API_URL + '/{transfer_id}/download'
           53 WETRANSFER_UPLOAD_EMAIL_URL = WETRANSFER_API_URL + '/email'
           54 WETRANSFER_UPLOAD_LINK_URL = WETRANSFER_API_URL + '/link'
           55 WETRANSFER_FILES_URL = WETRANSFER_API_URL + '/{transfer_id}/files'
           56 WETRANSFER_PART_PUT_URL = WETRANSFER_FILES_URL + '/{file_id}/part-put-url'
           57 WETRANSFER_FINALIZE_MPP_URL = WETRANSFER_FILES_URL + '/{file_id}/finalize-mpp'
           58 WETRANSFER_FINALIZE_URL = WETRANSFER_API_URL + '/{transfer_id}/finalize'
           59 
           60 WETRANSFER_DEFAULT_CHUNK_SIZE = 5242880
           61 
           62 
           63 def download_url(url: str) -> str:
           64     """Given a wetransfer.com download URL download return the downloadable URL.
           65 
           66     The URL should be of the form `https://we.tl/' or
           67     `https://wetransfer.com/downloads/'. If it is a short URL (i.e. `we.tl')
           68     the redirect is followed in order to retrieve the corresponding
           69     `wetransfer.com/downloads/' URL.
           70 
           71     The following type of URLs are supported:
           72      - `https://we.tl/<short_url_id>`:
           73         received via link upload, via email to the sender and printed by
           74         `upload` action
           75      - `https://wetransfer.com/<transfer_id>/<security_hash>`:
           76         directly not shared in any ways but the short URLs actually redirect to
           77         them
           78      - `https://wetransfer.com/<transfer_id>/<recipient_id>/<security_hash>`:
           79         received via email by recipients when the files are shared via email
           80         upload
           81 
           82     Return the download URL (AKA `direct_link') as a str or None if the URL
           83     could not be parsed.
           84     """
           85     # Follow the redirect if we have a short URL
           86     if url.startswith('https://we.tl/'):
           87         r = requests.head(url, allow_redirects=True)
           88         url = r.url
           89 
           90     recipient_id = None
           91     params = url.replace('https://wetransfer.com/downloads/', '').split('/')
           92 
           93     if len(params) == 2:
           94         transfer_id, security_hash = params
           95     elif len(params) == 3:
           96         transfer_id, recipient_id, security_hash = params
           97     else:
           98         return None
           99 
          100     j = {
          101         "security_hash": security_hash,
          102     }
          103     if recipient_id:
          104         j["recipient_id"] = recipient_id
          105     request_data = _prepare_request_data()
          106     r = requests.post(WETRANSFER_DOWNLOAD_URL.format(transfer_id=transfer_id),
          107                       json=j,
          108                       cookies=request_data['cookies'],
          109                       headers=request_data['headers'])
          110 
          111     j = r.json()
          112     return j.get('direct_link')
          113 
          114 
          115 def _file_unquote(file: str) -> str:
          116     """Given a URL encoded file unquote it.
          117 
          118     All occurences of `\', `/' and `../' will be ignored to avoid possible
          119     directory traversals.
          120     """
          121     return urllib.parse.unquote(file).replace('../', '').replace('/', '').replace('\\', '')
          122 
          123 
          124 def download(url: str) -> None:
          125     """Given a `we.tl/' or `wetransfer.com/downloads/' download it.
          126 
          127     First a direct link is retrieved (via download_url()), the filename will
          128     be extracted to it and it will be fetched and stored on the current
          129     working directory.
          130     """
          131     dl_url = download_url(url)
          132     file = _file_unquote(urllib.parse.urlparse(dl_url).path.split('/')[-1])
          133 
          134     r = requests.get(dl_url, stream=True)
          135     with open(file, 'wb') as f:
          136         for chunk in r.iter_content(chunk_size=1024):
          137             f.write(chunk)
          138 
          139 
          140 def _file_name_and_size(file: str) -> dict:
          141     """Given a file, prepare the "name" and "size" dictionary.
          142 
          143     Return a dictionary with "name" and "size" keys.
          144     """
          145     filename = os.path.basename(file)
          146     filesize = os.path.getsize(file)
          147 
          148     return {
          149         "name": filename,
          150         "size": filesize
          151     }
          152 
          153 
          154 def _prepare_request_data() -> dict:
          155     """Prepare a wetransfer.com request.
          156 
          157     Return a dictionary with "cookies" and "headers" keys, needed for most
          158     wetransfer requests.
          159     """
          160 
          161     r = requests.get('https://wetransfer.com/')
          162     m = re.search('name="csrf-token" content="([^"]+)"', r.text)
          163 
          164     return {
          165         'headers': { 'X-CSRF-Token': m.group(1), },
          166         'cookies': r.cookies,
          167     }
          168 
          169 
          170 def _prepare_email_upload(filenames: List[str], message: str,
          171                           sender: str, recipients: List[str],
          172                           request_data: dict) -> str:
          173     """Given a list of filenames, message a sender and recipients prepare for
          174     the email upload.
          175 
          176     Return the parsed JSON response.
          177     """
          178     j = {
          179         "files": [_file_name_and_size(f) for f in filenames],
          180         "from": sender,
          181         "message": message,
          182         "recipients": recipients,
          183         "ui_language": "en",
          184     }
          185 
          186     r = requests.post(WETRANSFER_UPLOAD_EMAIL_URL, json=j,
          187                       cookies=request_data['cookies'],
          188                       headers=request_data['headers'])
          189     return r.json()
          190 
          191 
          192 def _prepare_link_upload(filenames: List[str], message: str,
          193                          request_data: dict) -> str:
          194     """Given a list of filenames and a message prepare for the link upload.
          195 
          196     Return the parsed JSON response.
          197     """
          198     j = {
          199         "files": [_file_name_and_size(f) for f in filenames],
          200         "message": message,
          201         "ui_language": "en",
          202     }
          203 
          204     r = requests.post(WETRANSFER_UPLOAD_LINK_URL, json=j,
          205                       cookies=request_data['cookies'],
          206                       headers=request_data['headers'])
          207     return r.json()
          208 
          209 
          210 def _prepare_file_upload(transfer_id: str, file: str,
          211                          request_data: dict) -> str:
          212     """Given a transfer_id and file prepare it for the upload.
          213 
          214     Return the parsed JSON response.
          215     """
          216     j = _file_name_and_size(file)
          217     r = requests.post(WETRANSFER_FILES_URL.format(transfer_id=transfer_id),
          218                       json=j,
          219                       cookies=request_data['cookies'],
          220                       headers=request_data['headers'])
          221     return r.json()
          222 
          223 
          224 def _upload_chunks(transfer_id: str, file_id: str, file: str,
          225                    request_data: dict,
          226                    default_chunk_size: int = WETRANSFER_DEFAULT_CHUNK_SIZE) -> str:
          227     """Given a transfer_id, file_id and file upload it.
          228 
          229     Return the parsed JSON response.
          230     """
          231     f = open(file, 'rb')
          232 
          233     chunk_number = 0
          234     while True:
          235         chunk = f.read(default_chunk_size)
          236         chunk_size = len(chunk)
          237         if chunk_size == 0:
          238             break
          239         chunk_number += 1
          240 
          241         j = {
          242             "chunk_crc": zlib.crc32(chunk),
          243             "chunk_number": chunk_number,
          244             "chunk_size": chunk_size,
          245             "retries": 0
          246         }
          247 
          248         r = requests.post(
          249             WETRANSFER_PART_PUT_URL.format(transfer_id=transfer_id,
          250                                            file_id=file_id),
          251             json=j,
          252             cookies=request_data['cookies'],
          253             headers=request_data['headers'])
          254         url = r.json().get('url')
          255         r = requests.options(url,
          256                              headers={
          257                                  'Origin': 'https://wetransfer.com',
          258                                  'Access-Control-Request-Method': 'PUT',
          259                              })
          260         r = requests.put(url, data=chunk)
          261 
          262     j = {
          263         'chunk_count': chunk_number
          264     }
          265     r = requests.put(
          266         WETRANSFER_FINALIZE_MPP_URL.format(transfer_id=transfer_id,
          267                                            file_id=file_id),
          268         json=j,
          269         cookies=request_data['cookies'],
          270         headers=request_data['headers'])
          271 
          272     return r.json()
          273 
          274 
          275 def _finalize_upload(transfer_id: str, request_data: dict) -> str:
          276     """Given a transfer_id finalize the upload.
          277 
          278     Return the parsed JSON response.
          279     """
          280     r = requests.put(WETRANSFER_FINALIZE_URL.format(transfer_id=transfer_id),
          281                      cookies=request_data['cookies'],
          282                      headers=request_data['headers'])
          283 
          284     return r.json()
          285 
          286 
          287 def upload(files: List[str], message: str = '', sender: str = None,
          288            recipients: List[str] = []) -> str:
          289     """Given a list of files upload them and return the corresponding URL.
          290 
          291     Also accepts optional parameters:
          292      - `message': message used as a description of the transfer
          293      - `sender': email address used to receive an ACK if the upload is
          294                  successfull. For every download by the recipients an email
          295                  will be also sent
          296      - `recipients': list of email addresses of recipients. When the upload
          297                      succeed every recipients will receive an email with a link
          298 
          299     If both sender and recipient parameters are passed the email upload will be
          300     used. Otherwise, the link upload will be used.
          301 
          302     Return the short URL of the transfer on success.
          303     """
          304 
          305     # Check that all files exists
          306     for f in files:
          307         if not os.path.exists(f):
          308             raise FileNotFoundError(f)
          309 
          310     # Check that there are no duplicates filenames
          311     # (despite possible different dirname())
          312     filenames = [os.path.basename(f) for f in files]
          313     if len(files) != len(set(filenames)):
          314         raise FileExistsError('Duplicate filenames')
          315 
          316     transfer_id = None
          317     rd = _prepare_request_data()
          318     if sender and recipients:
          319         # email upload
          320         transfer_id = \
          321             _prepare_email_upload(files, message, sender, recipients, rd)['id']
          322     else:
          323         # link upload
          324         transfer_id = _prepare_link_upload(files, message, rd)['id']
          325 
          326     for f in files:
          327         file_id = _prepare_file_upload(transfer_id, f, rd)['id']
          328         _upload_chunks(transfer_id, file_id, f, rd)
          329 
          330     return _finalize_upload(transfer_id, rd)['shortened_url']
          331 
          332 
          333 if __name__ == '__main__':
          334     import argparse
          335 
          336     ap = argparse.ArgumentParser(
          337         prog='transferwee',
          338         description='Download/upload files via wetransfer.com'
          339     )
          340     sp = ap.add_subparsers(dest='action', help='action')
          341 
          342     # download subcommand
          343     dp = sp.add_parser('download', help='download files')
          344     dp.add_argument('-g', action='store_true',
          345                     help='only print the direct link (without downloading it)')
          346     dp.add_argument('url', nargs='+', type=str, metavar='url',
          347                     help='URL (we.tl/... or wetransfer.com/downloads/...)')
          348 
          349     # upload subcommand
          350     up = sp.add_parser('upload', help='upload files')
          351     up.add_argument('-m', type=str, default='', metavar='message',
          352                     help='message description for the transfer')
          353     up.add_argument('-f', type=str, metavar='from', help='sender email')
          354     up.add_argument('-t', nargs='+', type=str, metavar='to',
          355                     help='recipient emails')
          356     up.add_argument('files', nargs='+', type=str, metavar='file',
          357                     help='files to upload')
          358 
          359     args = ap.parse_args()
          360 
          361     if args.action == 'download':
          362         if args.g:
          363             for u in args.url:
          364                 print(download_url(u))
          365         else:
          366             for u in args.url:
          367                 download(u)
          368         exit(0)
          369 
          370     if args.action == 'upload':
          371         print(upload(args.files, args.m, args.f, args.t))
          372         exit(0)
          373 
          374     # No action selected, print help message
          375     ap.print_help()
          376     exit(1)