Simple backup script in Python

up vote
10
down vote

favorite

Since rsync lacks compression and I'm not a big fan of rdiff, I thought I'd try my own little backup script. Since I'm very inexperienced with file management in Python, I'm sure there is a lot of room for improvement.

The script recreates the entire folder structure within the backup folder and then recursively syncs all files within the source folders. Every file above a certain size threshold gets gzipped on the way.

A possible command would be:

python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2

Please tell me what you think of the source:

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys



def parse_input(argv=None):
    """Parse the command-line options of the backup script.

    Args:
        argv: Optional list of argument strings, without the program name.
            When omitted, ``sys.argv[1:]`` is used, so existing callers that
            pass nothing keep their behaviour.

    Returns:
        An ``argparse.Namespace`` with ``target`` (one-element list holding
        the backup folder), ``source`` (list of one or more source paths) and
        ``compress`` (one-element list holding the gzip threshold in bytes,
        ``[100000]`` by default).

    Raises:
        SystemExit: After printing the help text when no arguments are given,
            or when argparse rejects the arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('-target', nargs=1, required=True,
                        help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
                        help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
                        help='Gzip threshold in bytes', default=[100000])

    # No input means "show me the help" rather than an argparse error about
    # the missing required options.
    if not argv:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)





def size_if_newer(source, target):
    """Return the size of ``source`` when it is newer than its backup.

    The backup is looked up first at ``target`` and then at
    ``target + '.gz'``; when neither exists the backup counts as infinitely
    old. Returns ``False`` when the source is not newer.
    """
    source_info = os.stat(source)

    backup_mtime = 0
    for candidate in (target, target + '.gz'):
        try:
            backup_mtime = os.stat(candidate).st_mtime
        except FileNotFoundError:
            continue
        break

    # A tolerance of one second is needed because the sub-second part of
    # st_mtime is stripped by copy2.
    if source_info.st_mtime - backup_mtime > 1:
        return source_info.st_size
    return False





def sync_file(source, target, compress):
    """Back up ``source`` to ``target`` when the existing backup is stale.

    Args:
        source: Path of the file to back up.
        target: Destination path of the backup (without any ``.gz`` suffix).
        compress: Size threshold in bytes; files larger than this are
            gzip-compressed instead of copied.
    """
    size = size_if_newer(source, target)

    # size_if_newer() reports "up to date" with False. A newer but empty file
    # has size 0, which is falsy too, so a plain truthiness test would never
    # back up empty files; compare by identity instead.
    if size is not False:
        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):
    """Copy ``source`` to ``target``, gzip-compressing it when requested.

    Args:
        source: Path of the file to back up.
        target: Destination path; ``'.gz'`` is appended when compressing.
        compress: Truthy to write a gzip archive, falsy to make a plain copy
            with ``shutil.copy2``.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """
    # Create the destination directory up front. Doing this in an
    # ``except FileNotFoundError`` handler also caught a missing *source*
    # and then failed with a misleading FileExistsError; ``exist_ok`` keeps
    # the call safe when several transfers create the same directory.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if compress:
        # Open the source first so a missing source does not leave an empty
        # archive behind, and stream in fixed-size chunks instead of
        # splitting binary data on newline bytes.
        with open(source, 'rb') as source_fid:
            with gzip.open(target + '.gz', 'wb') as target_fid:
                shutil.copyfileobj(source_fid, target_fid)
        print('Compress {}'.format(source))
    else:
        shutil.copy2(source, target)
        print('Copy {}'.format(source))





def sync_root(root, arg):
    """Back up ``root`` into the target folder, keeping its path structure.

    Args:
        root: Source directory, walked recursively, or a single regular file.
        arg: Parsed options; ``arg.target[0]`` is the backup folder and
            ``arg.compress[0]`` the gzip threshold in bytes.
    """
    target = arg.target[0]
    compress = arg.compress[0]

    def backup_path(source):
        # Plain string concatenation dropped the separator between the
        # backup folder and a relative source path. Strip any drive and
        # leading separators so that absolute sources still nest below
        # ``target`` instead of making os.path.join discard it.
        relative = os.path.splitdrive(source)[1].lstrip('/' + os.sep)
        return os.path.join(target, relative)

    # os.walk() yields nothing for a regular file, which used to skip such
    # sources silently.
    if os.path.isfile(root):
        sync_file(root, backup_path(root), compress)
        return

    for path, _, files in os.walk(root):
        for name in files:
            source = os.path.join(path, name)
            sync_file(source, backup_path(source), compress)





if __name__ == '__main__':
    # Parse the command line once, then back up each source root in turn.
    options = parse_input()
    print('### Start copy ####')
    for source_root in options.source:
        sync_root(source_root, options)
    print('### Done ###')

edited Nov 18 at 6:34

200_success

127k15148411

asked Aug 21 '15 at 23:10

magu_

463517

For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32

add a comment |

up vote
10
down vote

favorite

A possible command would be:

python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2

Please tell me what you think of the source:

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False





def sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):

    """ Either copy or compress and copies the file """



    try:

        if compress:

            with gzip.open(target + '.gz', 'wb') as target_fid:

                with open(source, 'rb') as source_fid:

                    target_fid.writelines(source_fid)

            print('Compress {}'.format(source))

        else:

            shutil.copy2(source, target)

            print('Copy {}'.format(source))

    except FileNotFoundError:

        os.makedirs(os.path.dirname(target))

        transfer_file(source, target, compress)





def sync_root(root, arg):

    target = arg.target[0]

    compress = arg.compress[0]



    for path, _, files in os.walk(root):

        for source in files:

            source = path + '/' + source

            sync_file(source, target + source, compress)





if __name__ == '__main__':

    arg = parse_input()

    print('### Start copy ####')

    for root in arg.source:

        sync_root(root, arg)

    print('### Done ###')

edited Nov 18 at 6:34

200_success

127k15148411

asked Aug 21 '15 at 23:10

magu_

463517

For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32

add a comment |

up vote
10
down vote

favorite

A possible command would be:

python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2

Please tell me what you think of the source:

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False





def sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):

    """ Either copy or compress and copies the file """



    try:

        if compress:

            with gzip.open(target + '.gz', 'wb') as target_fid:

                with open(source, 'rb') as source_fid:

                    target_fid.writelines(source_fid)

            print('Compress {}'.format(source))

        else:

            shutil.copy2(source, target)

            print('Copy {}'.format(source))

    except FileNotFoundError:

        os.makedirs(os.path.dirname(target))

        transfer_file(source, target, compress)





def sync_root(root, arg):

    target = arg.target[0]

    compress = arg.compress[0]



    for path, _, files in os.walk(root):

        for source in files:

            source = path + '/' + source

            sync_file(source, target + source, compress)





if __name__ == '__main__':

    arg = parse_input()

    print('### Start copy ####')

    for root in arg.source:

        sync_root(root, arg)

    print('### Done ###')

edited Nov 18 at 6:34

200_success

127k15148411

asked Aug 21 '15 at 23:10

magu_

463517

A possible command would be:

python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2

Please tell me what you think of the source:

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False





def sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):

    """ Either copy or compress and copies the file """



    try:

        if compress:

            with gzip.open(target + '.gz', 'wb') as target_fid:

                with open(source, 'rb') as source_fid:

                    target_fid.writelines(source_fid)

            print('Compress {}'.format(source))

        else:

            shutil.copy2(source, target)

            print('Copy {}'.format(source))

    except FileNotFoundError:

        os.makedirs(os.path.dirname(target))

        transfer_file(source, target, compress)





def sync_root(root, arg):

    target = arg.target[0]

    compress = arg.compress[0]



    for path, _, files in os.walk(root):

        for source in files:

            source = path + '/' + source

            sync_file(source, target + source, compress)





if __name__ == '__main__':

    arg = parse_input()

    print('### Start copy ####')

    for root in arg.source:

        sync_root(root, arg)

    print('### Done ###')

python python-3.x file-system compression

edited Nov 18 at 6:34

200_success

127k15148411

asked Aug 21 '15 at 23:10

magu_

463517

edited Nov 18 at 6:34

200_success

127k15148411

asked Aug 21 '15 at 23:10

magu_

463517

edited Nov 18 at 6:34

200_success

127k15148411

edited Nov 18 at 6:34

200_success

127k15148411

edited Nov 18 at 6:34

200_success

127k15148411

asked Aug 21 '15 at 23:10

magu_

463517

asked Aug 21 '15 at 23:10

magu_

463517

asked Aug 21 '15 at 23:10

magu_

463517

For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32

add a comment |

For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32

For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32

add a comment |

2 Answers
2

active

oldest

votes

up vote
8
down vote

accepted

This program is really a pleasure to read. It accomplishes the desired outcome neatly and succinctly. The only thing I could think of that might be an improvement is threading. Other than that this program looks fantastic.

I can see where making this program threaded can be a great benefit when dealing with smaller files. I'm not sure, but I fear with larger files or smaller buffering, having this program run copy routines in parallel might bring the system to a halt. I wrote up a minor modification to your excellent source code to illustrate my idea:

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys

import threading



def parse_input(argv=None):
    """Parse the command-line options of the backup script.

    Args:
        argv: Optional list of argument strings, without the program name.
            When omitted, ``sys.argv[1:]`` is used, so existing callers that
            pass nothing keep their behaviour.

    Returns:
        An ``argparse.Namespace`` with ``target`` (one-element list holding
        the backup folder), ``source`` (list of one or more source paths) and
        ``compress`` (one-element list holding the gzip threshold in bytes,
        ``[100000]`` by default).

    Raises:
        SystemExit: After printing the help text when no arguments are given,
            or when argparse rejects the arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('-target', nargs=1, required=True,
                        help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
                        help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
                        help='Gzip threshold in bytes', default=[100000])

    # No input means "show me the help" rather than an argparse error about
    # the missing required options.
    if not argv:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)





def size_if_newer(source, target):
    """Return the size of ``source`` when it is newer than its backup.

    The backup is looked up first at ``target`` and then at
    ``target + '.gz'``; when neither exists the backup counts as infinitely
    old. Returns ``False`` when the source is not newer.
    """
    source_info = os.stat(source)

    backup_mtime = 0
    for candidate in (target, target + '.gz'):
        try:
            backup_mtime = os.stat(candidate).st_mtime
        except FileNotFoundError:
            continue
        break

    # A tolerance of one second is needed because the sub-second part of
    # st_mtime is stripped by copy2.
    if source_info.st_mtime - backup_mtime > 1:
        return source_info.st_size
    return False



def threaded_sync_file(source, target, compress):
    """Start a background transfer of ``source`` when its backup is stale.

    Args:
        source: Path of the file to back up.
        target: Destination path of the backup (without any ``.gz`` suffix).
        compress: Size threshold in bytes; files larger than this are
            gzip-compressed instead of copied.

    Returns:
        The already started ``threading.Thread`` running ``transfer_file``,
        or ``None`` when the backup is up to date. Callers must skip ``None``
        before joining.
    """
    size = size_if_newer(source, target)

    # size_if_newer() reports "up to date" with False. A newer but empty file
    # has size 0, which is falsy too, so test identity rather than
    # truthiness or empty files are never backed up.
    if size is False:
        return None

    thread = threading.Thread(target=transfer_file,
                              args=(source, target, size > compress))
    thread.start()
    return thread



def sync_file(source, target, compress):
    """Back up ``source`` to ``target`` when the existing backup is stale.

    Args:
        source: Path of the file to back up.
        target: Destination path of the backup (without any ``.gz`` suffix).
        compress: Size threshold in bytes; files larger than this are
            gzip-compressed instead of copied.
    """
    size = size_if_newer(source, target)

    # size_if_newer() reports "up to date" with False. A newer but empty file
    # has size 0, which is falsy too, so a plain truthiness test would never
    # back up empty files; compare by identity instead.
    if size is not False:
        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):
    """Copy ``source`` to ``target``, gzip-compressing it when requested.

    Args:
        source: Path of the file to back up.
        target: Destination path; ``'.gz'`` is appended when compressing.
        compress: Truthy to write a gzip archive, falsy to make a plain copy
            with ``shutil.copy2``.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """
    # Create the destination directory up front. Doing this in an
    # ``except FileNotFoundError`` handler also caught a missing *source*
    # and then failed with a misleading FileExistsError. ``exist_ok`` matters
    # here because transfers run in threads and several of them may try to
    # create the same directory at once.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if compress:
        # Open the source first so a missing source does not leave an empty
        # archive behind, and stream in fixed-size chunks instead of
        # splitting binary data on newline bytes.
        with open(source, 'rb') as source_fid:
            with gzip.open(target + '.gz', 'wb') as target_fid:
                shutil.copyfileobj(source_fid, target_fid)
        print('Compress {}'.format(source))
    else:
        shutil.copy2(source, target)
        print('Copy {}'.format(source))





def sync_root(root, arg):
    """Back up ``root`` into the target folder using one thread per file.

    Args:
        root: Source directory, walked recursively, or a single regular file.
        arg: Parsed options; ``arg.target[0]`` is the backup folder and
            ``arg.compress[0]`` the gzip threshold in bytes.

    Blocks until every transfer thread started for this root has finished.
    """
    target = arg.target[0]
    compress = arg.compress[0]
    threads = []

    def backup_path(source):
        # Plain string concatenation dropped the separator between the
        # backup folder and a relative source path. Strip any drive and
        # leading separators so that absolute sources still nest below
        # ``target`` instead of making os.path.join discard it.
        relative = os.path.splitdrive(source)[1].lstrip('/' + os.sep)
        return os.path.join(target, relative)

    # os.walk() yields nothing for a regular file, which used to skip such
    # sources silently.
    if os.path.isfile(root):
        sources = [root]
    else:
        sources = (os.path.join(path, name)
                   for path, _, files in os.walk(root)
                   for name in files)

    # NOTE(review): this starts one thread per stale file with no upper
    # bound; a very large tree may need a bounded worker pool instead.
    for source in sources:
        thread = threaded_sync_file(source, backup_path(source), compress)
        # Files whose backup is already current yield None, not a thread.
        if thread is not None:
            threads.append(thread)

    for thread in threads:
        thread.join()





if __name__ == '__main__':
    # Parse the command line once, then back up each source root in turn.
    options = parse_input()
    print('### Start copy ####')
    for source_root in options.source:
        sync_root(source_root, options)
    print('### Done ###')

edited Apr 1 '16 at 3:33

answered Apr 1 '16 at 3:25

motoku

1,11111139

Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
– magu_
Apr 1 '16 at 15:23

@magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
– Turkhan Badalov
Jun 4 at 17:55

add a comment |

up vote
3
down vote

If the file size is 0, the script ignores the file: the size function returns 0, and that same value is used to decide whether the file is newer. I noticed this when testing: I created an empty file and saw that it was not mirrored. I think the right behavior would be to copy the empty file.

Also, it does not mirror deletions: if something is deleted in the source folder, it will not be deleted in the 'synced' folder.

answered Nov 17 at 23:29

aljgom

1312

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});

}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f101616%2fsimple-backup-script-in-python%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

2 Answers
2

active

oldest

votes

2 Answers
2

active

oldest

votes

up vote
8
down vote

accepted

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys

import threading



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False



def threaded_sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        thread = threading.Thread(target=transfer_file, 

                                  args=(source, target, size > compress))

        thread.start()

        return thread



def sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):

    """ Either copy or compress and copies the file """



    try:

        if compress:

            with gzip.open(target + '.gz', 'wb') as target_fid:

                with open(source, 'rb') as source_fid:

                    target_fid.writelines(source_fid)

            print('Compress {}'.format(source))

        else:

            shutil.copy2(source, target)

            print('Copy {}'.format(source))

    except FileNotFoundError:

        os.makedirs(os.path.dirname(target))

        transfer_file(source, target, compress)





def sync_root(root, arg):

    target = arg.target[0]

    compress = arg.compress[0]

    threads = 



    for path, _, files in os.walk(root):

        for source in files:

            source = path + '/' + source

            threads.append(threaded_sync_file(source, 

                           target + source, compress))

#            sync_file(source, target + source, compress)

    for thread in threads:

        thread.join()





if __name__ == '__main__':

    arg = parse_input()

    print('### Start copy ####')

    for root in arg.source:

        sync_root(root, arg)

    print('### Done ###')

edited Apr 1 '16 at 3:33

answered Apr 1 '16 at 3:25

motoku

1,11111139

Thanks for the kind words. Your are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
– magu_
Apr 1 '16 at 15:23

@magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
– Turkhan Badalov
Jun 4 at 17:55

add a comment |

up vote
8
down vote

accepted

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys

import threading



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False



def threaded_sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        thread = threading.Thread(target=transfer_file, 

                                  args=(source, target, size > compress))

        thread.start()

        return thread



def sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):

    """ Either copy or compress and copies the file """



    try:

        if compress:

            with gzip.open(target + '.gz', 'wb') as target_fid:

                with open(source, 'rb') as source_fid:

                    target_fid.writelines(source_fid)

            print('Compress {}'.format(source))

        else:

            shutil.copy2(source, target)

            print('Copy {}'.format(source))

    except FileNotFoundError:

        os.makedirs(os.path.dirname(target))

        transfer_file(source, target, compress)





def sync_root(root, arg):

    target = arg.target[0]

    compress = arg.compress[0]

    threads = 



    for path, _, files in os.walk(root):

        for source in files:

            source = path + '/' + source

            threads.append(threaded_sync_file(source, 

                           target + source, compress))

#            sync_file(source, target + source, compress)

    for thread in threads:

        thread.join()





if __name__ == '__main__':

    arg = parse_input()

    print('### Start copy ####')

    for root in arg.source:

        sync_root(root, arg)

    print('### Done ###')

edited Apr 1 '16 at 3:33

answered Apr 1 '16 at 3:25

motoku

1,11111139

Thanks for the kind words. Your are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
– magu_
Apr 1 '16 at 15:23

@magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
– Turkhan Badalov
Jun 4 at 17:55

add a comment |

up vote
8
down vote

accepted

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys

import threading



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False



def threaded_sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        thread = threading.Thread(target=transfer_file, 

                                  args=(source, target, size > compress))

        thread.start()

        return thread



def sync_file(source, target, compress):

    size = size_if_newer(source, target)



    if size:

        transfer_file(source, target, size > compress)





def transfer_file(source, target, compress):

    """ Either copy or compress and copies the file """



    try:

        if compress:

            with gzip.open(target + '.gz', 'wb') as target_fid:

                with open(source, 'rb') as source_fid:

                    target_fid.writelines(source_fid)

            print('Compress {}'.format(source))

        else:

            shutil.copy2(source, target)

            print('Copy {}'.format(source))

    except FileNotFoundError:

        os.makedirs(os.path.dirname(target))

        transfer_file(source, target, compress)





def sync_root(root, arg):

    target = arg.target[0]

    compress = arg.compress[0]

    threads = 



    for path, _, files in os.walk(root):

        for source in files:

            source = path + '/' + source

            threads.append(threaded_sync_file(source, 

                           target + source, compress))

#            sync_file(source, target + source, compress)

    for thread in threads:

        thread.join()





if __name__ == '__main__':

    arg = parse_input()

    print('### Start copy ####')

    for root in arg.source:

        sync_root(root, arg)

    print('### Done ###')

edited Apr 1 '16 at 3:33

answered Apr 1 '16 at 3:25

motoku

1,11111139

""" Simple backup script which just creates the root structure in an other

folder and syncs everything which recursevely lies within one of the source

folders. For files bigger than a threshold they are first gziped."""



import argparse

import gzip

import os

import shutil

import sys

import threading



def parse_input():

    parser = argparse.ArgumentParser()

    parser.add_argument('-target', nargs=1, required=True,

                        help='Target Backup folder')

    parser.add_argument('-source', nargs='+', required=True,

                        help='Source Files to be added')

    parser.add_argument('-compress', nargs=1,  type=int,

                        help='Gzip threshold in bytes', default=[100000])



    # no input means show me the help

    if len(sys.argv) == 1:

        parser.print_help()

        sys.exit()



    return parser.parse_args()





def size_if_newer(source, target):

    """ If newer it returns size, otherwise it returns False """



    src_stat = os.stat(source)

    try:

        target_ts = os.stat(target).st_mtime

    except FileNotFoundError:

        try:

            target_ts = os.stat(target + '.gz').st_mtime

        except FileNotFoundError:

            target_ts = 0



    # The time difference of one second is necessary since subsecond accuracy

    # of os.st_mtime is striped by copy2

    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False



def threaded_sync_file(source, target, compress):
    """Start a background transfer of ``source`` if it needs backing up.

    ``compress`` is the size threshold: the file is gzipped when its size is
    greater than this value. Returns the already started
    ``threading.Thread``, or ``None`` when ``size_if_newer`` reports nothing
    to do.
    """
    newer_size = size_if_newer(source, target)
    if not newer_size:
        return None

    worker = threading.Thread(
        target=transfer_file,
        args=(source, target, newer_size > compress),
    )
    worker.start()
    return worker



def sync_file(source, target, compress):
    """Back up ``source`` in the calling thread if it needs backing up.

    ``compress`` is the size threshold: the file is gzipped when its size is
    greater than this value. Nothing happens when ``size_if_newer`` returns a
    falsy value.
    """
    newer_size = size_if_newer(source, target)
    if not newer_size:
        return

    transfer_file(source, target, newer_size > compress)





def transfer_file(source, target, compress):
    """Copy ``source`` into the backup, gzipping it on the way if requested.

    Args:
        source: Path of the file to back up.
        target: Destination path inside the backup folder. When ``compress``
            is true the data is written to ``target + '.gz'`` instead.
        compress: Truthy to gzip the content, falsy to copy the file as-is
            with ``shutil.copy2``.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """
    # Create the destination folder up front. ``exist_ok=True`` keeps this
    # safe when several worker threads need the same folder at once, and a
    # target without a directory part needs no folder at all.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    if compress:
        # The source is opened first so that a missing source cannot leave
        # an empty .gz file behind in the backup.
        with open(source, 'rb') as source_fid, \
                gzip.open(target + '.gz', 'wb') as target_fid:
            # Stream in fixed-size chunks; iterating a binary file "by line"
            # can pull an arbitrarily large chunk into memory.
            shutil.copyfileobj(source_fid, target_fid)
        print('Compress {}'.format(source))
    else:
        shutil.copy2(source, target)
        print('Copy {}'.format(source))





def sync_root(root, arg):
    """Back up every file found below ``root`` and wait for completion.

    Args:
        root: Source folder that is walked recursively with ``os.walk``.
        arg: Parsed command-line namespace; ``arg.target[0]`` is the backup
            folder and ``arg.compress[0]`` the gzip threshold in bytes.
    """
    target = arg.target[0]
    compress = arg.compress[0]
    threads = []

    for path, _, files in os.walk(root):
        for name in files:
            source = path + '/' + name
            # The destination is the plain concatenation of the backup
            # folder and the source path, so the source tree is recreated
            # below the backup folder.
            thread = threaded_sync_file(source, target + source, compress)
            # No thread is returned for files that need no transfer; only
            # real threads may be joined below.
            if thread is not None:
                threads.append(thread)

    for thread in threads:
        thread.join()





if __name__ == '__main__':
    # Script entry point: parse the command line once, then back up each
    # requested source root in the order it was given.
    options = parse_input()

    print('### Start copy ####')
    for source_root in options.source:
        sync_root(source_root, options)
    print('### Done ###')

edited Apr 1 '16 at 3:33

answered Apr 1 '16 at 3:25

motoku

1,11111139

edited Apr 1 '16 at 3:33

answered Apr 1 '16 at 3:25

motoku

1,11111139

answered Apr 1 '16 at 3:25

motoku

1,11111139

answered Apr 1 '16 at 3:25

motoku

1,11111139

Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
– magu_
Apr 1 '16 at 15:23

@magu_, could you please share some links, or point me in the right direction, to read about how IO operations slow down the entire process and how threading helps?
– Turkhan Badalov
Jun 4 at 17:55

add a comment |

Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
– magu_
Apr 1 '16 at 15:23

@magu_, could you please share some links, or point me in the right direction, to read about how IO operations slow down the entire process and how threading helps?
– Turkhan Badalov
Jun 4 at 17:55

Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
– magu_
Apr 1 '16 at 15:23

@magu_, could you please share some links, or point me in the right direction, to read about how IO operations slow down the entire process and how threading helps?
– Turkhan Badalov
Jun 4 at 17:55

add a comment |

up vote
3
down vote

Also, it does not mirror deletions: if something is deleted in the source folder, it will not be deleted in the 'synced' folder.

answered Nov 17 at 23:29

aljgom

1312

add a comment |

up vote
3
down vote

Also, it does not mirror deletions: if something is deleted in the source folder, it will not be deleted in the 'synced' folder.

answered Nov 17 at 23:29

aljgom

1312

add a comment |

up vote
3
down vote

Also, it does not mirror deletions: if something is deleted in the source folder, it will not be deleted in the 'synced' folder.

answered Nov 17 at 23:29

aljgom

1312

Also, it does not mirror deletions: if something is deleted in the source folder, it will not be deleted in the 'synced' folder.

answered Nov 17 at 23:29

aljgom

1312

answered Nov 17 at 23:29

aljgom

1312

answered Nov 17 at 23:29

aljgom

1312

answered Nov 17 at 23:29

aljgom

1312

add a comment |

draft saved

draft discarded

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

搜尋此網誌

Gfrktyl