Simple backup script in Python











up vote
10
down vote

favorite
2












Since rsync lacks compression and I'm not a big fan of rdiff, I thought I'd try my own little backup script. Since I'm very inexperienced with file management in Python, I'm sure there is a lot of room for improvement.



The script creates the entire folder structure within the backup folder and then recursively syncs all files within the source folders. Every file above a certain size threshold gets gzipped on the way.



A possible command would be:




python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2




Please tell me what you think of the source:



""" Simple backup script which just creates the root structure in another
folder and syncs everything which recursively lies within one of the source
folders. Files bigger than a threshold are gzipped first."""

import argparse
import gzip
import os
import shutil
import sys

def parse_input(argv=None):
    """Parse the command line of the backup script.

    Args:
        argv: Optional list of argument strings, without the program name.
            When omitted, ``sys.argv[1:]`` is used.

    Returns:
        An ``argparse.Namespace`` with three attributes:
        ``target`` (one-element list holding the backup folder),
        ``source`` (list of one or more source paths) and
        ``compress`` (one-element list holding the gzip threshold in bytes,
        ``[100000]`` by default).

    Raises:
        SystemExit: when no arguments are given (after printing the help)
            or when argparse rejects the arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    # nargs=1 is kept on purpose: callers index the result with [0].
    parser.add_argument('-target', nargs=1, required=True,
                        help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
                        help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
                        help='Gzip threshold in bytes', default=[100000])

    # no input means show me the help
    if not argv:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)


def size_if_newer(source, target):
    """Return the size of ``source`` if it is newer than its backup.

    The backup may exist as ``target`` (plain copy), as ``target + '.gz'``
    (compressed copy), or as both when a file crossed the compression
    threshold between runs. The most recent of the existing copies is the
    one compared against the source, so a stale leftover copy does not
    trigger a transfer on every run.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy (without the ``.gz`` suffix).

    Returns:
        The source size in bytes when the source is more than one second
        newer than every existing backup copy, otherwise ``False``. The
        size of an empty file is ``0``, so test the result with
        ``is not False`` rather than by truthiness.

    Raises:
        FileNotFoundError: if ``source`` does not exist.
    """
    src_stat = os.stat(source)

    # 0 stands for "no backup copy found".
    target_ts = 0
    for candidate in (target, target + '.gz'):
        try:
            target_ts = max(target_ts, os.stat(candidate).st_mtime)
        except FileNotFoundError:
            continue

    # The time difference of one second is necessary since subsecond accuracy
    # of os.st_mtime is stripped by copy2
    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False


def sync_file(source, target, compress):
    """Back up ``source`` to ``target`` if the source is newer.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy; the compressed copy is
            written next to it with a ``.gz`` suffix.
        compress: Size threshold in bytes; files strictly larger than this
            are gzipped instead of copied.
    """
    size = size_if_newer(source, target)

    # size_if_newer signals "up to date" with False. A size of 0 is a real
    # (empty) file that still has to be mirrored, so test identity instead
    # of truthiness.
    if size is not False:
        transfer_file(source, target, size > compress)


def transfer_file(source, target, compress):
    """Copy ``source`` to ``target``, or gzip it to ``target + '.gz'``.

    The directory that will hold the backup is created first if needed.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy.
        compress: When true, write a gzip archive to ``target + '.gz'``
            instead of copying to ``target``.

    Raises:
        FileNotFoundError: if ``source`` does not exist.
    """
    # Create the destination directory up front. exist_ok avoids failing
    # when it is already there; the guard covers a target with no
    # directory component, for which dirname() is ''.
    directory = os.path.dirname(target)
    if directory:
        os.makedirs(directory, exist_ok=True)

    if compress:
        # Open the source first so that a missing source does not leave an
        # empty archive behind, and stream in chunks instead of by "line"
        # (a binary file may contain no newline at all).
        with open(source, 'rb') as source_fid, \
                gzip.open(target + '.gz', 'wb') as target_fid:
            shutil.copyfileobj(source_fid, target_fid)
        print('Compress {}'.format(source))
    else:
        shutil.copy2(source, target)
        print('Copy {}'.format(source))


def sync_root(root, arg):
    """Back up every file found below ``root``.

    Each file's source path is recreated underneath the target folder.

    Args:
        root: Source directory to walk recursively.
        arg: Parsed arguments; ``arg.target[0]`` is the backup folder and
            ``arg.compress[0]`` the gzip threshold in bytes.
    """
    target = arg.target[0]
    compress = arg.compress[0]
    separators = os.sep + (os.altsep or '')

    for path, _, files in os.walk(root):
        for name in files:
            source = os.path.join(path, name)
            # Plain string concatenation only works when the target ends
            # with a separator. Strip the drive and leading separators so
            # that os.path.join keeps the target even for absolute sources.
            relative = os.path.splitdrive(source)[1].lstrip(separators)
            sync_file(source, os.path.join(target, relative), compress)


if __name__ == '__main__':
    # Parse the command line once, then mirror each source root in turn.
    arg = parse_input()
    print('### Start copy ####')
    for root in arg.source:
        sync_root(root, arg)
    print('### Done ###')









share|improve this question
























  • For the record (even though it is a year later), rsync does support compression with the -z flag.
    – Graipher
    Dec 5 '16 at 11:32















up vote
10
down vote

favorite
2












Since rsync lacks compression and I'm not a big fan of rdiff, I thought I'd try my own little backup script. Since I'm very inexperienced with file management in Python, I'm sure there is a lot of room for improvement.



The script creates the entire folder structure within the backup folder and then recursively syncs all files within the source folders. Every file above a certain size threshold gets gzipped on the way.



A possible command would be:




python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2




Please tell me what you think of the source:



""" Simple backup script which just creates the root structure in an other
folder and syncs everything which recursevely lies within one of the source
folders. For files bigger than a threshold they are first gziped."""

import argparse
import gzip
import os
import shutil
import sys

def parse_input():
parser = argparse.ArgumentParser()
parser.add_argument('-target', nargs=1, required=True,
help='Target Backup folder')
parser.add_argument('-source', nargs='+', required=True,
help='Source Files to be added')
parser.add_argument('-compress', nargs=1, type=int,
help='Gzip threshold in bytes', default=[100000])

# no input means show me the help
if len(sys.argv) == 1:
parser.print_help()
sys.exit()

return parser.parse_args()


def size_if_newer(source, target):
""" If newer it returns size, otherwise it returns False """

src_stat = os.stat(source)
try:
target_ts = os.stat(target).st_mtime
except FileNotFoundError:
try:
target_ts = os.stat(target + '.gz').st_mtime
except FileNotFoundError:
target_ts = 0

# The time difference of one second is necessary since subsecond accuracy
# of os.st_mtime is striped by copy2
return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False


def sync_file(source, target, compress):
size = size_if_newer(source, target)

if size:
transfer_file(source, target, size > compress)


def transfer_file(source, target, compress):
""" Either copy or compress and copies the file """

try:
if compress:
with gzip.open(target + '.gz', 'wb') as target_fid:
with open(source, 'rb') as source_fid:
target_fid.writelines(source_fid)
print('Compress {}'.format(source))
else:
shutil.copy2(source, target)
print('Copy {}'.format(source))
except FileNotFoundError:
os.makedirs(os.path.dirname(target))
transfer_file(source, target, compress)


def sync_root(root, arg):
target = arg.target[0]
compress = arg.compress[0]

for path, _, files in os.walk(root):
for source in files:
source = path + '/' + source
sync_file(source, target + source, compress)


if __name__ == '__main__':
arg = parse_input()
print('### Start copy ####')
for root in arg.source:
sync_root(root, arg)
print('### Done ###')









share|improve this question
























  • For the record (even though it is a year later), rsync does support compression with the -z flag.
    – Graipher
    Dec 5 '16 at 11:32













up vote
10
down vote

favorite
2









up vote
10
down vote

favorite
2






2





Since rsync lacks compression and I'm not a big fan of rdiff, I thought I'd try my own little backup script. Since I'm very inexperienced with file management in Python, I'm sure there is a lot of room for improvement.



The script creates the entire folder structure within the backup folder and then recursively syncs all files within the source folders. Every file above a certain size threshold gets gzipped on the way.



A possible command would be:




python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2




Please tell me what you think of the source:



""" Simple backup script which just creates the root structure in an other
folder and syncs everything which recursevely lies within one of the source
folders. For files bigger than a threshold they are first gziped."""

import argparse
import gzip
import os
import shutil
import sys

def parse_input():
parser = argparse.ArgumentParser()
parser.add_argument('-target', nargs=1, required=True,
help='Target Backup folder')
parser.add_argument('-source', nargs='+', required=True,
help='Source Files to be added')
parser.add_argument('-compress', nargs=1, type=int,
help='Gzip threshold in bytes', default=[100000])

# no input means show me the help
if len(sys.argv) == 1:
parser.print_help()
sys.exit()

return parser.parse_args()


def size_if_newer(source, target):
""" If newer it returns size, otherwise it returns False """

src_stat = os.stat(source)
try:
target_ts = os.stat(target).st_mtime
except FileNotFoundError:
try:
target_ts = os.stat(target + '.gz').st_mtime
except FileNotFoundError:
target_ts = 0

# The time difference of one second is necessary since subsecond accuracy
# of os.st_mtime is striped by copy2
return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False


def sync_file(source, target, compress):
size = size_if_newer(source, target)

if size:
transfer_file(source, target, size > compress)


def transfer_file(source, target, compress):
""" Either copy or compress and copies the file """

try:
if compress:
with gzip.open(target + '.gz', 'wb') as target_fid:
with open(source, 'rb') as source_fid:
target_fid.writelines(source_fid)
print('Compress {}'.format(source))
else:
shutil.copy2(source, target)
print('Copy {}'.format(source))
except FileNotFoundError:
os.makedirs(os.path.dirname(target))
transfer_file(source, target, compress)


def sync_root(root, arg):
target = arg.target[0]
compress = arg.compress[0]

for path, _, files in os.walk(root):
for source in files:
source = path + '/' + source
sync_file(source, target + source, compress)


if __name__ == '__main__':
arg = parse_input()
print('### Start copy ####')
for root in arg.source:
sync_root(root, arg)
print('### Done ###')









share|improve this question















Since rsync lacks compression and I'm not a big fan of rdiff, I thought I'd try my own little backup script. Since I'm very inexperienced with file management in Python, I'm sure there is a lot of room for improvement.



The script creates the entire folder structure within the backup folder and then recursively syncs all files within the source folders. Every file above a certain size threshold gets gzipped on the way.



A possible command would be:




python3 sync.py -target MY_BACKUPFOLDER -source IMPORTANT_1 IMPORTANT_2




Please tell me what you think of the source:



""" Simple backup script which just creates the root structure in an other
folder and syncs everything which recursevely lies within one of the source
folders. For files bigger than a threshold they are first gziped."""

import argparse
import gzip
import os
import shutil
import sys

def parse_input():
parser = argparse.ArgumentParser()
parser.add_argument('-target', nargs=1, required=True,
help='Target Backup folder')
parser.add_argument('-source', nargs='+', required=True,
help='Source Files to be added')
parser.add_argument('-compress', nargs=1, type=int,
help='Gzip threshold in bytes', default=[100000])

# no input means show me the help
if len(sys.argv) == 1:
parser.print_help()
sys.exit()

return parser.parse_args()


def size_if_newer(source, target):
""" If newer it returns size, otherwise it returns False """

src_stat = os.stat(source)
try:
target_ts = os.stat(target).st_mtime
except FileNotFoundError:
try:
target_ts = os.stat(target + '.gz').st_mtime
except FileNotFoundError:
target_ts = 0

# The time difference of one second is necessary since subsecond accuracy
# of os.st_mtime is striped by copy2
return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False


def sync_file(source, target, compress):
size = size_if_newer(source, target)

if size:
transfer_file(source, target, size > compress)


def transfer_file(source, target, compress):
""" Either copy or compress and copies the file """

try:
if compress:
with gzip.open(target + '.gz', 'wb') as target_fid:
with open(source, 'rb') as source_fid:
target_fid.writelines(source_fid)
print('Compress {}'.format(source))
else:
shutil.copy2(source, target)
print('Copy {}'.format(source))
except FileNotFoundError:
os.makedirs(os.path.dirname(target))
transfer_file(source, target, compress)


def sync_root(root, arg):
target = arg.target[0]
compress = arg.compress[0]

for path, _, files in os.walk(root):
for source in files:
source = path + '/' + source
sync_file(source, target + source, compress)


if __name__ == '__main__':
arg = parse_input()
print('### Start copy ####')
for root in arg.source:
sync_root(root, arg)
print('### Done ###')






python python-3.x file-system compression






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 18 at 6:34









200_success

127k15148411




127k15148411










asked Aug 21 '15 at 23:10









magu_

463517




463517












  • For the record (even though it is a year later), rsync does support compression with the -z flag.
    – Graipher
    Dec 5 '16 at 11:32


















  • For the record (even though it is a year later), rsync does support compression with the -z flag.
    – Graipher
    Dec 5 '16 at 11:32
















For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32




For the record (even though it is a year later), rsync does support compression with the -z flag.
– Graipher
Dec 5 '16 at 11:32










2 Answers
2






active

oldest

votes

















up vote
8
down vote



accepted










This program is really a pleasure to read. It accomplishes the desired outcome neatly and succinctly. The only thing I could think of that might be an improvement is threading. Other than that this program looks fantastic.



I can see where making this program threaded can be a great benefit when dealing with smaller files. I'm not sure, but I fear with larger files or smaller buffering, having this program run copy routines in parallel might bring the system to a halt. I wrote up a minor modification to your excellent source code to illustrate my idea:



""" Simple backup script which just creates the root structure in another
folder and syncs everything which recursively lies within one of the source
folders. Files bigger than a threshold are gzipped first."""

import argparse
import gzip
import os
import shutil
import sys
import threading

def parse_input(argv=None):
    """Parse the command line of the backup script.

    Args:
        argv: Optional list of argument strings, without the program name.
            When omitted, ``sys.argv[1:]`` is used.

    Returns:
        An ``argparse.Namespace`` with three attributes:
        ``target`` (one-element list holding the backup folder),
        ``source`` (list of one or more source paths) and
        ``compress`` (one-element list holding the gzip threshold in bytes,
        ``[100000]`` by default).

    Raises:
        SystemExit: when no arguments are given (after printing the help)
            or when argparse rejects the arguments.
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    # nargs=1 is kept on purpose: callers index the result with [0].
    parser.add_argument('-target', nargs=1, required=True,
                        help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
                        help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
                        help='Gzip threshold in bytes', default=[100000])

    # no input means show me the help
    if not argv:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)


def size_if_newer(source, target):
    """Return the size of ``source`` if it is newer than its backup.

    The backup may exist as ``target`` (plain copy), as ``target + '.gz'``
    (compressed copy), or as both when a file crossed the compression
    threshold between runs. The most recent of the existing copies is the
    one compared against the source, so a stale leftover copy does not
    trigger a transfer on every run.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy (without the ``.gz`` suffix).

    Returns:
        The source size in bytes when the source is more than one second
        newer than every existing backup copy, otherwise ``False``. The
        size of an empty file is ``0``, so test the result with
        ``is not False`` rather than by truthiness.

    Raises:
        FileNotFoundError: if ``source`` does not exist.
    """
    src_stat = os.stat(source)

    # 0 stands for "no backup copy found".
    target_ts = 0
    for candidate in (target, target + '.gz'):
        try:
            target_ts = max(target_ts, os.stat(candidate).st_mtime)
        except FileNotFoundError:
            continue

    # The time difference of one second is necessary since subsecond accuracy
    # of os.st_mtime is stripped by copy2
    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False

def threaded_sync_file(source, target, compress):
    """Start a background transfer of ``source`` if it is newer.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy; the compressed copy is
            written next to it with a ``.gz`` suffix.
        compress: Size threshold in bytes; files strictly larger than this
            are gzipped instead of copied.

    Returns:
        The already started ``threading.Thread`` running the transfer, or
        ``None`` when the backup is up to date and nothing was started.
    """
    size = size_if_newer(source, target)

    # size_if_newer signals "up to date" with False. A size of 0 is a real
    # (empty) file that still has to be mirrored.
    if size is False:
        return None

    thread = threading.Thread(target=transfer_file,
                              args=(source, target, size > compress))
    thread.start()
    return thread

def sync_file(source, target, compress):
    """Back up ``source`` to ``target`` if the source is newer.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy; the compressed copy is
            written next to it with a ``.gz`` suffix.
        compress: Size threshold in bytes; files strictly larger than this
            are gzipped instead of copied.
    """
    size = size_if_newer(source, target)

    # size_if_newer signals "up to date" with False. A size of 0 is a real
    # (empty) file that still has to be mirrored, so test identity instead
    # of truthiness.
    if size is not False:
        transfer_file(source, target, size > compress)


def transfer_file(source, target, compress):
    """Copy ``source`` to ``target``, or gzip it to ``target + '.gz'``.

    The directory that will hold the backup is created first if needed.

    Args:
        source: Path of the file to back up.
        target: Path of the plain backup copy.
        compress: When true, write a gzip archive to ``target + '.gz'``
            instead of copying to ``target``.

    Raises:
        FileNotFoundError: if ``source`` does not exist.
    """
    # Create the destination directory up front. exist_ok makes this safe
    # when several threads need the same directory at once; the guard
    # covers a target with no directory component (dirname() is '').
    directory = os.path.dirname(target)
    if directory:
        os.makedirs(directory, exist_ok=True)

    if compress:
        # Open the source first so that a missing source does not leave an
        # empty archive behind, and stream in chunks instead of by "line"
        # (a binary file may contain no newline at all).
        with open(source, 'rb') as source_fid, \
                gzip.open(target + '.gz', 'wb') as target_fid:
            shutil.copyfileobj(source_fid, target_fid)
        print('Compress {}'.format(source))
    else:
        shutil.copy2(source, target)
        print('Copy {}'.format(source))


def sync_root(root, arg):
    """Back up every file found below ``root``, one thread per transfer.

    Each file's source path is recreated underneath the target folder.
    The function returns once all started transfers have finished.

    Args:
        root: Source directory to walk recursively.
        arg: Parsed arguments; ``arg.target[0]`` is the backup folder and
            ``arg.compress[0]`` the gzip threshold in bytes.
    """
    target = arg.target[0]
    compress = arg.compress[0]
    separators = os.sep + (os.altsep or '')
    threads = []

    # NOTE(review): one thread is started per out-of-date file with no upper
    # bound; consider a bounded worker pool for very large trees.
    for path, _, files in os.walk(root):
        for name in files:
            source = os.path.join(path, name)
            # Plain string concatenation only works when the target ends
            # with a separator. Strip the drive and leading separators so
            # that os.path.join keeps the target even for absolute sources.
            relative = os.path.splitdrive(source)[1].lstrip(separators)
            thread = threaded_sync_file(source,
                                        os.path.join(target, relative),
                                        compress)
            # Nothing is started for files that are already up to date.
            if thread is not None:
                threads.append(thread)

    for thread in threads:
        thread.join()


if __name__ == '__main__':
    # Parse the command line once, then mirror each source root in turn.
    arg = parse_input()
    print('### Start copy ####')
    for root in arg.source:
        sync_root(root, arg)
    print('### Done ###')





share|improve this answer























  • Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
    – magu_
    Apr 1 '16 at 15:23










  • @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
    – Turkhan Badalov
    Jun 4 at 17:55




















up vote
3
down vote













If the file size is 0, it ignores that file (when it returns 0 from the size function and uses that to determine if the file is newer). I noticed this when testing it and I created an empty file and saw it was not mirrored. I would think the right behavior should be that it should copy the empty file.



Also, it does not mirror deletions: if something is deleted in the source folder, it will not be deleted in the 'synced' folder.






share|improve this answer





















    Your Answer





    StackExchange.ifUsing("editor", function () {
    return StackExchange.using("mathjaxEditing", function () {
    StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
    StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
    });
    });
    }, "mathjax-editing");

    StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
    StackExchange.using("snippets", function () {
    StackExchange.snippets.init();
    });
    });
    }, "code-snippets");

    StackExchange.ready(function() {
    var channelOptions = {
    tags: "".split(" "),
    id: "196"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled) {
    StackExchange.using("snippets", function() {
    createEditor();
    });
    }
    else {
    createEditor();
    }
    });

    function createEditor() {
    StackExchange.prepareEditor({
    heartbeatType: 'answer',
    convertImagesToLinks: false,
    noModals: true,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: null,
    bindNavPrevention: true,
    postfix: "",
    imageUploader: {
    brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
    contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
    allowUrls: true
    },
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    });


    }
    });














     

    draft saved


    draft discarded


















    StackExchange.ready(
    function () {
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f101616%2fsimple-backup-script-in-python%23new-answer', 'question_page');
    }
    );

    Post as a guest















    Required, but never shown

























    2 Answers
    2






    active

    oldest

    votes








    2 Answers
    2






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes








    up vote
    8
    down vote



    accepted










    This program is really a pleasure to read. It accomplishes the desired outcome neatly and succinctly. The only thing I could think of that might be an improvement is threading. Other than that this program looks fantastic.



    I can see where making this program threaded can be a great benefit when dealing with smaller files. I'm not sure, but I fear with larger files or smaller buffering, having this program run copy routines in parallel might bring the system to a halt. I wrote up a minor modification to your excellent source code to illustrate my idea:



    """ Simple backup script which just creates the root structure in an other
    folder and syncs everything which recursevely lies within one of the source
    folders. For files bigger than a threshold they are first gziped."""

    import argparse
    import gzip
    import os
    import shutil
    import sys
    import threading

    def parse_input():
    parser = argparse.ArgumentParser()
    parser.add_argument('-target', nargs=1, required=True,
    help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
    help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
    help='Gzip threshold in bytes', default=[100000])

    # no input means show me the help
    if len(sys.argv) == 1:
    parser.print_help()
    sys.exit()

    return parser.parse_args()


    def size_if_newer(source, target):
    """ If newer it returns size, otherwise it returns False """

    src_stat = os.stat(source)
    try:
    target_ts = os.stat(target).st_mtime
    except FileNotFoundError:
    try:
    target_ts = os.stat(target + '.gz').st_mtime
    except FileNotFoundError:
    target_ts = 0

    # The time difference of one second is necessary since subsecond accuracy
    # of os.st_mtime is striped by copy2
    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False

    def threaded_sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
    thread = threading.Thread(target=transfer_file,
    args=(source, target, size > compress))
    thread.start()
    return thread

    def sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
    transfer_file(source, target, size > compress)


    def transfer_file(source, target, compress):
    """ Either copy or compress and copies the file """

    try:
    if compress:
    with gzip.open(target + '.gz', 'wb') as target_fid:
    with open(source, 'rb') as source_fid:
    target_fid.writelines(source_fid)
    print('Compress {}'.format(source))
    else:
    shutil.copy2(source, target)
    print('Copy {}'.format(source))
    except FileNotFoundError:
    os.makedirs(os.path.dirname(target))
    transfer_file(source, target, compress)


    def sync_root(root, arg):
    target = arg.target[0]
    compress = arg.compress[0]
    threads =

    for path, _, files in os.walk(root):
    for source in files:
    source = path + '/' + source
    threads.append(threaded_sync_file(source,
    target + source, compress))
    # sync_file(source, target + source, compress)
    for thread in threads:
    thread.join()


    if __name__ == '__main__':
    arg = parse_input()
    print('### Start copy ####')
    for root in arg.source:
    sync_root(root, arg)
    print('### Done ###')





    share|improve this answer























    • Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
      – magu_
      Apr 1 '16 at 15:23










    • @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
      – Turkhan Badalov
      Jun 4 at 17:55

















    up vote
    8
    down vote



    accepted










    This program is really a pleasure to read. It accomplishes the desired outcome neatly and succinctly. The only thing I could think of that might be an improvement is threading. Other than that this program looks fantastic.



    I can see where making this program threaded can be a great benefit when dealing with smaller files. I'm not sure, but I fear with larger files or smaller buffering, having this program run copy routines in parallel might bring the system to a halt. I wrote up a minor modification to your excellent source code to illustrate my idea:



    """ Simple backup script which just creates the root structure in an other
    folder and syncs everything which recursevely lies within one of the source
    folders. For files bigger than a threshold they are first gziped."""

    import argparse
    import gzip
    import os
    import shutil
    import sys
    import threading

    def parse_input():
    parser = argparse.ArgumentParser()
    parser.add_argument('-target', nargs=1, required=True,
    help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
    help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
    help='Gzip threshold in bytes', default=[100000])

    # no input means show me the help
    if len(sys.argv) == 1:
    parser.print_help()
    sys.exit()

    return parser.parse_args()


    def size_if_newer(source, target):
    """ If newer it returns size, otherwise it returns False """

    src_stat = os.stat(source)
    try:
    target_ts = os.stat(target).st_mtime
    except FileNotFoundError:
    try:
    target_ts = os.stat(target + '.gz').st_mtime
    except FileNotFoundError:
    target_ts = 0

    # The time difference of one second is necessary since subsecond accuracy
    # of os.st_mtime is striped by copy2
    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False

    def threaded_sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
    thread = threading.Thread(target=transfer_file,
    args=(source, target, size > compress))
    thread.start()
    return thread

    def sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
    transfer_file(source, target, size > compress)


    def transfer_file(source, target, compress):
    """ Either copy or compress and copies the file """

    try:
    if compress:
    with gzip.open(target + '.gz', 'wb') as target_fid:
    with open(source, 'rb') as source_fid:
    target_fid.writelines(source_fid)
    print('Compress {}'.format(source))
    else:
    shutil.copy2(source, target)
    print('Copy {}'.format(source))
    except FileNotFoundError:
    os.makedirs(os.path.dirname(target))
    transfer_file(source, target, compress)


    def sync_root(root, arg):
    target = arg.target[0]
    compress = arg.compress[0]
    threads =

    for path, _, files in os.walk(root):
    for source in files:
    source = path + '/' + source
    threads.append(threaded_sync_file(source,
    target + source, compress))
    # sync_file(source, target + source, compress)
    for thread in threads:
    thread.join()


    if __name__ == '__main__':
    arg = parse_input()
    print('### Start copy ####')
    for root in arg.source:
    sync_root(root, arg)
    print('### Done ###')





    share|improve this answer























    • Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
      – magu_
      Apr 1 '16 at 15:23










    • @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
      – Turkhan Badalov
      Jun 4 at 17:55















    up vote
    8
    down vote



    accepted







    up vote
    8
    down vote



    accepted






    This program is really a pleasure to read. It accomplishes the desired outcome neatly and succinctly. The only thing I could think of that might be an improvement is threading. Other than that this program looks fantastic.



    I can see where making this program threaded can be a great benefit when dealing with smaller files. I'm not sure, but I fear with larger files or smaller buffering, having this program run copy routines in parallel might bring the system to a halt. I wrote up a minor modification to your excellent source code to illustrate my idea:



    """ Simple backup script which just creates the root structure in an other
    folder and syncs everything which recursevely lies within one of the source
    folders. For files bigger than a threshold they are first gziped."""

    import argparse
    import gzip
    import os
    import shutil
    import sys
    import threading

    def parse_input():
    parser = argparse.ArgumentParser()
    parser.add_argument('-target', nargs=1, required=True,
    help='Target Backup folder')
    parser.add_argument('-source', nargs='+', required=True,
    help='Source Files to be added')
    parser.add_argument('-compress', nargs=1, type=int,
    help='Gzip threshold in bytes', default=[100000])

    # no input means show me the help
    if len(sys.argv) == 1:
    parser.print_help()
    sys.exit()

    return parser.parse_args()


    def size_if_newer(source, target):
    """ If newer it returns size, otherwise it returns False """

    src_stat = os.stat(source)
    try:
    target_ts = os.stat(target).st_mtime
    except FileNotFoundError:
    try:
    target_ts = os.stat(target + '.gz').st_mtime
    except FileNotFoundError:
    target_ts = 0

    # The time difference of one second is necessary since subsecond accuracy
    # of os.st_mtime is striped by copy2
    return src_stat.st_size if (src_stat.st_mtime - target_ts > 1) else False

    def threaded_sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
    thread = threading.Thread(target=transfer_file,
    args=(source, target, size > compress))
    thread.start()
    return thread

    def sync_file(source, target, compress):
    size = size_if_newer(source, target)

    if size:
    transfer_file(source, target, size > compress)


    def transfer_file(source, target, compress):
    """ Either copy or compress and copies the file """

    try:
    if compress:
    with gzip.open(target + '.gz', 'wb') as target_fid:
    with open(source, 'rb') as source_fid:
    target_fid.writelines(source_fid)
    print('Compress {}'.format(source))
    else:
    shutil.copy2(source, target)
    print('Copy {}'.format(source))
    except FileNotFoundError:
    os.makedirs(os.path.dirname(target))
    transfer_file(source, target, compress)


    def sync_root(root, arg):
    target = arg.target[0]
    compress = arg.compress[0]
    threads =

    for path, _, files in os.walk(root):
    for source in files:
    source = path + '/' + source
    threads.append(threaded_sync_file(source,
    target + source, compress))
    # sync_file(source, target + source, compress)
    for thread in threads:
    thread.join()


    if __name__ == '__main__':
    arg = parse_input()
    print('### Start copy ####')
    for root in arg.source:
    sync_root(root, arg)
    print('### Done ###')





    share|improve this answer














    This program is really a pleasure to read. It accomplishes the desired outcome neatly and succinctly. The only thing I could think of that might be an improvement is threading. Other than that this program looks fantastic.



    I can see where making this program threaded can be a great benefit when dealing with smaller files. I'm not sure, but I fear with larger files or smaller buffering, having this program run copy routines in parallel might bring the system to a halt. I wrote up a minor modification to your excellent source code to illustrate my idea:



    """ Simple backup script which just creates the root structure in an other
    folder and syncs everything which recursevely lies within one of the source
    folders. For files bigger than a threshold they are first gziped."""

    import argparse
    import gzip
    import os
    import shutil
    import sys
    import threading

    def parse_input():
        """Parse the command line and return the resulting namespace.

        ``-target`` and ``-compress`` are declared with ``nargs=1``, so their
        values arrive as one-element lists; ``-source`` collects one or more
        entries.  When the script is started without any argument the help
        text is printed and the process exits.
        """
        cli = argparse.ArgumentParser()
        cli.add_argument('-target', nargs=1, required=True,
                         help='Target Backup folder')
        cli.add_argument('-source', nargs='+', required=True,
                         help='Source Files to be added')
        cli.add_argument('-compress', nargs=1, type=int,
                         help='Gzip threshold in bytes', default=[100000])

        # A bare invocation gets the usage text rather than an error about
        # the missing required options.
        called_without_arguments = len(sys.argv) == 1
        if called_without_arguments:
            cli.print_help()
            sys.exit()

        return cli.parse_args()


    def size_if_newer(source, target):
        """Return the size of *source* if it is newer than its backup.

        The backup is looked up as *target* first and as ``target + '.gz'``
        second; if neither exists the source always counts as newer.
        Returns ``False`` when the backup is up to date.
        """
        source_info = os.stat(source)

        # Modification time of the first backup variant that exists; 0 when
        # there is no backup yet.
        backup_mtime = 0
        for candidate in (target, target + '.gz'):
            try:
                backup_mtime = os.stat(candidate).st_mtime
            except FileNotFoundError:
                continue
            break

        # The time difference of one second is necessary since subsecond
        # accuracy of os.st_mtime is stripped by copy2
        if source_info.st_mtime - backup_mtime > 1:
            return source_info.st_size
        return False

    def threaded_sync_file(source, target, compress):
        """Start a background transfer of *source* if its backup is stale.

        *compress* is the size threshold in bytes; files larger than it are
        gzipped by ``transfer_file``.

        Returns the started ``threading.Thread``, or ``None`` when the backup
        is already up to date and nothing was started.
        """
        size = size_if_newer(source, target)

        # size_if_newer reports "up to date" as False.  A size of 0 means a
        # newer *empty* file, which still has to be backed up, so a plain
        # truthiness test would wrongly skip it.
        if size is False:
            return None

        thread = threading.Thread(target=transfer_file,
                                  args=(source, target, size > compress))
        thread.start()
        return thread

    def sync_file(source, target, compress):
        """Transfer *source* to *target* if the backup is missing or stale.

        *compress* is the size threshold in bytes; files larger than it are
        gzipped by ``transfer_file``.
        """
        size = size_if_newer(source, target)

        # size_if_newer reports "up to date" as False.  A size of 0 means a
        # newer *empty* file, which still has to be backed up, so a plain
        # truthiness test would wrongly skip it.
        if size is not False:
            transfer_file(source, target, size > compress)


    def transfer_file(source, target, compress):
        """Copy *source* to *target*, gzipping it on the way if requested.

        With *compress* true the data is written to ``target + '.gz'``;
        otherwise the file is copied with ``shutil.copy2``.  Missing parent
        folders of *target* are created first.

        Raises ``FileNotFoundError`` if *source* does not exist.
        """
        # Create the destination folder up front.  exist_ok keeps this safe
        # when several threads need the same folder at the same time.
        target_dir = os.path.dirname(target)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        if compress:
            # Open the source first so that an unreadable source does not
            # leave an empty archive behind.
            with open(source, 'rb') as source_fid:
                with gzip.open(target + '.gz', 'wb') as target_fid:
                    shutil.copyfileobj(source_fid, target_fid)
            print('Compress {}'.format(source))
        else:
            shutil.copy2(source, target)
            print('Copy {}'.format(source))


    def sync_root(root, arg):
        """Back up every file found below *root*, one thread per stale file.

        *arg* is the parsed command line: ``arg.target[0]`` is the backup
        folder and ``arg.compress[0]`` the gzip threshold in bytes.  The call
        returns once all transfers started for *root* have finished.
        """
        target = arg.target[0]
        compress = arg.compress[0]
        threads = []

        for path, _, files in os.walk(root):
            for name in files:
                source = os.path.join(path, name)
                # The backup path is the plain concatenation of the target
                # folder and the source path, so the source tree is recreated
                # beneath the target (which therefore needs its trailing
                # separator).
                thread = threaded_sync_file(source, target + source, compress)
                # Nothing is started for files that are already up to date;
                # only real threads can be joined.
                if thread is not None:
                    threads.append(thread)

        for thread in threads:
            thread.join()


    if __name__ == '__main__':
        # Script entry point: back up each requested source folder in turn.
        options = parse_input()
        print('### Start copy ####')
        for source_root in options.source:
            sync_root(source_root, options)
        print('### Done ###')






    share|improve this answer














    share|improve this answer



    share|improve this answer








    edited Apr 1 '16 at 3:33

























    answered Apr 1 '16 at 3:25









    motoku

    1,11111139




    1,11111139












    • Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
      – magu_
      Apr 1 '16 at 15:23










    • @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
      – Turkhan Badalov
      Jun 4 at 17:55




















    • Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
      – magu_
      Apr 1 '16 at 15:23










    • @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
      – Turkhan Badalov
      Jun 4 at 17:55


















    Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
    – magu_
    Apr 1 '16 at 15:23




    Thanks for the kind words. You are absolutely right about threading. Waiting for IO operations could slow down the entire process for small files.
    – magu_
    Apr 1 '16 at 15:23












    @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
    – Turkhan Badalov
    Jun 4 at 17:55






    @magu_, could you please share some links or refer the direction where to dig to read about IO operations making the entire process slow and how threading helps?
    – Turkhan Badalov
    Jun 4 at 17:55














    up vote
    3
    down vote













    If the file size is 0, it ignores that file (when it returns 0 from the size function and uses that to determine if the file is newer). I noticed this when testing it and I created an empty file and saw it was not mirrored. I would think the right behavior should be that it should copy the empty file.



    Also, it does not mirror deletions, if something is deleted in the source folder, it will not be deleted in the 'synced' folder






    share|improve this answer

























      up vote
      3
      down vote













      If the file size is 0, it ignores that file (when it returns 0 from the size function and uses that to determine if the file is newer). I noticed this when testing it and I created an empty file and saw it was not mirrored. I would think the right behavior should be that it should copy the empty file.



      Also, it does not mirror deletions, if something is deleted in the source folder, it will not be deleted in the 'synced' folder






      share|improve this answer























        up vote
        3
        down vote










        up vote
        3
        down vote









        If the file size is 0, it ignores that file (when it returns 0 from the size function and uses that to determine if the file is newer). I noticed this when testing it and I created an empty file and saw it was not mirrored. I would think the right behavior should be that it should copy the empty file.



        Also, it does not mirror deletions, if something is deleted in the source folder, it will not be deleted in the 'synced' folder






        share|improve this answer












        If the file size is 0, it ignores that file (when it returns 0 from the size function and uses that to determine if the file is newer). I noticed this when testing it and I created an empty file and saw it was not mirrored. I would think the right behavior should be that it should copy the empty file.



        Also, it does not mirror deletions, if something is deleted in the source folder, it will not be deleted in the 'synced' folder







        share|improve this answer












        share|improve this answer



        share|improve this answer










        answered Nov 17 at 23:29









        aljgom

        1312




        1312






























             

            draft saved


            draft discarded



















































             


            draft saved


            draft discarded














            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f101616%2fsimple-backup-script-in-python%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown





















































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown

































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown







            Popular posts from this blog

            Terni

            A new problem with tex4ht and tikz

            Sun Ra