Django module to transform data from database to be displayed in Slickgrid tables











up vote
3
down vote

favorite












This is a Django tag with 2 classes, but can be viewed as a module:



What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables



It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.



This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:



class RenderTag:

@staticmethod
def get_context_data():

annotations = Annotations()
df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)

# json data for js tables
json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)

context = {
"json_1_1": json_for_category_1_1.to_json(orient='split'),
"json_1_2": json_for_category_1_2.to_json(orient='split'),
"json_1_3": json_for_category_1_3.to_json(orient='split'),
"json_1_4": json_for_category_1_4.to_json(orient='split'),
"json_2_1": json_for_category_2_1.to_json(orient='split'),
"json_2_2": json_for_category_2_2.to_json(orient='split'),
"json_2_3": json_for_category_2_3.to_json(orient='split'),
"json_2_4": json_for_category_2_4.to_json(orient='split'),
"json_3_1": json_for_category_3_1.to_json(orient='split'),
"json_3_2": json_for_category_3_2.to_json(orient='split'),
"json_3_3": json_for_category_3_3.to_json(orient='split'),
"json_3_4": json_for_category_3_4.to_json(orient='split'),
}

return context


This class I think needs a lot of improvement:



class Annotations:

def __init__(self):

# loading data
self.df_type_2 = helpers.load_database("type_2").round(2)
self.df_type_3 = helpers.load_database("type_3").round(2)
self.df_type_1 = helpers.load_database("type_1").round(2)

# main transformations

# type_2 and 4
self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
self.df_type_2 = self.do_transformations_for_2(self.df_type_2)

# type_1
self.df_type_1 = self.do_transformations_for_1(self.df_type_1)

# type_3
self.df_type_3 = self.do_transformations_for_3(self.df_type_3)

# and I have 4 methods that call a lot of static functions

def do_transformations_for_1(self, df):
"""
This is the main function that edits the data for type 1
We take the main df and then we run a series of manipulations

Args:
df(pd.DataFrame): the df that we want to process

Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df["id"] = df.index
df = df.pipe(self.do_something)
.pipe(self.do_something_1)
.pipe(self.do_something_2)
.pipe(self.do_something_3)
.pipe(self.do_something_4)
.pipe(self.do_something_5)
.pipe(self.add_colors_log2foldchange)
.pipe(self.fill_na_with_empty_strings)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)

return df

def do_transformations_for_2(self, df):
"""
This is a function that runs only for type 2
We take the main df and then we run a series of manipulations

Args:
df(pd.DataFrame): the df that we want to process

Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something)
.pipe(self.add_colors_log2foldchange)
.pipe(self.do_something_7)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)

return df

def do_transformations_for_3(self, df):
"""
This is a function that runs only for type 3. We take the main df and then we run a series of manipulations

Args:
df(pd.DataFrame): the df that we want to process

Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, False)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)

return df

def do_transformations_for_4(self, df):
"""
This is a function that runs only for the type_4
We take the main df and then we run a series of manipulations

Args:
df(pd.DataFrame): the df that we want to process

Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, True)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)

return df

# many static methods that are only used once or twice, deleted many of them


@staticmethod
def unicode_lists_to_string(df, columns):
for column in columns:
df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")

return df

@staticmethod
def transform_type_4_position(df: pd.DataFrame):
"""
Remove copy number from position in type_4 table and also add chr in front

Args:
df(pd.DataFrame):

Returns:
pd.DataFrame: with position modified
"""
df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")

return df

@staticmethod
def filter_categories(df):
"""
Split the df by categories because we want them in separate tables

Args:
df: main df

Returns:
Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3

"""
df_1 = df[df["category"] == 1]
df_2 = df[df["category"] == 2]
df_3 = df[df["category"] == 3]

return df_1, df_2, df_3

@staticmethod
def add_colors_log2foldchange(df: pd.DataFrame):
"""
We want to add background colors to log2foldchange values, fron blue to red

Args:
df(pd.DataFrame): df with log2foldchange values

Returns:
df(pd.DataFrame): df with a new hex_color column
"""
df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")

return df_new

@staticmethod
def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
"""

Args:
df(pd.DataFrame):

Returns:
pd.DataFrame:
"""
def strip_germline_from_alt_ref(row):
if pd.notna(row):
if "]," in row:
row = row.split("],")
row = row[1]

row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")

return row

df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)

return df


As you can see I do modifications for all 4 types in 4 methods.



I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.










share|improve this question




























    up vote
    3
    down vote

    favorite












    This is a Django tag with 2 classes, but can be viewed as a module:



    What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables



    It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.



    This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:



    class RenderTag:

    @staticmethod
    def get_context_data():

    annotations = Annotations()
    df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
    df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
    df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
    df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)

    # json data for js tables
    json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
    json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
    json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
    json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
    json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
    json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
    json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
    json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
    json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
    json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
    json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
    json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)

    context = {
    "json_1_1": json_for_category_1_1.to_json(orient='split'),
    "json_1_2": json_for_category_1_2.to_json(orient='split'),
    "json_1_3": json_for_category_1_3.to_json(orient='split'),
    "json_1_4": json_for_category_1_4.to_json(orient='split'),
    "json_2_1": json_for_category_2_1.to_json(orient='split'),
    "json_2_2": json_for_category_2_2.to_json(orient='split'),
    "json_2_3": json_for_category_2_3.to_json(orient='split'),
    "json_2_4": json_for_category_2_4.to_json(orient='split'),
    "json_3_1": json_for_category_3_1.to_json(orient='split'),
    "json_3_2": json_for_category_3_2.to_json(orient='split'),
    "json_3_3": json_for_category_3_3.to_json(orient='split'),
    "json_3_4": json_for_category_3_4.to_json(orient='split'),
    }

    return context


    This class I think needs a lot of improvement:



    class Annotations:

    def __init__(self):

    # loading data
    self.df_type_2 = helpers.load_database("type_2").round(2)
    self.df_type_3 = helpers.load_database("type_3").round(2)
    self.df_type_1 = helpers.load_database("type_1").round(2)

    # main transformations

    # type_2 and 4
    self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
    self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
    self.df_type_2 = self.do_transformations_for_2(self.df_type_2)

    # type_1
    self.df_type_1 = self.do_transformations_for_1(self.df_type_1)

    # type_3
    self.df_type_3 = self.do_transformations_for_3(self.df_type_3)

    # and I have 4 methods that call a lot of static functions

    def do_transformations_for_1(self, df):
    """
    This is the main function that edits the data for type 1
    We take the main df and then we run a series of manipulations

    Args:
    df(pd.DataFrame): the df that we want to process

    Returns:
    df(pd.DataFrame): the end dataframe that will be transferred to the js file
    """
    df["id"] = df.index
    df = df.pipe(self.do_something)
    .pipe(self.do_something_1)
    .pipe(self.do_something_2)
    .pipe(self.do_something_3)
    .pipe(self.do_something_4)
    .pipe(self.do_something_5)
    .pipe(self.add_colors_log2foldchange)
    .pipe(self.fill_na_with_empty_strings)
    .pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)

    return df

    def do_transformations_for_2(self, df):
    """
    This is a function that runs only for type 2
    We take the main df and then we run a series of manipulations

    Args:
    df(pd.DataFrame): the df that we want to process

    Returns:
    df(pd.DataFrame): the end dataframe that will be transferred to the js file
    """
    df = df.pipe(self.do_something)
    .pipe(self.add_colors_log2foldchange)
    .pipe(self.do_something_7)
    .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)

    return df

    def do_transformations_for_3(self, df):
    """
    This is a function that runs only for type 3. We take the main df and then we run a series of manipulations

    Args:
    df(pd.DataFrame): the df that we want to process

    Returns:
    df(pd.DataFrame): the end dataframe that will be transferred to the js file
    """
    df = df.pipe(self.do_something, False)
    .pipe(self.do_something_9)
    .pipe(self.add_colors_log2foldchange)
    .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)

    return df

    def do_transformations_for_4(self, df):
    """
    This is a function that runs only for the type_4
    We take the main df and then we run a series of manipulations

    Args:
    df(pd.DataFrame): the df that we want to process

    Returns:
    df(pd.DataFrame): the end dataframe that will be transferred to the js file
    """
    df = df.pipe(self.do_something, True)
    .pipe(self.do_something_9)
    .pipe(self.add_colors_log2foldchange)
    .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)

    return df

    # many static methods that are only used once or twice, deleted many of them


    @staticmethod
    def unicode_lists_to_string(df, columns):
    for column in columns:
    df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")

    return df

    @staticmethod
    def transform_type_4_position(df: pd.DataFrame):
    """
    Remove copy number from position in type_4 table and also add chr in front

    Args:
    df(pd.DataFrame):

    Returns:
    pd.DataFrame: with position modified
    """
    df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")

    return df

    @staticmethod
    def filter_categories(df):
    """
    Split the df by categories because we want them in separate tables

    Args:
    df: main df

    Returns:
    Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3

    """
    df_1 = df[df["category"] == 1]
    df_2 = df[df["category"] == 2]
    df_3 = df[df["category"] == 3]

    return df_1, df_2, df_3

    @staticmethod
    def add_colors_log2foldchange(df: pd.DataFrame):
    """
    We want to add background colors to log2foldchange values, fron blue to red

    Args:
    df(pd.DataFrame): df with log2foldchange values

    Returns:
    df(pd.DataFrame): df with a new hex_color column
    """
    df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
    df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")

    return df_new

    @staticmethod
    def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
    """

    Args:
    df(pd.DataFrame):

    Returns:
    pd.DataFrame:
    """
    def strip_germline_from_alt_ref(row):
    if pd.notna(row):
    if "]," in row:
    row = row.split("],")
    row = row[1]

    row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")

    return row

    df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
    df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)

    return df


    As you can see I do modifications for all 4 types in 4 methods.



    I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.










    share|improve this question


























      up vote
      3
      down vote

      favorite









      up vote
      3
      down vote

      favorite











      This is a Django tag with 2 classes, but can be viewed as a module:



      What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables



      It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.



      This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:



      class RenderTag:

      @staticmethod
      def get_context_data():

      annotations = Annotations()
      df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
      df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
      df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
      df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)

      # json data for js tables
      json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)

      context = {
      "json_1_1": json_for_category_1_1.to_json(orient='split'),
      "json_1_2": json_for_category_1_2.to_json(orient='split'),
      "json_1_3": json_for_category_1_3.to_json(orient='split'),
      "json_1_4": json_for_category_1_4.to_json(orient='split'),
      "json_2_1": json_for_category_2_1.to_json(orient='split'),
      "json_2_2": json_for_category_2_2.to_json(orient='split'),
      "json_2_3": json_for_category_2_3.to_json(orient='split'),
      "json_2_4": json_for_category_2_4.to_json(orient='split'),
      "json_3_1": json_for_category_3_1.to_json(orient='split'),
      "json_3_2": json_for_category_3_2.to_json(orient='split'),
      "json_3_3": json_for_category_3_3.to_json(orient='split'),
      "json_3_4": json_for_category_3_4.to_json(orient='split'),
      }

      return context


      This class I think needs a lot of improvement:



      class Annotations:

      def __init__(self):

      # loading data
      self.df_type_2 = helpers.load_database("type_2").round(2)
      self.df_type_3 = helpers.load_database("type_3").round(2)
      self.df_type_1 = helpers.load_database("type_1").round(2)

      # main transformations

      # type_2 and 4
      self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
      self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
      self.df_type_2 = self.do_transformations_for_2(self.df_type_2)

      # type_1
      self.df_type_1 = self.do_transformations_for_1(self.df_type_1)

      # type_3
      self.df_type_3 = self.do_transformations_for_3(self.df_type_3)

      # and I have 4 methods that call a lot of static functions

      def do_transformations_for_1(self, df):
      """
      This is the main function that edits the data for type 1
      We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df["id"] = df.index
      df = df.pipe(self.do_something)
      .pipe(self.do_something_1)
      .pipe(self.do_something_2)
      .pipe(self.do_something_3)
      .pipe(self.do_something_4)
      .pipe(self.do_something_5)
      .pipe(self.add_colors_log2foldchange)
      .pipe(self.fill_na_with_empty_strings)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)

      return df

      def do_transformations_for_2(self, df):
      """
      This is a function that runs only for type 2
      We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df = df.pipe(self.do_something)
      .pipe(self.add_colors_log2foldchange)
      .pipe(self.do_something_7)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)

      return df

      def do_transformations_for_3(self, df):
      """
      This is a function that runs only for type 3. We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df = df.pipe(self.do_something, False)
      .pipe(self.do_something_9)
      .pipe(self.add_colors_log2foldchange)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)

      return df

      def do_transformations_for_4(self, df):
      """
      This is a function that runs only for the type_4
      We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df = df.pipe(self.do_something, True)
      .pipe(self.do_something_9)
      .pipe(self.add_colors_log2foldchange)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)

      return df

      # many static methods that are only used once or twice, deleted many of them


      @staticmethod
      def unicode_lists_to_string(df, columns):
      for column in columns:
      df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")

      return df

      @staticmethod
      def transform_type_4_position(df: pd.DataFrame):
      """
      Remove copy number from position in type_4 table and also add chr in front

      Args:
      df(pd.DataFrame):

      Returns:
      pd.DataFrame: with position modified
      """
      df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")

      return df

      @staticmethod
      def filter_categories(df):
      """
      Split the df by categories because we want them in separate tables

      Args:
      df: main df

      Returns:
      Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3

      """
      df_1 = df[df["category"] == 1]
      df_2 = df[df["category"] == 2]
      df_3 = df[df["category"] == 3]

      return df_1, df_2, df_3

      @staticmethod
      def add_colors_log2foldchange(df: pd.DataFrame):
      """
      We want to add background colors to log2foldchange values, fron blue to red

      Args:
      df(pd.DataFrame): df with log2foldchange values

      Returns:
      df(pd.DataFrame): df with a new hex_color column
      """
      df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
      df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")

      return df_new

      @staticmethod
      def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
      """

      Args:
      df(pd.DataFrame):

      Returns:
      pd.DataFrame:
      """
      def strip_germline_from_alt_ref(row):
      if pd.notna(row):
      if "]," in row:
      row = row.split("],")
      row = row[1]

      row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")

      return row

      df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
      df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)

      return df


      As you can see I do modifications for all 4 types in 4 methods.



      I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.










      share|improve this question















      This is a Django tag with 2 classes, but can be viewed as a module:



      What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables



      It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.



      This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:



      class RenderTag:

      @staticmethod
      def get_context_data():

      annotations = Annotations()
      df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
      df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
      df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
      df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)

      # json data for js tables
      json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
      json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)

      context = {
      "json_1_1": json_for_category_1_1.to_json(orient='split'),
      "json_1_2": json_for_category_1_2.to_json(orient='split'),
      "json_1_3": json_for_category_1_3.to_json(orient='split'),
      "json_1_4": json_for_category_1_4.to_json(orient='split'),
      "json_2_1": json_for_category_2_1.to_json(orient='split'),
      "json_2_2": json_for_category_2_2.to_json(orient='split'),
      "json_2_3": json_for_category_2_3.to_json(orient='split'),
      "json_2_4": json_for_category_2_4.to_json(orient='split'),
      "json_3_1": json_for_category_3_1.to_json(orient='split'),
      "json_3_2": json_for_category_3_2.to_json(orient='split'),
      "json_3_3": json_for_category_3_3.to_json(orient='split'),
      "json_3_4": json_for_category_3_4.to_json(orient='split'),
      }

      return context


      This class I think needs a lot of improvement:



      class Annotations:

      def __init__(self):

      # loading data
      self.df_type_2 = helpers.load_database("type_2").round(2)
      self.df_type_3 = helpers.load_database("type_3").round(2)
      self.df_type_1 = helpers.load_database("type_1").round(2)

      # main transformations

      # type_2 and 4
      self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
      self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
      self.df_type_2 = self.do_transformations_for_2(self.df_type_2)

      # type_1
      self.df_type_1 = self.do_transformations_for_1(self.df_type_1)

      # type_3
      self.df_type_3 = self.do_transformations_for_3(self.df_type_3)

      # and I have 4 methods that call a lot of static functions

      def do_transformations_for_1(self, df):
      """
      This is the main function that edits the data for type 1
      We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df["id"] = df.index
      df = df.pipe(self.do_something)
      .pipe(self.do_something_1)
      .pipe(self.do_something_2)
      .pipe(self.do_something_3)
      .pipe(self.do_something_4)
      .pipe(self.do_something_5)
      .pipe(self.add_colors_log2foldchange)
      .pipe(self.fill_na_with_empty_strings)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)

      return df

      def do_transformations_for_2(self, df):
      """
      This is a function that runs only for type 2
      We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df = df.pipe(self.do_something)
      .pipe(self.add_colors_log2foldchange)
      .pipe(self.do_something_7)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)

      return df

      def do_transformations_for_3(self, df):
      """
      This is a function that runs only for type 3. We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df = df.pipe(self.do_something, False)
      .pipe(self.do_something_9)
      .pipe(self.add_colors_log2foldchange)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)

      return df

      def do_transformations_for_4(self, df):
      """
      This is a function that runs only for the type_4
      We take the main df and then we run a series of manipulations

      Args:
      df(pd.DataFrame): the df that we want to process

      Returns:
      df(pd.DataFrame): the end dataframe that will be transferred to the js file
      """
      df = df.pipe(self.do_something, True)
      .pipe(self.do_something_9)
      .pipe(self.add_colors_log2foldchange)
      .pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)

      return df

      # many static methods that are only used once or twice, deleted many of them


      @staticmethod
      def unicode_lists_to_string(df, columns):
      for column in columns:
      df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")

      return df

      @staticmethod
      def transform_type_4_position(df: pd.DataFrame):
      """
      Remove copy number from position in type_4 table and also add chr in front

      Args:
      df(pd.DataFrame):

      Returns:
      pd.DataFrame: with position modified
      """
      df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")

      return df

      @staticmethod
      def filter_categories(df):
      """
      Split the df by categories because we want them in separate tables

      Args:
      df: main df

      Returns:
      Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3

      """
      df_1 = df[df["category"] == 1]
      df_2 = df[df["category"] == 2]
      df_3 = df[df["category"] == 3]

      return df_1, df_2, df_3

      @staticmethod
      def add_colors_log2foldchange(df: pd.DataFrame):
      """
      We want to add background colors to log2foldchange values, fron blue to red

      Args:
      df(pd.DataFrame): df with log2foldchange values

      Returns:
      df(pd.DataFrame): df with a new hex_color column
      """
      df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
      df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")

      return df_new

      @staticmethod
      def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
      """

      Args:
      df(pd.DataFrame):

      Returns:
      pd.DataFrame:
      """
      def strip_germline_from_alt_ref(row):
      if pd.notna(row):
      if "]," in row:
      row = row.split("],")
      row = row[1]

      row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")

      return row

      df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
      df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)

      return df


      As you can see I do modifications for all 4 types in 4 methods.



      I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.







      python pandas django






      share|improve this question















      share|improve this question













      share|improve this question




      share|improve this question








      edited Dec 11 at 17:27

























      asked Dec 11 at 17:23









      Claudiu Creanga

      210210




      210210






















          1 Answer
          1






          active

          oldest

          votes

















          up vote
          3
          down vote














          This first class shouldn't be important, don't think there is something here to improve.




          You're wrong :)



          There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like



          class RenderTag:
          @staticmethod
          def get_context_data():
          annotations = Annotations()

          def to_json(x):
          return x.to_json()

          context = {}
          for i in range(1, 5):
          df_type = getattr(annotations, f'df_type_{i}')
          categories = annotations.filter_categories(df_type)
          for j, category in enumerate(categories, 1):
          js = category.apply(to_json, axis=1).to_json(orient='split')
          context[f'json{j}_{i}'] = js

          return context


          That assumes that Annotations cannot change. You can make it even simpler if Annotations.df_type is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).






          share|improve this answer





















            Your Answer





            StackExchange.ifUsing("editor", function () {
            return StackExchange.using("mathjaxEditing", function () {
            StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
            StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
            });
            });
            }, "mathjax-editing");

            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "196"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: false,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: null,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














            draft saved

            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209462%2fdjango-module-to-transform-data-from-database-to-be-displayed-in-slickgrid-table%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes








            up vote
            3
            down vote














            This first class shouldn't be important, don't think there is something here to improve.




            You're wrong :)



            There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like



            class RenderTag:
            @staticmethod
            def get_context_data():
            annotations = Annotations()

            def to_json(x):
            return x.to_json()

            context = {}
            for i in range(1, 5):
            df_type = getattr(annotations, f'df_type_{i}')
            categories = annotations.filter_categories(df_type)
            for j, category in enumerate(categories, 1):
            js = category.apply(to_json, axis=1).to_json(orient='split')
            context[f'json{j}_{i}'] = js

            return context


            That assumes that Annotations cannot change. You can make it even simpler if Annotations.df_type is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).






            share|improve this answer

























              up vote
              3
              down vote














              This first class shouldn't be important, don't think there is something here to improve.




              You're wrong :)



              There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like



              class RenderTag:
              @staticmethod
              def get_context_data():
              annotations = Annotations()

              def to_json(x):
              return x.to_json()

              context = {}
              for i in range(1, 5):
              df_type = getattr(annotations, f'df_type_{i}')
              categories = annotations.filter_categories(df_type)
              for j, category in enumerate(categories, 1):
              js = category.apply(to_json, axis=1).to_json(orient='split')
              context[f'json{j}_{i}'] = js

              return context


              That assumes that Annotations cannot change. You can make it even simpler if Annotations.df_type is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).






              share|improve this answer























                up vote
                3
                down vote










                up vote
                3
                down vote










                This first class shouldn't be important, don't think there is something here to improve.




                You're wrong :)



                There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like



                class RenderTag:
                @staticmethod
                def get_context_data():
                annotations = Annotations()

                def to_json(x):
                return x.to_json()

                context = {}
                for i in range(1, 5):
                df_type = getattr(annotations, f'df_type_{i}')
                categories = annotations.filter_categories(df_type)
                for j, category in enumerate(categories, 1):
                js = category.apply(to_json, axis=1).to_json(orient='split')
                context[f'json{j}_{i}'] = js

                return context


                That assumes that Annotations cannot change. You can make it even simpler if Annotations.df_type is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).






                share|improve this answer













                This first class shouldn't be important, don't think there is something here to improve.




                You're wrong :)



                There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like



                class RenderTag:
                @staticmethod
                def get_context_data():
                annotations = Annotations()

                def to_json(x):
                return x.to_json()

                context = {}
                for i in range(1, 5):
                df_type = getattr(annotations, f'df_type_{i}')
                categories = annotations.filter_categories(df_type)
                for j, category in enumerate(categories, 1):
                js = category.apply(to_json, axis=1).to_json(orient='split')
                context[f'json{j}_{i}'] = js

                return context


                That assumes that Annotations cannot change. You can make it even simpler if Annotations.df_type is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).







                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered Dec 11 at 22:22









                Reinderien

                2,047616




                2,047616






























                    draft saved

                    draft discarded




















































                    Thanks for contributing an answer to Code Review Stack Exchange!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    Use MathJax to format equations. MathJax reference.


                    To learn more, see our tips on writing great answers.





                    Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


                    Please pay close attention to the following guidance:


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209462%2fdjango-module-to-transform-data-from-database-to-be-displayed-in-slickgrid-table%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    Сан-Квентин

                    8-я гвардейская общевойсковая армия

                    Алькесар