Django module to transform data from database to be displayed in Slickgrid tables
up vote
3
down vote
favorite
This is a Django tag with 2 classes, but can be viewed as a module:
What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables
It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.
This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)
# json data for js tables
json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)
context = {
"json_1_1": json_for_category_1_1.to_json(orient='split'),
"json_1_2": json_for_category_1_2.to_json(orient='split'),
"json_1_3": json_for_category_1_3.to_json(orient='split'),
"json_1_4": json_for_category_1_4.to_json(orient='split'),
"json_2_1": json_for_category_2_1.to_json(orient='split'),
"json_2_2": json_for_category_2_2.to_json(orient='split'),
"json_2_3": json_for_category_2_3.to_json(orient='split'),
"json_2_4": json_for_category_2_4.to_json(orient='split'),
"json_3_1": json_for_category_3_1.to_json(orient='split'),
"json_3_2": json_for_category_3_2.to_json(orient='split'),
"json_3_3": json_for_category_3_3.to_json(orient='split'),
"json_3_4": json_for_category_3_4.to_json(orient='split'),
}
return context
This class I think needs a lot of improvement:
class Annotations:
def __init__(self):
# loading data
self.df_type_2 = helpers.load_database("type_2").round(2)
self.df_type_3 = helpers.load_database("type_3").round(2)
self.df_type_1 = helpers.load_database("type_1").round(2)
# main transformations
# type_2 and 4
self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
self.df_type_2 = self.do_transformations_for_2(self.df_type_2)
# type_1
self.df_type_1 = self.do_transformations_for_1(self.df_type_1)
# type_3
self.df_type_3 = self.do_transformations_for_3(self.df_type_3)
# and I have 4 methods that call a lot of static functions
def do_transformations_for_1(self, df):
"""
This is the main function that edits the data for type 1
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df["id"] = df.index
df = df.pipe(self.do_something)
.pipe(self.do_something_1)
.pipe(self.do_something_2)
.pipe(self.do_something_3)
.pipe(self.do_something_4)
.pipe(self.do_something_5)
.pipe(self.add_colors_log2foldchange)
.pipe(self.fill_na_with_empty_strings)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)
return df
def do_transformations_for_2(self, df):
"""
This is a function that runs only for type 2
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something)
.pipe(self.add_colors_log2foldchange)
.pipe(self.do_something_7)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)
return df
def do_transformations_for_3(self, df):
"""
This is a function that runs only for type 3. We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, False)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)
return df
def do_transformations_for_4(self, df):
"""
This is a function that runs only for the type_4
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, True)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)
return df
# many static methods that are only used once or twice, deleted many of them
@staticmethod
def unicode_lists_to_string(df, columns):
for column in columns:
df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")
return df
@staticmethod
def transform_type_4_position(df: pd.DataFrame):
"""
Remove copy number from position in type_4 table and also add chr in front
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame: with position modified
"""
df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")
return df
@staticmethod
def filter_categories(df):
"""
Split the df by categories because we want them in separate tables
Args:
df: main df
Returns:
Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3
"""
df_1 = df[df["category"] == 1]
df_2 = df[df["category"] == 2]
df_3 = df[df["category"] == 3]
return df_1, df_2, df_3
@staticmethod
def add_colors_log2foldchange(df: pd.DataFrame):
"""
We want to add background colors to log2foldchange values, fron blue to red
Args:
df(pd.DataFrame): df with log2foldchange values
Returns:
df(pd.DataFrame): df with a new hex_color column
"""
df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")
return df_new
@staticmethod
def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
"""
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame:
"""
def strip_germline_from_alt_ref(row):
if pd.notna(row):
if "]," in row:
row = row.split("],")
row = row[1]
row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")
return row
df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)
return df
As you can see I do modifications for all 4 types in 4 methods.
I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.
python pandas django
add a comment |
up vote
3
down vote
favorite
This is a Django tag with 2 classes, but can be viewed as a module:
What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables
It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.
This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)
# json data for js tables
json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)
context = {
"json_1_1": json_for_category_1_1.to_json(orient='split'),
"json_1_2": json_for_category_1_2.to_json(orient='split'),
"json_1_3": json_for_category_1_3.to_json(orient='split'),
"json_1_4": json_for_category_1_4.to_json(orient='split'),
"json_2_1": json_for_category_2_1.to_json(orient='split'),
"json_2_2": json_for_category_2_2.to_json(orient='split'),
"json_2_3": json_for_category_2_3.to_json(orient='split'),
"json_2_4": json_for_category_2_4.to_json(orient='split'),
"json_3_1": json_for_category_3_1.to_json(orient='split'),
"json_3_2": json_for_category_3_2.to_json(orient='split'),
"json_3_3": json_for_category_3_3.to_json(orient='split'),
"json_3_4": json_for_category_3_4.to_json(orient='split'),
}
return context
This class I think needs a lot of improvement:
class Annotations:
def __init__(self):
# loading data
self.df_type_2 = helpers.load_database("type_2").round(2)
self.df_type_3 = helpers.load_database("type_3").round(2)
self.df_type_1 = helpers.load_database("type_1").round(2)
# main transformations
# type_2 and 4
self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
self.df_type_2 = self.do_transformations_for_2(self.df_type_2)
# type_1
self.df_type_1 = self.do_transformations_for_1(self.df_type_1)
# type_3
self.df_type_3 = self.do_transformations_for_3(self.df_type_3)
# and I have 4 methods that call a lot of static functions
def do_transformations_for_1(self, df):
"""
This is the main function that edits the data for type 1
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df["id"] = df.index
df = df.pipe(self.do_something)
.pipe(self.do_something_1)
.pipe(self.do_something_2)
.pipe(self.do_something_3)
.pipe(self.do_something_4)
.pipe(self.do_something_5)
.pipe(self.add_colors_log2foldchange)
.pipe(self.fill_na_with_empty_strings)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)
return df
def do_transformations_for_2(self, df):
"""
This is a function that runs only for type 2
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something)
.pipe(self.add_colors_log2foldchange)
.pipe(self.do_something_7)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)
return df
def do_transformations_for_3(self, df):
"""
This is a function that runs only for type 3. We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, False)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)
return df
def do_transformations_for_4(self, df):
"""
This is a function that runs only for the type_4
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, True)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)
return df
# many static methods that are only used once or twice, deleted many of them
@staticmethod
def unicode_lists_to_string(df, columns):
for column in columns:
df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")
return df
@staticmethod
def transform_type_4_position(df: pd.DataFrame):
"""
Remove copy number from position in type_4 table and also add chr in front
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame: with position modified
"""
df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")
return df
@staticmethod
def filter_categories(df):
"""
Split the df by categories because we want them in separate tables
Args:
df: main df
Returns:
Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3
"""
df_1 = df[df["category"] == 1]
df_2 = df[df["category"] == 2]
df_3 = df[df["category"] == 3]
return df_1, df_2, df_3
@staticmethod
def add_colors_log2foldchange(df: pd.DataFrame):
"""
We want to add background colors to log2foldchange values, fron blue to red
Args:
df(pd.DataFrame): df with log2foldchange values
Returns:
df(pd.DataFrame): df with a new hex_color column
"""
df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")
return df_new
@staticmethod
def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
"""
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame:
"""
def strip_germline_from_alt_ref(row):
if pd.notna(row):
if "]," in row:
row = row.split("],")
row = row[1]
row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")
return row
df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)
return df
As you can see I do modifications for all 4 types in 4 methods.
I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.
python pandas django
add a comment |
up vote
3
down vote
favorite
up vote
3
down vote
favorite
This is a Django tag with 2 classes, but can be viewed as a module:
What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables
It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.
This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)
# json data for js tables
json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)
context = {
"json_1_1": json_for_category_1_1.to_json(orient='split'),
"json_1_2": json_for_category_1_2.to_json(orient='split'),
"json_1_3": json_for_category_1_3.to_json(orient='split'),
"json_1_4": json_for_category_1_4.to_json(orient='split'),
"json_2_1": json_for_category_2_1.to_json(orient='split'),
"json_2_2": json_for_category_2_2.to_json(orient='split'),
"json_2_3": json_for_category_2_3.to_json(orient='split'),
"json_2_4": json_for_category_2_4.to_json(orient='split'),
"json_3_1": json_for_category_3_1.to_json(orient='split'),
"json_3_2": json_for_category_3_2.to_json(orient='split'),
"json_3_3": json_for_category_3_3.to_json(orient='split'),
"json_3_4": json_for_category_3_4.to_json(orient='split'),
}
return context
This class I think needs a lot of improvement:
class Annotations:
def __init__(self):
# loading data
self.df_type_2 = helpers.load_database("type_2").round(2)
self.df_type_3 = helpers.load_database("type_3").round(2)
self.df_type_1 = helpers.load_database("type_1").round(2)
# main transformations
# type_2 and 4
self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
self.df_type_2 = self.do_transformations_for_2(self.df_type_2)
# type_1
self.df_type_1 = self.do_transformations_for_1(self.df_type_1)
# type_3
self.df_type_3 = self.do_transformations_for_3(self.df_type_3)
# and I have 4 methods that call a lot of static functions
def do_transformations_for_1(self, df):
"""
This is the main function that edits the data for type 1
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df["id"] = df.index
df = df.pipe(self.do_something)
.pipe(self.do_something_1)
.pipe(self.do_something_2)
.pipe(self.do_something_3)
.pipe(self.do_something_4)
.pipe(self.do_something_5)
.pipe(self.add_colors_log2foldchange)
.pipe(self.fill_na_with_empty_strings)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)
return df
def do_transformations_for_2(self, df):
"""
This is a function that runs only for type 2
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something)
.pipe(self.add_colors_log2foldchange)
.pipe(self.do_something_7)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)
return df
def do_transformations_for_3(self, df):
"""
This is a function that runs only for type 3. We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, False)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)
return df
def do_transformations_for_4(self, df):
"""
This is a function that runs only for the type_4
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, True)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)
return df
# many static methods that are only used once or twice, deleted many of them
@staticmethod
def unicode_lists_to_string(df, columns):
for column in columns:
df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")
return df
@staticmethod
def transform_type_4_position(df: pd.DataFrame):
"""
Remove copy number from position in type_4 table and also add chr in front
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame: with position modified
"""
df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")
return df
@staticmethod
def filter_categories(df):
"""
Split the df by categories because we want them in separate tables
Args:
df: main df
Returns:
Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3
"""
df_1 = df[df["category"] == 1]
df_2 = df[df["category"] == 2]
df_3 = df[df["category"] == 3]
return df_1, df_2, df_3
@staticmethod
def add_colors_log2foldchange(df: pd.DataFrame):
"""
We want to add background colors to log2foldchange values, fron blue to red
Args:
df(pd.DataFrame): df with log2foldchange values
Returns:
df(pd.DataFrame): df with a new hex_color column
"""
df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")
return df_new
@staticmethod
def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
"""
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame:
"""
def strip_germline_from_alt_ref(row):
if pd.notna(row):
if "]," in row:
row = row.split("],")
row = row[1]
row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")
return row
df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)
return df
As you can see I do modifications for all 4 types in 4 methods.
I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.
python pandas django
This is a Django tag with 2 classes, but can be viewed as a module:
What does the module do? it loads a database, performs some transformations on the data, returns the output as a dictionary of json strings for the frontend of django to represent in slickgrid tables
It's all working fine, but my class is too abstract and ended using a lot of static methods because there is actually no state that I need.
This first class shouldn't be important, don't think there is something here to improve. I basically have 4 types in 3 categories and want to go from 4 pandas dataframes to 12 json strings that are passed to django frontend:
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
df_type_1_category_1, df_type_1_category_2, df_type_1_category_3 = annotations.filter_categories(annotations.df_type_1)
df_type_2_category_1, df_type_2_category_2, df_type_2_category_3 = annotations.filter_categories(annotations.df_type_2)
df_type_3_category_1, df_type_3_category_2, df_type_3_category_3 = annotations.filter_categories(annotations.df_type_3)
df_type_4_category_1, df_type_4_category_2, df_type_4_category_3 = annotations.filter_categories(annotations.df_type_4)
# json data for js tables
json_for_category_1_1 = df_type_1_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_2 = df_type_2_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_3 = df_type_3_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_1_4 = df_type_4_category_1.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_1 = df_type_1_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_2 = df_type_2_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_3 = df_type_3_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_2_4 = df_type_4_category_2.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_1 = df_type_1_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_2 = df_type_2_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_3 = df_type_3_category_3.apply(lambda x: x.to_json(), axis=1)
json_for_category_3_4 = df_type_4_category_3.apply(lambda x: x.to_json(), axis=1)
context = {
"json_1_1": json_for_category_1_1.to_json(orient='split'),
"json_1_2": json_for_category_1_2.to_json(orient='split'),
"json_1_3": json_for_category_1_3.to_json(orient='split'),
"json_1_4": json_for_category_1_4.to_json(orient='split'),
"json_2_1": json_for_category_2_1.to_json(orient='split'),
"json_2_2": json_for_category_2_2.to_json(orient='split'),
"json_2_3": json_for_category_2_3.to_json(orient='split'),
"json_2_4": json_for_category_2_4.to_json(orient='split'),
"json_3_1": json_for_category_3_1.to_json(orient='split'),
"json_3_2": json_for_category_3_2.to_json(orient='split'),
"json_3_3": json_for_category_3_3.to_json(orient='split'),
"json_3_4": json_for_category_3_4.to_json(orient='split'),
}
return context
This class I think needs a lot of improvement:
class Annotations:
def __init__(self):
# loading data
self.df_type_2 = helpers.load_database("type_2").round(2)
self.df_type_3 = helpers.load_database("type_3").round(2)
self.df_type_1 = helpers.load_database("type_1").round(2)
# main transformations
# type_2 and 4
self.df_type_2, self.df_type_4 = self.split_2_into_2_and_4(self.df_type_2)
self.df_type_4 = self.do_transformations_for_4(self.df_type_4)
self.df_type_2 = self.do_transformations_for_2(self.df_type_2)
# type_1
self.df_type_1 = self.do_transformations_for_1(self.df_type_1)
# type_3
self.df_type_3 = self.do_transformations_for_3(self.df_type_3)
# and I have 4 methods that call a lot of static functions
def do_transformations_for_1(self, df):
"""
This is the main function that edits the data for type 1
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df["id"] = df.index
df = df.pipe(self.do_something)
.pipe(self.do_something_1)
.pipe(self.do_something_2)
.pipe(self.do_something_3)
.pipe(self.do_something_4)
.pipe(self.do_something_5)
.pipe(self.add_colors_log2foldchange)
.pipe(self.fill_na_with_empty_strings)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_snv)
return df
def do_transformations_for_2(self, df):
"""
This is a function that runs only for type 2
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something)
.pipe(self.add_colors_log2foldchange)
.pipe(self.do_something_7)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_4)
return df
def do_transformations_for_3(self, df):
"""
This is a function that runs only for type 3. We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, False)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_3)
return df
def do_transformations_for_4(self, df):
"""
This is a function that runs only for the type_4
We take the main df and then we run a series of manipulations
Args:
df(pd.DataFrame): the df that we want to process
Returns:
df(pd.DataFrame): the end dataframe that will be transferred to the js file
"""
df = df.pipe(self.do_something, True)
.pipe(self.do_something_9)
.pipe(self.add_colors_log2foldchange)
.pipe(helpers.sort_df_by_columns, self.columns_to_sort_type_2)
return df
# many static methods that are only used once or twice, deleted many of them
@staticmethod
def unicode_lists_to_string(df, columns):
for column in columns:
df[column] = df[column].str.strip("").str.replace("u'|'",'').str.replace(",",";")
return df
@staticmethod
def transform_type_4_position(df: pd.DataFrame):
"""
Remove copy number from position in type_4 table and also add chr in front
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame: with position modified
"""
df["vid_position"] = "chr" + df["vid"].str.split(":").str[:3].str.join(":")
return df
@staticmethod
def filter_categories(df):
"""
Split the df by categories because we want them in separate tables
Args:
df: main df
Returns:
Tuple[pd.DataFrame]: a tuple of 3 dataframes from categories 1,2,3
"""
df_1 = df[df["category"] == 1]
df_2 = df[df["category"] == 2]
df_3 = df[df["category"] == 3]
return df_1, df_2, df_3
@staticmethod
def add_colors_log2foldchange(df: pd.DataFrame):
"""
We want to add background colors to log2foldchange values, fron blue to red
Args:
df(pd.DataFrame): df with log2foldchange values
Returns:
df(pd.DataFrame): df with a new hex_color column
"""
df_new = helpers.add_colors_to_df(df, helpers.get_colors(), "log2_fold_change")
df_new["hex_color"] = df_new["hex_color"].str.replace("#", "")
return df_new
@staticmethod
def edit_support_alt_ref(df: pd.DataFrame) -> pd.DataFrame:
"""
Args:
df(pd.DataFrame):
Returns:
pd.DataFrame:
"""
def strip_germline_from_alt_ref(row):
if pd.notna(row):
if "]," in row:
row = row.split("],")
row = row[1]
row = row.replace("[", "").replace("]", "").replace(",", "|").replace(" ", "")
return row
df["paired_end_reads"] = df["paired_end_reads"].apply(strip_germline_from_alt_ref)
df["split_end_reads"] = df["split_end_reads"].apply(strip_germline_from_alt_ref)
return df
As you can see I do modifications for all 4 types in 4 methods.
I think I can go along with not using a class here, but sort of want to use a class here to be inline with the whole project... I use static methods because they are obviously easier to unit test. Pass a df and return a df, easy unit test.
python pandas django
python pandas django
edited Dec 11 at 17:27
asked Dec 11 at 17:23
Claudiu Creanga
210210
210210
add a comment |
add a comment |
1 Answer
1
active
oldest
votes
up vote
3
down vote
This first class shouldn't be important, don't think there is something here to improve.
You're wrong :)
There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
def to_json(x):
return x.to_json()
context = {}
for i in range(1, 5):
df_type = getattr(annotations, f'df_type_{i}')
categories = annotations.filter_categories(df_type)
for j, category in enumerate(categories, 1):
js = category.apply(to_json, axis=1).to_json(orient='split')
context[f'json{j}_{i}'] = js
return context
That assumes that Annotations
cannot change. You can make it even simpler if Annotations.df_type
is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209462%2fdjango-module-to-transform-data-from-database-to-be-displayed-in-slickgrid-table%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
3
down vote
This first class shouldn't be important, don't think there is something here to improve.
You're wrong :)
There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
def to_json(x):
return x.to_json()
context = {}
for i in range(1, 5):
df_type = getattr(annotations, f'df_type_{i}')
categories = annotations.filter_categories(df_type)
for j, category in enumerate(categories, 1):
js = category.apply(to_json, axis=1).to_json(orient='split')
context[f'json{j}_{i}'] = js
return context
That assumes that Annotations
cannot change. You can make it even simpler if Annotations.df_type
is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).
add a comment |
up vote
3
down vote
This first class shouldn't be important, don't think there is something here to improve.
You're wrong :)
There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
def to_json(x):
return x.to_json()
context = {}
for i in range(1, 5):
df_type = getattr(annotations, f'df_type_{i}')
categories = annotations.filter_categories(df_type)
for j, category in enumerate(categories, 1):
js = category.apply(to_json, axis=1).to_json(orient='split')
context[f'json{j}_{i}'] = js
return context
That assumes that Annotations
cannot change. You can make it even simpler if Annotations.df_type
is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).
add a comment |
up vote
3
down vote
up vote
3
down vote
This first class shouldn't be important, don't think there is something here to improve.
You're wrong :)
There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
def to_json(x):
return x.to_json()
context = {}
for i in range(1, 5):
df_type = getattr(annotations, f'df_type_{i}')
categories = annotations.filter_categories(df_type)
for j, category in enumerate(categories, 1):
js = category.apply(to_json, axis=1).to_json(orient='split')
context[f'json{j}_{i}'] = js
return context
That assumes that Annotations
cannot change. You can make it even simpler if Annotations.df_type
is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).
This first class shouldn't be important, don't think there is something here to improve.
You're wrong :)
There's a sea of repeated code here. You need to seriously DRY it up. I don't have enough of your system to test this myself, so you need to; but you should do something like
class RenderTag:
@staticmethod
def get_context_data():
annotations = Annotations()
def to_json(x):
return x.to_json()
context = {}
for i in range(1, 5):
df_type = getattr(annotations, f'df_type_{i}')
categories = annotations.filter_categories(df_type)
for j, category in enumerate(categories, 1):
js = category.apply(to_json, axis=1).to_json(orient='split')
context[f'json{j}_{i}'] = js
return context
That assumes that Annotations
cannot change. You can make it even simpler if Annotations.df_type
is stored as a 3-tuple, instead of three separate attributes. Elsewhere in your code, you really should carry this philosophy forward - instead of hard-coding three or four variables with numbers in the name, just maintain one tuple (if immutable) or list (if mutable).
answered Dec 11 at 22:22
Reinderien
2,047616
2,047616
add a comment |
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f209462%2fdjango-module-to-transform-data-from-database-to-be-displayed-in-slickgrid-table%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown