Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 29 additions & 38 deletions Script/PRONTO.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from decimal import Decimal
from copy import deepcopy
import pronto.pronto as pronto
import pandas
import math
from pdf2image import convert_from_path

runID = ""
Expand Down Expand Up @@ -731,45 +733,34 @@ def insert_image_to_ppt(DNA_sampleID,DNA_normal_sampleID,RNA_sampleID,DNA_image_
ppt.save(output_ppt_file)


def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):
table_file = open(table_data_file)
lines = table_file.readlines()
if not lines:
def insert_table_to_ppt(table_file,slide_n,table_name,left_h,top_h,width_h,left_t,top_t,width_t,height_t,font_size,table_header,output_ppt_file,if_print_rowNo,table_column_width,table_max_rows_per_slide):

# load table data
try:
table_data = pandas.read_csv(table_file, sep='\t')
except pandas.errors.EmptyDataError:
logging.warning("{} is empty".format(table_file))
return
first_line = lines[0]
first_line_cells = first_line.split('\t')

# add empty columns for missing header columns and move additional columns to the right
table_data = pronto.normalize_column_index(table_data, table_header)

# determine column and row number
cols = len(table_header)
header_not_exist_in_table = []
for n in range(len(table_header)):
if_exist = False
if(table_header[n] in first_line_cells):
if_exist = True
if not if_exist:
header_not_exist_in_table.append(n)
data_rows = []
for line in lines[1:]:
line_cells = line.split('\t')
if header_not_exist_in_table:
for num in header_not_exist_in_table:
line_cells.insert(num," ")
row_data = [cell.strip() for cell in line.split('\t')]
data_rows.append(row_data)
total_rows = len(data_rows)
rows = len(table_data)

ppt = Presentation(output_ppt_file)
if(table_max_rows_per_slide is None or total_rows <= table_max_rows_per_slide):
total_slides_needed = 1
rows_per_page = total_rows
start_slide_index = slide_n
else:
total_slides_needed = (total_rows + table_max_rows_per_slide -1) // table_max_rows_per_slide
rows_per_page = table_max_rows_per_slide
start_slide_index = None
# how many slides, and start slide index
if not table_max_rows_per_slide:
table_max_rows_per_slide = rows
total_slides_needed = math.ceil(rows / table_max_rows_per_slide)
start_slide_index = None if total_slides_needed > 1 else slide_n

ppt = Presentation(output_ppt_file)
for page_num in range(total_slides_needed):
start_idx = page_num * rows_per_page
end_idx = min(start_idx + rows_per_page, total_rows)
current_page_data = data_rows[start_idx:end_idx]
start_idx = page_num * table_max_rows_per_slide
end_idx = min(start_idx + table_max_rows_per_slide, rows)
data_rows = table_data.values.tolist()
current_page_data = data_rows[start_idx:end_idx] # use df
current_page_rows = len(current_page_data)
if(start_slide_index is not None and page_num == 0):
slide = ppt.slides[slide_n - 1]
Expand All @@ -796,18 +787,18 @@ def insert_table_to_ppt(table_data_file,slide_n,table_name,left_h,top_h,width_h,
textbox = slide.shapes.add_textbox(Inches(left_h),Inches(top_h),Inches(width_h),Inches(0.25))
tf = textbox.text_frame
if(if_print_rowNo == True):
if(table_max_rows_per_slide is not None):
tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
if(total_slides_needed > 1):
tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ", Page " + str(page_num+1) + "/" + str(total_slides_needed) + ")"
else:
tf.paragraphs[0].text = table_name +" (N=" + str(total_rows) + ")"
tf.paragraphs[0].text = table_name +" (N=" + str(rows) + ")"
else:
tf.paragraphs[0].text = table_name
tf.paragraphs[0].font.size = Pt(8)
tf.paragraphs[0].font.bold = True
tf.paragraphs[0].alignment = PP_ALIGN.CENTER

ppt.save(output_ppt_file)
return total_rows
return rows


def update_ppt_variant_summary_table(data_nrows,DNA_sampleID,RNA_sampleID,TMB_DRUP_nr,TMB_DRUP_str,DNA_variant_summary_file,RNA_variant_summary_file,output_file_preMTB_AppendixTable,output_table_file_filterResults_AllReporVariants_CodingRegion,output_ppt_file):
Expand Down
13 changes: 13 additions & 0 deletions pronto/pronto.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import glob
import logging
import os
import pandas

# get tumor mutational burden label
def get_tmb_string(val):
Expand All @@ -27,3 +28,15 @@ def glob_tsoppi_file(is_error, root, run_id, *path_units):
else:
logging.error("unsuccessful glob strings for {}:\n{}\n{}".format(run_id, glob_string_ous, glob_string_hus))
raise ValueError

def normalize_column_index(df: pandas.DataFrame, exp_col_idx: list) -> pandas.DataFrame:
    """Return a copy of *df* whose columns follow the expected layout.

    The result starts with the columns listed in ``exp_col_idx`` (in that
    order). Expected columns that are absent from ``df`` are created and
    filled with a single space. Columns present in ``df`` but not expected
    are kept and moved to the right, preserving their original relative
    order.

    Args:
        df: Table to normalize. It is not modified.
        exp_col_idx: Expected column labels, in the desired order.

    Returns:
        A new DataFrame with the expected columns first, followed by any
        additional columns.
    """
    expected = list(exp_col_idx)
    current = df.columns.tolist()

    # Sets are used only for membership tests; the ordering always comes from
    # the lists, so the resulting column order is deterministic.
    expected_lookup = set(expected)
    current_lookup = set(current)
    missing = [col for col in expected if col not in current_lookup]
    additional = [col for col in current if col not in expected_lookup]

    # Work on a copy so the caller's frame does not silently gain columns.
    result = df.copy()
    for col in missing:
        result[col] = ' '

    # Expected columns first, additional columns moved to the right.
    return result[expected + additional]
83 changes: 83 additions & 0 deletions pronto/tests/pronto_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas
import pytest
import pronto.pronto

Expand Down Expand Up @@ -99,3 +100,85 @@ def test_get_tmb_string(input, exception, want):
def test_glob_tsoppi_file(inputs, exception, want):
with exception:
assert pronto.pronto.glob_tsoppi_file(*inputs) == want

def _frame(**columns):
    """Build a DataFrame whose columns appear in keyword-argument order."""
    return pandas.DataFrame(columns)


@pytest.mark.parametrize(
    "inputs, exception, want",
    [
        # every expected column is present and already in order
        (
            (
                _frame(one=[1, 2], two=[3, 4], three=[5, 6], four=[7, 8]),
                ["one", "two", "three", "four"],
            ),
            does_not_raise(),
            _frame(one=[1, 2], two=[3, 4], three=[5, 6], four=[7, 8]),
        ),
        # expected column "three" is missing and gets filled with blanks
        (
            (
                _frame(one=[1, 2], two=[3, 4], four=[7, 8]),
                ["one", "two", "three", "four"],
            ),
            does_not_raise(),
            _frame(one=[1, 2], two=[3, 4], three=[' ', ' '], four=[7, 8]),
        ),
        # column "one" is not expected and is moved to the right
        (
            (
                _frame(one=[1, 2], two=[3, 4], three=[5, 6], four=[7, 8]),
                ["two", "three", "four"],
            ),
            does_not_raise(),
            _frame(two=[3, 4], three=[5, 6], four=[7, 8], one=[1, 2]),
        ),
        # "three" is missing and the additional column "five" stays on the right
        (
            (
                _frame(one=[1, 2], two=[3, 4], four=[7, 8], five=[9, 10]),
                ["one", "two", "three", "four"],
            ),
            does_not_raise(),
            _frame(
                one=[1, 2],
                two=[3, 4],
                three=[' ', ' '],
                four=[7, 8],
                five=[9, 10],
            ),
        ),
    ]
)
def test_normalize_column_index(inputs, exception, want):
    """Check that normalize_column_index yields the wanted column layout."""
    with exception:
        result = pronto.pronto.normalize_column_index(*inputs)
        assert want.equals(result)
4 changes: 2 additions & 2 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pytest==7.4.3
pytest-emoji==0.2.0
pytest-md==0.2.0
pytest-emoji==0.2.0
pytest==7.4.3
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
click==8.1.7
pandas==2.2.3
pdf2image==1.17.0
python-docx==1.1.0
python-pptx==0.6.23
xlrd==2.0.1
xlutils==2.0.0
pdf2image==1.17.0