import pandas as pd
import plotly.express as px

paper_dump = pd.read_csv('data/dump.csv', sep="\t")

# Fixed venue order for consistent colors and legend ordering across plots
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

# Total number of papers per year and venue
total_titles_per_venue = paper_dump.groupby(['year', 'venue']).size().reset_index(name='total_titles')

# Number of papers with a repository URL per year and venue
# (read_csv loads missing fields as NaN, so check for both NaN and empty string)
has_url = paper_dump["url"].notna() & (paper_dump["url"] != "")
total_url_per_venue = paper_dump[has_url].groupby(['year', 'venue']).size().reset_index(name='total_urls')

# Merge the two counts; venue-years without any URL come out of the left
# merge as NaN, so fill with 0 before computing the repository rate
merged_df = pd.merge(total_titles_per_venue, total_url_per_venue, on=['year', 'venue'], how='left')
merged_df['total_urls'] = merged_df['total_urls'].fillna(0)
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Plot the number of papers per year, grouped and colored by venue
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    labels={'total_titles': 'Papers', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_xaxes(range=[2018, 2024])
fig.show()
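
# --- Minimal sketch of the count-and-merge pattern above, run on a tiny ---
# --- hypothetical DataFrame; the values below are made up for illustration ---
_demo = pd.DataFrame({
    'year':  [2020, 2020, 2020, 2021],
    'venue': ['MIDL', 'MIDL', 'arXiv', 'MIDL'],
    'url':   ['https://example.org/a', None, 'https://example.org/b', None],
})
_titles = _demo.groupby(['year', 'venue']).size().reset_index(name='total_titles')
_urls = _demo[_demo['url'].notna()].groupby(['year', 'venue']).size().reset_index(name='total_urls')
_rates = pd.merge(_titles, _urls, on=['year', 'venue'], how='left')
_rates['repo_rate'] = _rates['total_urls'].fillna(0) / _rates['total_titles']
# Expected repo_rate: MIDL 2020 -> 0.5 (1 of 2 papers has a URL),
# arXiv 2020 -> 1.0, MIDL 2021 -> 0.0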

# Plot the number of papers with a repository URL per year, grouped by venue
fig = px.bar(
    merged_df,
    x='year',
    y='total_urls',
    color='venue',
    barmode='group',
    title='Number of repositories per venue',
    labels={'total_urls': 'Repositories', 'year': 'Year'},
    category_orders={'venue': custom_order}
)

fig.update_xaxes(range=[2018, 2024])
fig.show()


# Plot the fraction of papers with a repository per year, grouped by venue
fig = px.bar(
    merged_df,
    x='year',
    y='repo_rate',
    color='venue',
    barmode='group',
    title='Fraction of papers with a repository per venue',
    labels={'repo_rate': 'Repository rate', 'year': 'Year'},
    category_orders={'venue': custom_order}
)
fig.update_xaxes(range=[2018, 2024])
fig.update_yaxes(range=[0, 1])

fig.show()
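
# Optional: persist the interactive figure as a standalone HTML file so it can
# be shared without rerunning this script (the output filename here is an
# assumption; Figure.write_html is part of plotly's standard API)
fig.write_html('repo_rate_per_venue.html')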