import collections
import textwrap

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('retina')  # Request the 'retina' (high-resolution) inline figure format; this is not SVG

%matplotlib inline


# Load the survey responses from the CSV file in the working directory.
df = pd.read_csv('responses.csv')


# Bind the first 18 column labels to names, purely by position, so the analysis
# below can write df[use_case] instead of repeating the header text.
# Only the un-suffixed names (experience_level, use_case, scientific_field, ...)
# are read in this part of the file; the *_user / *_extra names are bound but not
# used here (presumably raw and free-text answers -- confirm against the CSV).
# `*_` absorbs any further columns; a file with fewer than 18 columns raises
# ValueError at this statement.
(
    timestamp,
    experience_level_user,
    experience_level,
    use_case_user,
    use_case,
    use_case_extra,
    scientific_field,
    code_type_user,
    code_type,
    code_type_extra,
    target_language,
    codegen_tool_user,
    codegen_tool,
    codegen_tool_extra,
    lambdify_backend,
    improve_user,
    improve,
    improve_extra,
    *_,
) = df.columns

# Number of rows in the table; the denominator of every percentage label below.
number_response = len(df)


# Numeric codes of the single-choice experience question; 0 stands for a
# missing answer.
EXPERIENCE_CATEGORY_MAPPING = {
    0: "No Response",
    1: "Beginner User",
    2: "Intermediate User",
    3: "Advanced User",
    4: "Major Contributor",
}


number_no_response = df[experience_level].isna().sum()

# Missing answers are counted under code 0 (an integer, like every other code).
experience_category_no_response = pd.Series({0: number_no_response})
experience_category_response = (
    df[experience_level].dropna().astype(int).value_counts(sort=False).sort_index()
)
# One row per mapped category, in mapping order, with 0 for categories nobody
# selected, so bars, percentages and tick labels always line up.
# groupby/sum merges a literal 0 answer with the missing-answer row instead of
# leaving a duplicate index entry. Codes that are not in the mapping have no
# label and are left out of the plot.
experience_category = (
    pd.concat([experience_category_no_response, experience_category_response])
    .groupby(level=0)
    .sum()
    .reindex(list(EXPERIENCE_CATEGORY_MAPPING), fill_value=0)
)

# Share of all respondents per category, formatted for the bar labels.
experience_category_proportion = [
    f"{proportion:.1f}%"
    for proportion in (experience_category / number_response) * 100
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars,
# and plot the precomputed counts so that empty categories still get a bar.
_, ax = plt.subplots()
sns.barplot(
    x=experience_category.index.astype(str),
    y=experience_category.to_numpy(),
    ax=ax,
)
ax.bar_label(ax.containers[0], experience_category_proportion, label_type='center')
ax.set_xlabel("SymPy Experience Level")
ax.set_ylabel("Number of Respondents")
# Wrap long category names so neighbouring tick labels do not overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 10) for label in EXPERIENCE_CATEGORY_MAPPING.values()]
)


# Numeric codes of the multi-select "use case" question; 0 stands for a
# missing answer.
USE_CASE_MAPPING = {
    0: "No Response",
    1: "Debugging Symbolic Code",
    2: "In Notebooks/Scripts",
    3: "In Scientific Research",
    4: "In Production Code",
    5: "In Library Code",
}


# Tally how often each code was selected. One response may list several codes
# separated by commas, so the percentages below can add up to more than 100%.
use_case_count = collections.Counter()
for response in df[use_case].fillna(0).astype("str"):
    # Canonicalise tokens such as " 2" or "2.0" to "2" and skip empty fragments
    # (e.g. after a trailing comma). A non-numeric token raises ValueError.
    use_case_count.update(
        str(int(float(token))) for token in response.split(",") if token.strip()
    )

# One row per mapped code, in mapping order, with 0 for options nobody picked,
# so bars, percentages and tick labels always line up. Codes that are not in
# the mapping have no label and are left out of the plot; they remain in
# use_case_count.
use_case_df = (
    pd.Series(use_case_count, name="use case", dtype="int64")
    .reindex([str(code) for code in USE_CASE_MAPPING], fill_value=0)
    .to_frame()
)
use_case_proportion = [
    f"{((user / number_response) * 100):.1f}%" for user in use_case_df["use case"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=use_case_df.index, y=use_case_df["use case"], ax=ax)
ax.bar_label(ax.containers[0], use_case_proportion, label_type='center')
ax.set_xlabel("Codegen Use Cases")
ax.set_ylabel("Number of Respondents")
# Wrap long option names so neighbouring tick labels do not overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 10) for label in USE_CASE_MAPPING.values()]
)


# Tally the free-text research fields. One response may name several fields
# separated by commas, so the percentages below can add up to more than 100%.
scientific_field_count = collections.Counter()
for response in df[scientific_field].fillna("No Response").astype("str"):
    # Strip surrounding whitespace so "A, B" and "A,B" count the same fields,
    # and skip empty fragments (e.g. after a trailing comma).
    scientific_field_count.update(
        field.strip() for field in response.split(",") if field.strip()
    )

scientific_field_df = pd.Series(scientific_field_count, name="scientific field").to_frame()
scientific_field_proportion = [
    f"{((user / number_response) * 100):.1f}%"
    for user in scientific_field_df["scientific field"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=scientific_field_df.index, y=scientific_field_df["scientific field"], ax=ax)
ax.bar_label(ax.containers[0], scientific_field_proportion, label_type='center')
ax.set_xlabel("Scientific Research Field")
ax.set_ylabel("Number of Respondents")
# The labels come from the plotted index itself, wrapped to avoid overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 8) for label in scientific_field_df.index]
)


# Numeric codes of the multi-select "code type" question; 0 stands for a
# missing answer.
CODE_TYPE_MAPPING = {
    0: "No Response",
    1: "Call from Python",
    2: "Call from Another Language",
    3: "Copy-Paste into Non-Python Code",
}


# Tally how often each code was selected. One response may list several codes
# separated by commas, so the percentages below can add up to more than 100%.
code_type_count = collections.Counter()
for response in df[code_type].fillna(0).astype("str"):
    # Canonicalise tokens such as " 2" or "2.0" to "2" and skip empty fragments
    # (e.g. after a trailing comma). A non-numeric token raises ValueError.
    code_type_count.update(
        str(int(float(token))) for token in response.split(",") if token.strip()
    )

# One row per mapped code, in mapping order, with 0 for options nobody picked,
# so bars, percentages and tick labels always line up. Codes that are not in
# the mapping have no label and are left out of the plot; they remain in
# code_type_count.
code_type_df = (
    pd.Series(code_type_count, name="code type", dtype="int64")
    .reindex([str(code) for code in CODE_TYPE_MAPPING], fill_value=0)
    .to_frame()
)
code_type_proportion = [
    f"{((user / number_response) * 100):.1f}%" for user in code_type_df["code type"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=code_type_df.index, y=code_type_df["code type"], ax=ax)
ax.bar_label(ax.containers[0], code_type_proportion, label_type='center')
ax.set_xlabel("Codegen Code Types")
ax.set_ylabel("Number of Respondents")
# Wrap long option names so neighbouring tick labels do not overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 10) for label in CODE_TYPE_MAPPING.values()]
)


# Tally the free-text target languages. One response may name several
# languages separated by commas, so the percentages below can add up to more
# than 100%.
target_language_count = collections.Counter()
for response in df[target_language].fillna("No Response").astype("str"):
    # Strip surrounding whitespace so "A, B" and "A,B" count the same languages,
    # and skip empty fragments (e.g. after a trailing comma).
    target_language_count.update(
        language.strip() for language in response.split(",") if language.strip()
    )

target_language_df = pd.Series(target_language_count, name="target language").to_frame()
target_language_proportion = [
    f"{((users / number_response) * 100):.1f}%"
    for users in target_language_df["target language"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=target_language_df.index, y=target_language_df["target language"], ax=ax)
ax.bar_label(ax.containers[0], target_language_proportion, label_type='center')
ax.set_xlabel("Target Languages")
ax.set_ylabel("Number of Respondents")
# The labels come from the plotted index itself, wrapped to avoid overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 10) for label in target_language_df.index]
)


# Numeric codes of the multi-select "codegen tool" question; 0 stands for a
# missing answer.
CODEGEN_TOOL_MAPPING = {
    0: "No Response",
    1: "Lambdify",
    2: "Autowrap",
    3: "Ufuncify",
    4: "Printers",
    5: "Subs/Evalf",
}


# Tally how often each code was selected. One response may list several codes
# separated by commas, so the percentages below can add up to more than 100%.
codegen_tool_count = collections.Counter()
for response in df[codegen_tool].fillna(0).astype("str"):
    # Canonicalise tokens such as " 2" or "2.0" to "2" and skip empty fragments
    # (e.g. after a trailing comma). A non-numeric token raises ValueError.
    codegen_tool_count.update(
        str(int(float(token))) for token in response.split(",") if token.strip()
    )

# One row per mapped code, in mapping order, with 0 for options nobody picked,
# so bars, percentages and tick labels always line up. Codes that are not in
# the mapping have no label and are left out of the plot; they remain in
# codegen_tool_count.
codegen_tool_df = (
    pd.Series(codegen_tool_count, name="codegen tool", dtype="int64")
    .reindex([str(code) for code in CODEGEN_TOOL_MAPPING], fill_value=0)
    .to_frame()
)
codegen_tool_proportion = [
    f"{((user / number_response) * 100):.1f}%" for user in codegen_tool_df["codegen tool"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=codegen_tool_df.index, y=codegen_tool_df["codegen tool"], ax=ax)
ax.bar_label(ax.containers[0], codegen_tool_proportion, label_type='center')
ax.set_xlabel("Codegen Tools")
ax.set_ylabel("Number of Respondents")
# Wrap long option names so neighbouring tick labels do not overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 10) for label in CODEGEN_TOOL_MAPPING.values()]
)


# Tally the free-text lambdify backends. One response may name several
# backends separated by commas, so the percentages below can add up to more
# than 100%.
lambdify_backend_count = collections.Counter()
for response in df[lambdify_backend].fillna("No Response").astype("str"):
    # Strip surrounding whitespace so "A, B" and "A,B" count the same backends,
    # and skip empty fragments (e.g. after a trailing comma).
    lambdify_backend_count.update(
        backend.strip() for backend in response.split(",") if backend.strip()
    )

lambdify_backend_df = pd.Series(lambdify_backend_count, name="lambdify backend").to_frame()
lambdify_backend_proportion = [
    f"{((user / number_response) * 100):.1f}%"
    for user in lambdify_backend_df["lambdify backend"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=lambdify_backend_df.index, y=lambdify_backend_df["lambdify backend"], ax=ax)
ax.bar_label(ax.containers[0], lambdify_backend_proportion, label_type='center')
ax.set_xlabel("Lambdify Backends")
ax.set_ylabel("Number of Respondents")
# The labels come from the plotted index itself, wrapped to avoid overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 10) for label in lambdify_backend_df.index]
)


# Numeric codes of the multi-select "what to improve" question; 0 stands for a
# missing answer.
IMPROVE_MAPPING = {
    0: "No Response",
    1: "More Languages",
    2: "Documentation",
    3: "More Math Functions",
    4: "Improve Derivatives",
    5: "Execution Speed",
    6: "Numerical Stability",
}


# Tally how often each code was selected. One response may list several codes
# separated by commas, so the percentages below can add up to more than 100%.
improve_count = collections.Counter()
for response in df[improve].fillna(0).astype("str"):
    # Canonicalise tokens such as " 2" or "2.0" to "2" and skip empty fragments
    # (e.g. after a trailing comma). A non-numeric token raises ValueError.
    improve_count.update(
        str(int(float(token))) for token in response.split(",") if token.strip()
    )

# One row per mapped code, in mapping order, with 0 for options nobody picked,
# so bars, percentages and tick labels always line up. Codes that are not in
# the mapping have no label and are left out of the plot; they remain in
# improve_count.
improve_df = (
    pd.Series(improve_count, name="improve", dtype="int64")
    .reindex([str(code) for code in IMPROVE_MAPPING], fill_value=0)
    .to_frame()
)
improve_proportion = [
    f"{((user / number_response) * 100):.1f}%" for user in improve_df["improve"]
]

# Draw on a fresh Axes so ax.containers[0] is guaranteed to be this plot's bars.
_, ax = plt.subplots()
sns.barplot(x=improve_df.index, y=improve_df["improve"], ax=ax)
ax.bar_label(ax.containers[0], improve_proportion, label_type='center')
ax.set_xlabel("Improvement Area")
ax.set_ylabel("Number of Respondents")
# Wrap long option names so neighbouring tick labels do not overlap.
_ = ax.set_xticklabels(
    [textwrap.fill(label, 8) for label in IMPROVE_MAPPING.values()]
)

SymPy Code Generation Survey

Background

Methodology

Summary

Analysis

Question 1: What is your experience level with SymPy?

Question 2: What do you use SymPy's code generation for?

Question 3: What type of code do you generate using SymPy?

Question 4: Which SymPy code generation tools do you use?

Question 5: What do we need to improve about SymPy's code generation?

Question 6: Are there any other comments you'd like to make?